From 39d14c83130243f540d326bc8d93c3659ad443a1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 11 Mar 2024 10:59:43 -0500 Subject: [PATCH 01/30] Check column types with a utility that handles nesting correctly. --- cpp/include/cudf/detail/scatter.cuh | 5 +++-- cpp/include/cudf/utilities/type_checks.hpp | 23 ++++++++++++++++++++- cpp/src/copying/concatenate.cu | 7 ++----- cpp/src/copying/copy.cu | 13 +++++++++--- cpp/src/copying/copy_range.cu | 7 ++++--- cpp/src/copying/scatter.cu | 22 ++++++++------------ cpp/src/copying/shift.cu | 4 +++- cpp/src/dictionary/add_keys.cu | 5 +++-- cpp/src/dictionary/detail/concatenate.cu | 2 ++ cpp/src/dictionary/remove_keys.cu | 5 +++-- cpp/src/dictionary/replace.cu | 7 +++++-- cpp/src/dictionary/search.cu | 6 +++++- cpp/src/dictionary/set_keys.cu | 6 +++--- cpp/src/filling/fill.cu | 10 ++++++++- cpp/src/filling/sequence.cu | 4 +++- cpp/src/groupby/groupby.cu | 4 +++- cpp/src/interop/dlpack.cpp | 6 +++--- cpp/src/join/hash_join.cu | 8 ++----- cpp/src/labeling/label_bins.cu | 6 ++++-- cpp/src/lists/combine/concatenate_rows.cu | 7 ++----- cpp/src/lists/contains.cu | 3 ++- cpp/src/lists/sequences.cu | 5 +++-- cpp/src/reductions/reductions.cpp | 4 +++- cpp/src/reductions/segmented/reductions.cpp | 4 +++- cpp/src/replace/clamp.cu | 4 ++++ cpp/src/replace/nulls.cu | 7 ++++++- cpp/src/replace/replace.cu | 7 ++++--- cpp/src/rolling/detail/lead_lag_nested.cuh | 3 ++- cpp/src/search/contains_scalar.cu | 6 +++++- cpp/src/transform/one_hot_encode.cu | 4 +++- 30 files changed, 135 insertions(+), 69 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index dbf7bfa9527..d5484b14510 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -212,7 +213,7 @@ struct column_scatterer_impl { // check the keys match dictionary_column_view const source(source_in); dictionary_column_view const target(target_in); - CUDF_EXPECTS(source.keys().type() == target.keys().type(), + CUDF_EXPECTS(cudf::column_types_equal(source.keys(), target.keys()), "scatter dictionary keys must be the same type"); // first combine keys so both dictionaries have the same set diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index b925fc8ae92..2b326afe1f8 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include +#include + namespace cudf { /** @@ -47,4 +49,23 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs); */ bool column_types_equivalent(column_view const& lhs, column_view const& rhs); +/** + * @brief Compare the types of a range of `column_views` + * + * This function returns true if cudf::column_types_equal is true for every + * pair of consecutive columns in the range. + * + * @tparam ForwardIt Forward iterator + * @param first The first column + * @param last The last column + * @return true if all column types match + */ +template +inline bool all_column_types_equal(ForwardIt first, ForwardIt last) +{ + return std::all_of(std::next(first), last, [want = *first](auto const& c) { + return cudf::column_types_equal(want, c); + }); +} + } // namespace cudf diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index b1d850e0b27..d3c9a5f3579 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -460,11 +461,7 @@ void traverse_children::operator()(host_span */ void bounds_and_type_check(host_span cols, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(std::all_of(cols.begin(), - cols.end(), - [expected_type = cols.front().type()](auto const& c) { - return c.type() == expected_type; - }), + CUDF_EXPECTS(cudf::all_column_types_equal(cols.begin(), cols.end()), "Type mismatch in columns to concatenate."); // total size of all concatenated rows diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 6b7fae32d48..9fe4c31ced5 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -358,7 +359,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs and rhs columns"); CUDF_EXPECTS(lhs.size() == rhs.size(), "Both columns must be of the size"); - CUDF_EXPECTS(lhs.type() == rhs.type(), "Both inputs must be of the same type"); + CUDF_EXPECTS(cudf::column_types_equal(lhs, rhs), "Both inputs must be of the same type"); return copy_if_else(lhs, rhs, lhs.has_nulls(), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -372,7 +373,9 @@ std::unique_ptr copy_if_else(scalar const& lhs, CUDF_EXPECTS(boolean_mask.size() == rhs.size(), "Boolean mask column must be the same size as rhs column"); - auto rhs_type = + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. + auto const rhs_type = cudf::is_dictionary(rhs.type()) ? cudf::dictionary_column_view(rhs).keys_type() : rhs.type(); CUDF_EXPECTS(lhs.type() == rhs_type, "Both inputs must be of the same type"); @@ -388,6 +391,8 @@ std::unique_ptr copy_if_else(column_view const& lhs, CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs column"); + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. auto lhs_type = cudf::is_dictionary(lhs.type()) ? cudf::dictionary_column_view(lhs).keys_type() : lhs.type(); CUDF_EXPECTS(lhs_type == rhs.type(), "Both inputs must be of the same type"); @@ -401,6 +406,8 @@ std::unique_ptr copy_if_else(scalar const& lhs, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(lhs.type() == rhs.type(), "Both inputs must be of the same type"); return copy_if_else( lhs, rhs, !lhs.is_valid(stream), !rhs.is_valid(stream), boolean_mask, stream, mr); diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 61d51f1d284..e8a1f900ba3 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -145,7 +146,7 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator()= 0) && (target_begin <= target.size() - (source_end - source_begin)), "Range is out of bounds."); - CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch."); + CUDF_EXPECTS(cudf::column_types_equal(target, source), "Data type mismatch."); CUDF_EXPECTS(target.nullable() || not source.has_nulls(), "target should be nullable if source has null values."); @@ -233,7 +234,7 @@ std::unique_ptr copy_range(column_view const& source, (source_begin <= source_end) && (target_begin >= 0) && (target_begin <= target.size() - (source_end - source_begin)), "Range is out of bounds."); - CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch."); + CUDF_EXPECTS(cudf::column_types_equal(target, source), "Data type mismatch."); return cudf::type_dispatcher( target.type(), diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index baa5d85d4d4..b9755919e07 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -109,6 +109,8 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match"); // make a copy of data and null mask from source @@ -140,6 +142,8 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match"); auto const scalar_impl = static_cast(&source.get()); @@ -299,12 +303,7 @@ std::unique_ptr scatter(table_view const& source, "Number of columns in source and target not equal"); CUDF_EXPECTS(scatter_map.size() <= source.num_rows(), "Size of scatter map must be equal to or less than source rows"); - CUDF_EXPECTS(std::equal(source.begin(), - source.end(), - target.begin(), - [](auto const& col1, auto const& col2) { - return col1.type().id() == col2.type().id(); - }), + CUDF_EXPECTS(cudf::have_same_types(source, target), "Column types do not match between source and target"); CUDF_EXPECTS(not scatter_map.has_nulls(), "Scatter map contains nulls"); @@ -430,13 +429,8 @@ std::unique_ptr
boolean_mask_scatter(table_view const& input, "Boolean mask size and number of target rows mismatch"); CUDF_EXPECTS(boolean_mask.type().id() == type_id::BOOL8, "Mask must be of Boolean type"); // Count valid pair of input and columns as per type at each column index i - CUDF_EXPECTS( - std::all_of(thrust::counting_iterator(0), - thrust::counting_iterator(target.num_columns()), - [&input, &target](auto index) { - return ((input.column(index).type().id()) == (target.column(index).type().id())); - }), - "Type mismatch in input column and target column"); + CUDF_EXPECTS(cudf::have_same_types(input, target), + "Type mismatch in input column and target column"); if (target.num_rows() != 0) { std::vector> out_columns(target.num_columns()); @@ -470,6 +464,8 @@ std::unique_ptr
boolean_mask_scatter( // Count valid pair of input and columns as per type at each column/scalar index i CUDF_EXPECTS( + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. std::all_of(thrust::counting_iterator(0), thrust::counting_iterator(target.num_columns()), [&input, &target](auto index) { diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 89d6551737b..f9308b12659 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,6 +156,8 @@ std::unique_ptr shift(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.type() == fill_value.type(), "shift requires each fill value type to match the corresponding column type."); diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index 3973100aced..c992c7eac3f 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls"); auto old_keys = dictionary_column.keys(); // [a,b,c,d,f] - CUDF_EXPECTS(new_keys.type() == old_keys.type(), "Keys must be the same type"); + CUDF_EXPECTS(cudf::column_types_equal(new_keys, old_keys), "Keys must be the same type"); // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] auto combined_keys = cudf::detail::concatenate( diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 17295fb0345..671df3df9e4 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -220,6 +220,8 @@ std::unique_ptr concatenate(host_span columns, // empty column may not have keys so we create an empty column_view place-holder if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0}; auto keys = dict_view.keys(); + // TODO: Compare using cudf::column_types_equal to ensure nested types are + // handled correctly. CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match"); return keys; }); diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 86b70f1119b..dc575163c3f 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -154,7 +155,7 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); - CUDF_EXPECTS(keys_view.type() == keys_to_remove.type(), "keys types must match"); + CUDF_EXPECTS(cudf::column_types_equal(keys_view, keys_to_remove), "keys types must match"); // locate keys to remove by searching the keys column auto const matches = cudf::detail::contains(keys_to_remove, keys_view, stream, mr); diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 7069993866c..d2673ab19c8 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -83,7 +84,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls()) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS(input.keys().type() == replacement.keys().type(), "keys must match"); + CUDF_EXPECTS(cudf::column_types_equal(input.keys(), replacement.keys()), "keys must match"); CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); // first combine the keys so both input dictionaries have the same set @@ -118,6 +119,8 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, if (!input.has_nulls() || !replacement.is_valid(stream)) { return std::make_unique(input.parent(), stream, mr); } + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type"); // first add the replacement to the keys so only the indices need to be processed diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index e35aded1984..d432bef4982 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,6 +73,8 @@ struct find_index_fn { { if (!key.is_valid(stream)) return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.keys().type() == key.type(), "search key type must match dictionary keys type"); @@ -115,6 +117,8 @@ struct find_insert_index_fn { { if (!key.is_valid(stream)) return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.keys().type() == key.type(), "search key type must match dictionary keys type"); diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index b49cf7850b1..c48de765450 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,6 @@ struct dispatch_compute_indices { } // namespace -// std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& new_keys, rmm::cuda_stream_view stream, @@ -123,7 +123,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls"); auto keys = dictionary_column.keys(); - CUDF_EXPECTS(keys.type() == new_keys.type(), "keys types must match"); + CUDF_EXPECTS(cudf::column_types_equal(keys, new_keys), "keys types must match"); // copy the keys -- use cudf::distinct to make sure there are no duplicates, // then sort the results. diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 42d1f7592ec..ec64f81fc75 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -109,6 +109,8 @@ struct out_of_place_fill_range_dispatch { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.type() == value.type(), "Data type mismatch."); auto p_ret = std::make_unique(input, stream, mr); @@ -136,6 +138,8 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator(); auto p_scalar = static_cast(&value); @@ -152,6 +156,8 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator()(input, stream, mr); cudf::dictionary_column_view const target(input); + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(target.keys().type() == value.type(), "Data type mismatch."); // if the scalar is invalid, then just copy the column and fill the null mask @@ -218,6 +224,8 @@ void fill_in_place(mutable_column_view& destination, "Range is out of bounds."); CUDF_EXPECTS(destination.nullable() || value.is_valid(stream), "destination should be nullable or value should be non-null."); + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(destination.type() == value.type(), "Data type mismatch."); if (end != begin) { // otherwise no-op diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 99a17f8b0e0..3120991318b 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -127,6 +127,8 @@ std::unique_ptr sequence(size_type size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(init.type() == step.type(), "init and step must be of the same type."); CUDF_EXPECTS(size >= 0, "size must be >= 0"); CUDF_EXPECTS(is_numeric(init.type()), "Input scalar types must be numeric"); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index e3c021eb66a..877780658b5 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -312,6 +312,8 @@ std::pair, std::unique_ptr
> groupby::shift( CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), "Mismatch number of fill_values and columns."); CUDF_EXPECTS( + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. std::all_of(thrust::make_counting_iterator(0), thrust::make_counting_iterator(values.num_columns()), [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }), diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 9f36280930d..794da3b9fbc 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -230,9 +231,8 @@ DLManagedTensor* to_dlpack(table_view const& input, DLDataType const dltype = data_type_to_DLDataType(type); // Ensure all columns are the same type - CUDF_EXPECTS( - std::all_of(input.begin(), input.end(), [type](auto const& col) { return col.type() == type; }), - "All columns required to have same data type"); + CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), + "All columns required to have same data type"); // Ensure none of the columns have nulls CUDF_EXPECTS( diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 17616818a58..603c7ccc9cb 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -568,12 +569,7 @@ hash_join::compute_hash_join(cudf::table_view const& probe, std::make_unique>(0, stream, mr)); } - CUDF_EXPECTS(std::equal(std::cbegin(_build), - std::cend(_build), - std::cbegin(probe), - std::cend(probe), - [](auto const& b, auto const& p) { return b.type() == p.type(); }), - "Mismatch in joining column data types"); + CUDF_EXPECTS(cudf::have_same_types(_build, probe), "Mismatch in joining column data types"); return probe_join_indices(probe, join, output_size, stream, mr); } diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 9fecaa1ddb2..578288dfa3e 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -207,8 +208,9 @@ std::unique_ptr label_bins(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE() - CUDF_EXPECTS((input.type() == left_edges.type()) && (input.type() == right_edges.type()), - "The input and edge columns must have the same types."); + CUDF_EXPECTS( + cudf::column_types_equal(input, left_edges) && cudf::column_types_equal(input, right_edges), + "The input and edge columns must have the same types."); CUDF_EXPECTS(left_edges.size() == right_edges.size(), "The left and right edge columns must be of the same length."); CUDF_EXPECTS(!left_edges.has_nulls() && !right_edges.has_nulls(), diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index baecef3b92d..9de567fc321 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -204,11 +204,8 @@ std::unique_ptr concatenate_rows(table_view const& input, input.end(), [](column_view const& col) { return col.type().id() == cudf::type_id::LIST; }), "All columns of the input table must be of lists column type."); - CUDF_EXPECTS( - std::all_of(std::next(input.begin()), - input.end(), - [a = *input.begin()](column_view const& b) { return column_types_equal(a, b); }), - "The types of entries in the input columns must be the same."); + CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), + "The types of entries in the input columns must be the same."); auto const num_rows = input.num_rows(); auto const num_cols = input.num_columns(); diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 378cf678f1f..2a7957fc180 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -193,7 +194,7 @@ std::unique_ptr dispatch_index_of(lists_column_view const& lists, // comparisons. auto const child = lists.child(); - CUDF_EXPECTS(child.type() == search_keys.type(), + CUDF_EXPECTS(cudf::column_types_equal(child, search_keys), "Type/Scale of search key does not match list column element type.", cudf::data_type_error); CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty."); diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index f92ba782da7..7427619a5c0 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -145,7 +146,7 @@ std::unique_ptr sequences(column_view const& starts, CUDF_EXPECTS(!steps_cv.has_nulls(), "steps input column must not have nulls."); CUDF_EXPECTS(starts.size() == steps_cv.size(), "starts and steps input columns must have the same number of rows."); - CUDF_EXPECTS(starts.type() == steps_cv.type(), + CUDF_EXPECTS(cudf::column_types_equal(starts, steps_cv), "starts and steps input columns must have the same type."); } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 23171baaa45..8f869848064 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -153,6 +153,8 @@ std::unique_ptr reduce(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), "column and initial value must be the same type"); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index cee82560794..5dae2057be5 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -111,6 +111,8 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(!init.has_value() || segmented_values.type() == init.value().get().type(), "column and initial value must be the same type"); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 3cd1fdd20a2..195e680cfda 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -197,6 +197,8 @@ struct dispatch_clamp { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(lo.type() == input.type(), "mismatching types of scalar and input"); auto lo_itr = make_optional_iterator(lo, nullate::YES{}); @@ -321,6 +323,8 @@ std::unique_ptr clamp(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(lo.type() == hi.type(), "mismatching types of limit scalars"); CUDF_EXPECTS(lo_replace.type() == hi_replace.type(), "mismatching types of replace scalars"); CUDF_EXPECTS(lo.type() == lo_replace.type(), "mismatching types of limit and replace scalars"); diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 8ea229368cc..69df50343ec 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -302,6 +303,8 @@ struct replace_nulls_scalar_kernel_forwarder { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.type() == replacement.type(), "Data type mismatch"); std::unique_ptr output = cudf::detail::allocate_like( input, input.size(), cudf::mask_allocation_policy::NEVER, stream, mr); @@ -338,6 +341,8 @@ std::unique_ptr replace_nulls_scalar_kernel_forwarder::operator()< rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(input.type() == replacement.type(), "Data type mismatch"); cudf::strings_column_view input_s(input); cudf::string_scalar const& repl = static_cast(replacement); @@ -404,7 +409,7 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(input.type() == replacement.type(), "Data type mismatch"); + CUDF_EXPECTS(cudf::column_types_equal(input, replacement), "Data type mismatch"); CUDF_EXPECTS(replacement.size() == input.size(), "Column size mismatch"); if (input.is_empty()) { return cudf::empty_like(input); } diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 184c30246c7..01cc506db8b 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -504,9 +505,9 @@ std::unique_ptr find_and_replace_all(cudf::column_view const& inpu CUDF_EXPECTS(values_to_replace.size() == replacement_values.size(), "values_to_replace and replacement_values size mismatch."); - CUDF_EXPECTS( - input_col.type() == values_to_replace.type() && input_col.type() == replacement_values.type(), - "Columns type mismatch"); + CUDF_EXPECTS(cudf::column_types_equal(input_col, values_to_replace) && + cudf::column_types_equal(input_col, replacement_values), + "Columns type mismatch"); CUDF_EXPECTS(not values_to_replace.has_nulls(), "values_to_replace must not have nulls"); if (input_col.is_empty() or values_to_replace.is_empty() or replacement_values.is_empty()) { diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index 66104fe5c77..80c7f7e8821 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -98,7 +99,7 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, { CUDF_EXPECTS(op == aggregation::LEAD || op == aggregation::LAG, "Unexpected aggregation type in compute_lead_lag_for_nested"); - CUDF_EXPECTS(default_outputs.type().id() == input.type().id(), + CUDF_EXPECTS(cudf::column_types_equal(input, default_outputs), "Defaults column type must match input column."); // Because LEAD/LAG. CUDF_EXPECTS(default_outputs.is_empty() || (input.size() == default_outputs.size()), diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 0b344ec347b..01d7c103032 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,6 +62,8 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(haystack.type() == needle.type(), "Scalar and column types must match"); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller // before dispatching to this function. @@ -87,6 +89,8 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { + // TODO: Need some utility like cudf::column_types_equivalent for scalars to + // ensure nested types are handled correctly. CUDF_EXPECTS(haystack.type() == needle.type(), "Scalar and column types must match"); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller // before dispatching to this function. diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 72f864346a4..2c00d278f04 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,8 @@ std::pair, table_view> one_hot_encode(column_view const& rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(input.type() == categories.type(), "Mismatch type between input and categories."); + CUDF_EXPECTS(cudf::column_types_equal(input, categories), + "Mismatch type between input and categories."); if (categories.is_empty()) { return {make_empty_column(type_id::BOOL8), table_view{}}; } From 5f7f1f89b769516e8bd1b52eb9db353e6c0b67b0 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Mar 2024 15:05:55 -0500 Subject: [PATCH 02/30] Add column_scalar_types_equal. --- cpp/include/cudf/utilities/type_checks.hpp | 18 +++++++++ cpp/src/replace/clamp.cu | 8 ++-- cpp/src/utilities/type_checks.cpp | 47 +++++++++++++++++++++- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 2b326afe1f8..5ac59e55071 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -38,6 +39,23 @@ namespace cudf { */ bool column_types_equal(column_view const& lhs, column_view const& rhs); +/** + * @brief Compares the type of a `column_view` and a `scalar` + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For dictionary types, the type of the keys are compared if both are + * non-empty columns. + * - For lists types, the type of child columns are compared recursively. + * - For struct types, the type of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param col The `column_view` to compare + * @param slr The `scalar` to compare + * @return true if column/scalar types match + */ +bool column_scalar_types_equal(column_view const& col, scalar const& slr); + /** * @brief Compare the type IDs of two `column_view`s * This function returns true if the type of `lhs` equals that of `rhs`. diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 195e680cfda..5de907130c0 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -197,9 +199,9 @@ struct dispatch_clamp { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(lo.type() == input.type(), "mismatching types of scalar and input"); + CUDF_EXPECTS(column_scalar_types_equal(input, lo), + "mismatching types of scalar and input", + cudf::data_type_error); auto lo_itr = make_optional_iterator(lo, nullate::YES{}); auto hi_itr = make_optional_iterator(hi, nullate::YES{}); diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index d6f5c65593a..d9e59c17cc2 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -59,6 +60,44 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_vi [&](auto i) { return column_types_equal(lhs.child(i), rhs.child(i)); }); } +struct column_scalar_equal_fn { + template + bool operator()(column_view const&, scalar const&) + { + return true; + } +}; + +template <> +bool column_scalar_equal_fn::operator()(column_view const& col, scalar const& slr) +{ + // It is not possible to have a scalar dictionary, so compare the dictionary + // column keys type to the scalar type. + auto col_keys = cudf::dictionary_column_view(col).keys(); + return column_scalar_types_equal(col_keys, slr); +} + +template <> +bool column_scalar_equal_fn::operator()(column_view const& col, scalar const& slr) +{ + if (slr.type().id() != type_id::LIST) { return false; } + auto const& ci = lists_column_view::child_column_index; + auto const list_slr = static_cast(&slr); + return column_types_equal(col.child(ci), list_slr->view()); +} + +template <> +bool column_scalar_equal_fn::operator()(column_view const& col, scalar const& slr) +{ + if (slr.type().id() != type_id::STRUCT) { return false; } + auto const struct_slr = static_cast(&slr); + auto const slr_tbl = struct_slr->view(); + return col.num_children() == slr_tbl.num_columns() and + std::all_of(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(col.num_children()), + [&](auto i) { return column_types_equal(col.child(i), slr_tbl.column(i)); }); +} + }; // namespace // Implementation note: avoid using double dispatch for this function @@ -69,6 +108,12 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } +bool column_scalar_types_equal(column_view const& col, scalar const& slr) +{ + if (col.type() != slr.type()) { return false; } + return type_dispatcher(col.type(), column_scalar_equal_fn{}, col, slr); +} + bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { if (lhs.type().id() != rhs.type().id()) { return false; } From d880838d5ad91a6c153be949943c0380c20f75f8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Mar 2024 15:53:20 -0500 Subject: [PATCH 03/30] Use column_scalar_types_equal. --- cpp/src/copying/copy.cu | 20 +++++--------- cpp/src/copying/scatter.cu | 30 ++++++++++----------- cpp/src/copying/shift.cu | 5 ++-- cpp/src/dictionary/replace.cu | 7 ++--- cpp/src/dictionary/search.cu | 22 ++++++++------- cpp/src/filling/fill.cu | 23 ++++++++-------- cpp/src/groupby/groupby.cu | 16 ++++++----- cpp/src/reductions/reductions.cpp | 9 ++++--- cpp/src/reductions/segmented/reductions.cpp | 9 ++++--- cpp/src/replace/nulls.cu | 14 +++++----- cpp/src/search/contains_scalar.cu | 14 +++++----- 11 files changed, 83 insertions(+), 86 deletions(-) diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 2bf9daa8678..17e3c230585 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -380,13 +380,9 @@ std::unique_ptr copy_if_else(scalar const& lhs, CUDF_EXPECTS(boolean_mask.size() == rhs.size(), "Boolean mask column must be the same size as rhs column", std::invalid_argument); - - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - auto const rhs_type = - cudf::is_dictionary(rhs.type()) ? cudf::dictionary_column_view(rhs).keys_type() : rhs.type(); - CUDF_EXPECTS( - lhs.type() == rhs_type, "Both inputs must be of the same type", cudf::data_type_error); + CUDF_EXPECTS(cudf::column_scalar_types_equal(rhs, lhs), + "Both inputs must be of the same type", + cudf::data_type_error); return copy_if_else(lhs, rhs, !lhs.is_valid(stream), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -400,13 +396,9 @@ std::unique_ptr copy_if_else(column_view const& lhs, CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs column", std::invalid_argument); - - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - auto lhs_type = - cudf::is_dictionary(lhs.type()) ? cudf::dictionary_column_view(lhs).keys_type() : lhs.type(); - CUDF_EXPECTS( - lhs_type == rhs.type(), "Both inputs must be of the same type", cudf::data_type_error); + CUDF_EXPECTS(cudf::column_scalar_types_equal(lhs, rhs), + "Both inputs must be of the same type", + cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), !rhs.is_valid(stream), boolean_mask, stream, mr); } diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index d6f88efd4be..8947e6f97df 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include @@ -111,9 +113,7 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(source.get().type() == target.type(), + CUDF_EXPECTS(cudf::column_scalar_types_equal(target, source.get()), "scalar and column types must match", cudf::data_type_error); @@ -146,9 +146,9 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match"); + CUDF_EXPECTS(cudf::column_scalar_types_equal(target, source.get()), + "scalar and column types must match", + cudf::data_type_error); auto const scalar_impl = static_cast(&source.get()); auto const source_view = string_view(scalar_impl->data(), scalar_impl->size()); @@ -480,16 +480,14 @@ std::unique_ptr
boolean_mask_scatter( cudf::data_type_error); // Count valid pair of input and columns as per type at each column/scalar index i - CUDF_EXPECTS( - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - std::all_of(thrust::counting_iterator(0), - thrust::counting_iterator(target.num_columns()), - [&input, &target](auto index) { - return (input[index].get().type().id() == target.column(index).type().id()); - }), - "Type mismatch in input scalar and target column", - cudf::data_type_error); + CUDF_EXPECTS(std::all_of(thrust::counting_iterator(0), + thrust::counting_iterator(target.num_columns()), + [&input, &target](auto index) { + return cudf::column_scalar_types_equal(target.column(index), + input[index].get()); + }), + "Type mismatch in input scalar and target column", + cudf::data_type_error); if (target.num_rows() != 0) { std::vector> out_columns(target.num_columns()); diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 11e326a912b..cd2c2f4be09 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -157,9 +158,7 @@ std::unique_ptr shift(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.type() == fill_value.type(), + CUDF_EXPECTS(cudf::column_scalar_types_equal(input, fill_value), "shift requires each fill value type to match the corresponding column type.", cudf::data_type_error); diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index d2673ab19c8..2de43d7278c 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -119,9 +120,9 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, if (!input.has_nulls() || !replacement.is_valid(stream)) { return std::make_unique(input.parent(), stream, mr); } - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type"); + CUDF_EXPECTS(cudf::column_scalar_types_equal(input.parent(), replacement), + "keys must match scalar type", + cudf::data_type_error); // first add the replacement to the keys so only the indices need to be processed auto input_matched = dictionary::detail::add_keys( diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index d432bef4982..4ad027a0904 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include @@ -71,12 +73,12 @@ struct find_index_fn { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - if (!key.is_valid(stream)) + if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.keys().type() == key.type(), - "search key type must match dictionary keys type"); + } + CUDF_EXPECTS(cudf::column_scalar_types_equal(input.parent(), key), + "search key type must match dictionary keys type", + cudf::data_type_error); using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); @@ -115,12 +117,12 @@ struct find_insert_index_fn { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - if (!key.is_valid(stream)) + if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.keys().type() == key.type(), - "search key type must match dictionary keys type"); + } + CUDF_EXPECTS(cudf::column_scalar_types_equal(input.parent(), key), + "search key type must match dictionary keys type", + cudf::data_type_error); using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index ec64f81fc75..c62ea44fbe6 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -109,9 +110,8 @@ struct out_of_place_fill_range_dispatch { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.type() == value.type(), "Data type mismatch."); + CUDF_EXPECTS( + cudf::column_scalar_types_equal(input, value), "Data type mismatch.", cudf::data_type_error); auto p_ret = std::make_unique(input, stream, mr); if (end != begin) { // otherwise no fill @@ -138,9 +138,8 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator(); auto p_scalar = static_cast(&value); return cudf::strings::detail::fill( @@ -156,9 +155,9 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator()(input, stream, mr); cudf::dictionary_column_view const target(input); - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(target.keys().type() == value.type(), "Data type mismatch."); + CUDF_EXPECTS(cudf::column_scalar_types_equal(target.parent(), value), + "Data type mismatch.", + cudf::data_type_error); // if the scalar is invalid, then just copy the column and fill the null mask if (!value.is_valid(stream)) { @@ -224,9 +223,9 @@ void fill_in_place(mutable_column_view& destination, "Range is out of bounds."); CUDF_EXPECTS(destination.nullable() || value.is_valid(stream), "destination should be nullable or value should be non-null."); - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(destination.type() == value.type(), "Data type mismatch."); + CUDF_EXPECTS(cudf::column_scalar_types_equal(destination, value), + "Data type mismatch.", + cudf::data_type_error); if (end != begin) { // otherwise no-op cudf::type_dispatcher( diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 877780658b5..4c25d206c71 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -311,13 +312,14 @@ std::pair, std::unique_ptr
> groupby::shift( CUDF_FUNC_RANGE(); CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), "Mismatch number of fill_values and columns."); - CUDF_EXPECTS( - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.num_columns()), - [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }), - "values and fill_value should have the same type."); + CUDF_EXPECTS(std::all_of(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.num_columns()), + [&](auto i) { + return cudf::column_scalar_types_equal(values.column(i), + fill_values[i].get()); + }), + "values and fill_value should have the same type.", + cudf::data_type_error); auto stream = cudf::get_default_stream(); std::vector> results; diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 6cbb9c22d44..1e1dc4fd339 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include @@ -153,10 +155,9 @@ std::unique_ptr reduce(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), - "column and initial value must be the same type"); + CUDF_EXPECTS(!init.has_value() || cudf::column_scalar_types_equal(col, init.value().get()), + "column and initial value must be the same type", + cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 5dae2057be5..037d29c3c09 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -111,10 +112,10 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(!init.has_value() || segmented_values.type() == init.value().get().type(), - "column and initial value must be the same type"); + CUDF_EXPECTS( + !init.has_value() || cudf::column_scalar_types_equal(segmented_values, init.value().get()), + "column and initial value must be the same type", + cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 4a5dec4ae3b..8922b68b793 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -307,9 +307,9 @@ struct replace_nulls_scalar_kernel_forwarder { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.type() == replacement.type(), "Data type mismatch"); + CUDF_EXPECTS(cudf::column_scalar_types_equal(input, replacement), + "Data type mismatch", + cudf::data_type_error); std::unique_ptr output = cudf::detail::allocate_like( input, input.size(), cudf::mask_allocation_policy::NEVER, stream, mr); auto output_view = output->mutable_view(); @@ -345,11 +345,11 @@ std::unique_ptr replace_nulls_scalar_kernel_forwarder::operator()< rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(input.type() == replacement.type(), "Data type mismatch"); + CUDF_EXPECTS(cudf::column_scalar_types_equal(input, replacement), + "Data type mismatch", + cudf::data_type_error); cudf::strings_column_view input_s(input); - cudf::string_scalar const& repl = static_cast(replacement); + auto const& repl = static_cast(replacement); return cudf::strings::detail::replace_nulls(input_s, repl, stream, mr); } diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 01d7c103032..1458a951c7a 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -62,9 +64,9 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(haystack.type() == needle.type(), "Scalar and column types must match"); + CUDF_EXPECTS(cudf::column_scalar_types_equal(haystack, needle), + "Scalar and column types must match", + cudf::data_type_error); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller // before dispatching to this function. @@ -89,9 +91,9 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(haystack.type() == needle.type(), "Scalar and column types must match"); + CUDF_EXPECTS(cudf::column_scalar_types_equal(haystack, needle), + "Scalar and column types must match", + cudf::data_type_error); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller // before dispatching to this function. // In addition, haystack and needle structure compatibility will be checked later on by From 8b152dc3cccce3250cfd277ead6424edf4d3c3bd Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Mar 2024 16:53:07 -0500 Subject: [PATCH 04/30] Fix bugs and update tests. --- cpp/src/utilities/type_checks.cpp | 23 +++++++++++++++-------- cpp/tests/copying/copy_range_tests.cpp | 2 +- cpp/tests/copying/copy_tests.cpp | 2 +- cpp/tests/dictionary/scatter_test.cpp | 2 +- cpp/tests/dictionary/search_test.cpp | 4 ++-- cpp/tests/filling/fill_tests.cpp | 4 ++-- cpp/tests/groupby/shift_tests.cpp | 6 +++--- cpp/tests/replace/clamp_test.cpp | 4 ++-- cpp/tests/replace/replace_nulls_tests.cpp | 4 ++-- 9 files changed, 29 insertions(+), 22 deletions(-) diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index d9e59c17cc2..57450a647aa 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -29,15 +30,16 @@ namespace { struct columns_equal_fn { template - bool operator()(column_view const&, column_view const&) + bool operator()(column_view const& lhs, column_view const& rhs) { - return true; + return lhs.type() == rhs.type(); } }; template <> bool columns_equal_fn::operator()(column_view const& lhs, column_view const& rhs) { + if (not cudf::is_dictionary(rhs.type())) { return false; } auto const kidx = dictionary_column_view::keys_column_index; return lhs.num_children() > 0 and rhs.num_children() > 0 ? lhs.child(kidx).type() == rhs.child(kidx).type() @@ -47,6 +49,7 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_v template <> bool columns_equal_fn::operator()(column_view const& lhs, column_view const& rhs) { + if (rhs.type().id() != type_id::LIST) { return false; } auto const& ci = lists_column_view::child_column_index; return column_types_equal(lhs.child(ci), rhs.child(ci)); } @@ -54,6 +57,7 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_view template <> bool columns_equal_fn::operator()(column_view const& lhs, column_view const& rhs) { + if (rhs.type().id() != type_id::STRUCT) { return false; } return lhs.num_children() == rhs.num_children() and std::all_of(thrust::make_counting_iterator(0), thrust::make_counting_iterator(lhs.num_children()), @@ -62,9 +66,9 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_vi struct column_scalar_equal_fn { template - bool operator()(column_view const&, scalar const&) + bool operator()(column_view const& col, scalar const& slr) { - return true; + return col.type() == slr.type(); } }; @@ -104,20 +108,23 @@ bool column_scalar_equal_fn::operator()(column_view const& col, sca // as it increases code paths to NxN for N types. bool column_types_equal(column_view const& lhs, column_view const& rhs) { - if (lhs.type() != rhs.type()) { return false; } return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } bool column_scalar_types_equal(column_view const& col, scalar const& slr) { - if (col.type() != slr.type()) { return false; } return type_dispatcher(col.type(), column_scalar_equal_fn{}, col, slr); } bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { - if (lhs.type().id() != rhs.type().id()) { return false; } - return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); + // Check if the columns have fixed point types. This is the only case where + // type equality and equivalence differ. + if (lhs.type().id() == type_id::DECIMAL32 || lhs.type().id() == type_id::DECIMAL64 || + lhs.type().id() == type_id::DECIMAL128) { + return lhs.type().id() == rhs.type().id(); + } + return column_types_equal(lhs, rhs); } } // namespace cudf diff --git a/cpp/tests/copying/copy_range_tests.cpp b/cpp/tests/copying/copy_range_tests.cpp index bcc0ac29b3e..223946ddcee 100644 --- a/cpp/tests/copying/copy_range_tests.cpp +++ b/cpp/tests/copying/copy_range_tests.cpp @@ -465,7 +465,7 @@ TEST_F(CopyRangeErrorTestFixture, DTypeMismatch) auto dict_target = cudf::dictionary::encode(target); auto dict_source = cudf::dictionary::encode(source); EXPECT_THROW(cudf::copy_range(dict_source->view(), dict_target->view(), 0, 100, 0), - cudf::logic_error); + cudf::data_type_error); } template diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 138e1935363..f31d8d6f79a 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -712,7 +712,7 @@ TEST_F(DictionaryCopyIfElseTest, TypeMismatch) cudf::test::dictionary_column_wrapper input2({1.0, 1.0, 1.0, 1.0}); cudf::test::fixed_width_column_wrapper mask({1, 0, 0, 1}); - EXPECT_THROW(cudf::copy_if_else(input1, input2, mask), cudf::logic_error); + EXPECT_THROW(cudf::copy_if_else(input1, input2, mask), cudf::data_type_error); cudf::string_scalar input3{"1"}; EXPECT_THROW(cudf::copy_if_else(input1, input3, mask), cudf::data_type_error); diff --git a/cpp/tests/dictionary/scatter_test.cpp b/cpp/tests/dictionary/scatter_test.cpp index 2a2841827d0..2f77f4ee621 100644 --- a/cpp/tests/dictionary/scatter_test.cpp +++ b/cpp/tests/dictionary/scatter_test.cpp @@ -141,5 +141,5 @@ TEST_F(DictionaryScatterTest, Error) EXPECT_THROW( cudf::scatter( cudf::table_view{{source->view()}}, scatter_map, cudf::table_view{{target->view()}}), - cudf::logic_error); + cudf::data_type_error); } diff --git a/cpp/tests/dictionary/search_test.cpp b/cpp/tests/dictionary/search_test.cpp index 600d00ac186..b49b4ce5aa0 100644 --- a/cpp/tests/dictionary/search_test.cpp +++ b/cpp/tests/dictionary/search_test.cpp @@ -77,9 +77,9 @@ TEST_F(DictionarySearchTest, Errors) { cudf::test::dictionary_column_wrapper dictionary({1, 2, 3}); cudf::numeric_scalar key(7); - EXPECT_THROW(cudf::dictionary::get_index(dictionary, key), cudf::logic_error); + EXPECT_THROW(cudf::dictionary::get_index(dictionary, key), cudf::data_type_error); EXPECT_THROW( cudf::dictionary::detail::get_insert_index( dictionary, key, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), - cudf::logic_error); + cudf::data_type_error); } diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp index 95a27defa4e..26badefe698 100644 --- a/cpp/tests/filling/fill_tests.cpp +++ b/cpp/tests/filling/fill_tests.cpp @@ -359,8 +359,8 @@ TEST_F(FillErrorTestFixture, DTypeMismatch) auto destination_view = cudf::mutable_column_view{destination}; - EXPECT_THROW(cudf::fill_in_place(destination_view, 0, 10, *p_val), cudf::logic_error); - EXPECT_THROW(auto p_ret = cudf::fill(destination, 0, 10, *p_val), cudf::logic_error); + EXPECT_THROW(cudf::fill_in_place(destination_view, 0, 10, *p_val), cudf::data_type_error); + EXPECT_THROW(auto p_ret = cudf::fill(destination, 0, 10, *p_val), cudf::data_type_error); } template diff --git a/cpp/tests/groupby/shift_tests.cpp b/cpp/tests/groupby/shift_tests.cpp index d2ecb667eca..1a6abf2e734 100644 --- a/cpp/tests/groupby/shift_tests.cpp +++ b/cpp/tests/groupby/shift_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -507,7 +507,7 @@ TEST_F(groupby_shift_fixed_point_type_test, MismatchScaleType) EXPECT_THROW(test_groupby_shift_multi( key, cudf::table_view{{v1}}, offset, {*slr1}, cudf::table_view{{stub}}), - cudf::logic_error); + cudf::data_type_error); } TEST_F(groupby_shift_fixed_point_type_test, MismatchRepType) @@ -525,5 +525,5 @@ TEST_F(groupby_shift_fixed_point_type_test, MismatchRepType) EXPECT_THROW(test_groupby_shift_multi( key, cudf::table_view{{v1}}, offset, {*slr1}, cudf::table_view{{stub}}), - cudf::logic_error); + cudf::data_type_error); } diff --git a/cpp/tests/replace/clamp_test.cpp b/cpp/tests/replace/clamp_test.cpp index bb33de1f1e7..beb710ec76e 100644 --- a/cpp/tests/replace/clamp_test.cpp +++ b/cpp/tests/replace/clamp_test.cpp @@ -53,7 +53,7 @@ TEST_F(ClampErrorTest, MisMatchingInputAndScalarTypes) cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6}); - EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::logic_error); + EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::data_type_error); } TEST_F(ClampErrorTest, MisMatchingReplaceScalarTypes) @@ -655,7 +655,7 @@ TYPED_TEST(FixedPointTest, MismatchedColumnScalarScale) auto const hi = cudf::make_fixed_point_scalar(8, scale); auto const input = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, scale_type{-4}}; - EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::logic_error); + EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::data_type_error); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index 6c23dd6bdc8..defc00a1fde 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -68,7 +68,7 @@ TEST_F(ReplaceErrorTest, TypeMismatchScalar) {0, 0, 1, 1, 1, 1, 1, 1}}; cudf::numeric_scalar replacement(1); - EXPECT_THROW(cudf::replace_nulls(input_column, replacement), cudf::logic_error); + EXPECT_THROW(cudf::replace_nulls(input_column, replacement), cudf::data_type_error); } struct ReplaceNullsStringsTest : public cudf::test::BaseFixture {}; @@ -660,7 +660,7 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsError) auto replacement = cudf::dictionary::encode(replacement_w); EXPECT_THROW(cudf::replace_nulls(input->view(), replacement->view()), cudf::logic_error); - EXPECT_THROW(cudf::replace_nulls(input->view(), cudf::string_scalar("x")), cudf::logic_error); + EXPECT_THROW(cudf::replace_nulls(input->view(), cudf::string_scalar("x")), cudf::data_type_error); cudf::test::fixed_width_column_wrapper input_one_w({1}, {0}); auto input_one = cudf::dictionary::encode(input_one_w); From 5cf476962d75eaf1da17b4db38d4c2bf91b78013 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 15:51:13 -0500 Subject: [PATCH 05/30] Add scalar_types_equal. --- cpp/include/cudf/utilities/type_checks.hpp | 21 +++++++++++-- cpp/src/copying/concatenate.cu | 4 ++- cpp/src/copying/copy.cu | 7 ++--- cpp/src/filling/sequence.cu | 7 +++-- cpp/src/replace/clamp.cu | 13 ++++---- cpp/src/utilities/type_checks.cpp | 35 ++++++++++++++++++++++ 6 files changed, 71 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 5ac59e55071..0e564dd4686 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -42,10 +42,10 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs); /** * @brief Compares the type of a `column_view` and a `scalar` * - * This function returns true if the type of `lhs` equals that of `rhs`. + * This function returns true if the type of `col` equals that of `slr`. * - For fixed point types, the scale is compared. - * - For dictionary types, the type of the keys are compared if both are - * non-empty columns. + * - For dictionary column types, the type of the keys are compared to the + * scalar type. * - For lists types, the type of child columns are compared recursively. * - For struct types, the type of each field are compared in order. * - For all other types, the `id` of `data_type` is compared. @@ -56,6 +56,21 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs); */ bool column_scalar_types_equal(column_view const& col, scalar const& slr); +/** + * @brief Compares the type of two `scalar`s + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For lists types, the type of child columns are compared recursively. + * - For struct types, the type of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The first `scalar to compare + * @param rhs The second `scalar to compare + * @return true if scalar types match + */ +bool scalar_types_equal(scalar const& lhs, scalar const& rhs); + /** * @brief Compare the type IDs of two `column_view`s * This function returns true if the type of `lhs` equals that of `rhs`. diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index d3c9a5f3579..84016fb5c93 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -462,7 +463,8 @@ void traverse_children::operator()(host_span void bounds_and_type_check(host_span cols, rmm::cuda_stream_view stream) { CUDF_EXPECTS(cudf::all_column_types_equal(cols.begin(), cols.end()), - "Type mismatch in columns to concatenate."); + "Type mismatch in columns to concatenate.", + cudf::data_type_error); // total size of all concatenated rows size_t const total_row_count = diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 17e3c230585..675211992df 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -409,10 +409,9 @@ std::unique_ptr copy_if_else(scalar const& lhs, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS( - lhs.type() == rhs.type(), "Both inputs must be of the same type", cudf::data_type_error); + CUDF_EXPECTS(cudf::scalar_types_equal(lhs, rhs), + "Both inputs must be of the same type", + cudf::data_type_error); return copy_if_else( lhs, rhs, !lhs.is_valid(stream), !rhs.is_valid(stream), boolean_mask, stream, mr); } diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 3120991318b..cb65632ee55 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -127,9 +128,9 @@ std::unique_ptr sequence(size_type size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(init.type() == step.type(), "init and step must be of the same type."); + CUDF_EXPECTS(cudf::scalar_types_equal(init, step), + "init and step must be of the same type.", + cudf::data_type_error); CUDF_EXPECTS(size >= 0, "size must be >= 0"); CUDF_EXPECTS(is_numeric(init.type()), "Input scalar types must be numeric"); diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 5de907130c0..8cdf44fd9c8 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -325,11 +325,14 @@ std::unique_ptr clamp(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // TODO: Need some utility like cudf::column_types_equivalent for scalars to - // ensure nested types are handled correctly. - CUDF_EXPECTS(lo.type() == hi.type(), "mismatching types of limit scalars"); - CUDF_EXPECTS(lo_replace.type() == hi_replace.type(), "mismatching types of replace scalars"); - CUDF_EXPECTS(lo.type() == lo_replace.type(), "mismatching types of limit and replace scalars"); + CUDF_EXPECTS( + cudf::scalar_types_equal(lo, hi), "mismatching types of limit scalars", cudf::data_type_error); + CUDF_EXPECTS(cudf::scalar_types_equal(lo_replace, hi_replace), + "mismatching types of replace scalars", + cudf::data_type_error); + CUDF_EXPECTS(cudf::scalar_types_equal(lo, lo_replace), + "mismatching types of limit and replace scalars", + cudf::data_type_error); if ((not lo.is_valid(stream) and not hi.is_valid(stream)) or (input.is_empty())) { // There will be no change diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 57450a647aa..2c2a121754a 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -102,6 +102,36 @@ bool column_scalar_equal_fn::operator()(column_view const& col, sca [&](auto i) { return column_types_equal(col.child(i), slr_tbl.column(i)); }); } +struct scalars_equal_fn { + template + bool operator()(scalar const& lhs, scalar const& rhs) + { + return lhs.type() == rhs.type(); + } +}; + +template <> +bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& rhs) +{ + if (rhs.type().id() != type_id::LIST) { return false; } + auto const list_lhs = static_cast(&lhs); + auto const list_rhs = static_cast(&rhs); + return column_types_equal(list_lhs->view(), list_rhs->view()); +} + +template <> +bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& rhs) +{ + if (rhs.type().id() != type_id::STRUCT) { return false; } + auto const tbl_lhs = static_cast(&lhs)->view(); + auto const tbl_rhs = static_cast(&rhs)->view(); + return tbl_lhs.num_columns() == tbl_rhs.num_columns() and + std::all_of( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(tbl_lhs.num_columns()), + [&](auto i) { return column_types_equal(tbl_lhs.column(i), tbl_rhs.column(i)); }); +} + }; // namespace // Implementation note: avoid using double dispatch for this function @@ -116,6 +146,11 @@ bool column_scalar_types_equal(column_view const& col, scalar const& slr) return type_dispatcher(col.type(), column_scalar_equal_fn{}, col, slr); } +bool scalar_types_equal(scalar const& lhs, scalar const& rhs) +{ + return type_dispatcher(lhs.type(), scalars_equal_fn{}, lhs, rhs); +} + bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { // Check if the columns have fixed point types. This is the only case where From 8d938bb02bc61ead7ed920f0a2cb9a7670d7876b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 15:54:35 -0500 Subject: [PATCH 06/30] Fix tests. --- cpp/tests/copying/concatenate_tests.cpp | 13 +++++++------ cpp/tests/filling/sequence_tests.cpp | 8 ++++---- cpp/tests/replace/clamp_test.cpp | 7 ++++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index 0f7c1053adf..84c32a9dbbc 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1255,7 +1256,7 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper b{{{LCW{}}}}; cudf::test::lists_column_wrapper c{{LCW{}}}; - EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::data_type_error); } { @@ -1264,7 +1265,7 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper b{{{LCW{}}}}; cudf::test::lists_column_wrapper c{{LCW{}}}; - EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::data_type_error); } { @@ -1272,14 +1273,14 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper b{1, 2, 3}; cudf::test::lists_column_wrapper c{{3, 4, 5}}; - EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::data_type_error); } { cudf::test::lists_column_wrapper a{{{1, 2, 3}}}; cudf::test::lists_column_wrapper b{{4, 5}}; - EXPECT_THROW(cudf::concatenate(std::vector({a, b})), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({a, b})), cudf::data_type_error); } } @@ -1634,7 +1635,7 @@ TEST_F(FixedPointTest, FixedPointScaleMismatch) auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2}); auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-3}); - EXPECT_THROW(cudf::concatenate(std::vector{a, b, c}), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector{a, b, c}), cudf::data_type_error); } struct DictionaryConcatTest : public cudf::test::BaseFixture {}; @@ -1679,7 +1680,7 @@ TEST_F(DictionaryConcatTest, ErrorsTest) cudf::test::fixed_width_column_wrapper integers({10, 30, 20}); auto dictionary2 = cudf::dictionary::encode(integers); std::vector views({dictionary1->view(), dictionary2->view()}); - EXPECT_THROW(cudf::concatenate(views), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(views), cudf::data_type_error); std::vector empty; EXPECT_THROW(cudf::concatenate(empty), cudf::logic_error); } diff --git a/cpp/tests/filling/sequence_tests.cpp b/cpp/tests/filling/sequence_tests.cpp index cf619aace5a..5651a26f192 100644 --- a/cpp/tests/filling/sequence_tests.cpp +++ b/cpp/tests/filling/sequence_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,15 +102,15 @@ TEST_F(SequenceTestFixture, MismatchedInputs) { cudf::numeric_scalar init(0); cudf::numeric_scalar step(-5); - EXPECT_THROW(cudf::sequence(10, init, step), cudf::logic_error); + EXPECT_THROW(cudf::sequence(10, init, step), cudf::data_type_error); cudf::numeric_scalar init2(0); cudf::numeric_scalar step2(-5); - EXPECT_THROW(cudf::sequence(10, init2, step2), cudf::logic_error); + EXPECT_THROW(cudf::sequence(10, init2, step2), cudf::data_type_error); cudf::numeric_scalar init3(0); cudf::numeric_scalar step3(-5); - EXPECT_THROW(cudf::sequence(10, init3, step3), cudf::logic_error); + EXPECT_THROW(cudf::sequence(10, init3, step3), cudf::data_type_error); } TYPED_TEST(SequenceTypedTestFixture, DefaultStep) diff --git a/cpp/tests/replace/clamp_test.cpp b/cpp/tests/replace/clamp_test.cpp index beb710ec76e..239c9ce6ddd 100644 --- a/cpp/tests/replace/clamp_test.cpp +++ b/cpp/tests/replace/clamp_test.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -41,7 +42,7 @@ TEST_F(ClampErrorTest, MisMatchingScalarTypes) cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6}); - EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::logic_error); + EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::data_type_error); } TEST_F(ClampErrorTest, MisMatchingInputAndScalarTypes) @@ -69,7 +70,7 @@ TEST_F(ClampErrorTest, MisMatchingReplaceScalarTypes) cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6}); - EXPECT_THROW(cudf::clamp(input, *lo, *lo_replace, *hi, *hi_replace), cudf::logic_error); + EXPECT_THROW(cudf::clamp(input, *lo, *lo_replace, *hi, *hi_replace), cudf::data_type_error); } TEST_F(ClampErrorTest, InValidCase1) @@ -640,7 +641,7 @@ TYPED_TEST(FixedPointTest, MismatchedScalarScales) auto const hi = cudf::make_fixed_point_scalar(8, scale); auto const input = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, scale}; - EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::logic_error); + EXPECT_THROW(cudf::clamp(input, *lo, *hi), cudf::data_type_error); } TYPED_TEST(FixedPointTest, MismatchedColumnScalarScale) From 5430397a4e7e76d4feb30b166267aec349a44b53 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 16:44:24 -0500 Subject: [PATCH 07/30] Add data_type_errors. --- cpp/include/cudf/detail/scatter.cuh | 4 +++- cpp/src/copying/copy_range.cu | 3 ++- cpp/src/dictionary/add_keys.cu | 5 ++++- cpp/src/dictionary/remove_keys.cu | 5 ++++- cpp/src/dictionary/replace.cu | 4 +++- cpp/src/dictionary/set_keys.cu | 4 +++- cpp/src/interop/dlpack.cpp | 3 ++- cpp/src/join/hash_join.cu | 5 ++++- cpp/src/labeling/label_bins.cu | 3 ++- cpp/src/lists/combine/concatenate_rows.cu | 7 +++++-- cpp/src/lists/sequences.cu | 8 ++++++-- cpp/src/replace/nulls.cu | 3 ++- cpp/src/replace/replace.cu | 3 ++- cpp/src/rolling/detail/lead_lag_nested.cuh | 4 +++- cpp/src/transform/one_hot_encode.cu | 3 ++- 15 files changed, 47 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index d5484b14510..bb130737931 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -214,7 +215,8 @@ struct column_scatterer_impl { dictionary_column_view const source(source_in); dictionary_column_view const target(target_in); CUDF_EXPECTS(cudf::column_types_equal(source.keys(), target.keys()), - "scatter dictionary keys must be the same type"); + "scatter dictionary keys must be the same type", + cudf::data_type_error); // first combine keys so both dictionaries have the same set auto target_matched = dictionary::detail::add_keys(target, source.keys(), stream, mr); diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 444162a5d5f..7e6e0ea0411 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -148,7 +148,8 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator() #include #include +#include #include #include @@ -54,7 +55,9 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls"); auto old_keys = dictionary_column.keys(); // [a,b,c,d,f] - CUDF_EXPECTS(cudf::column_types_equal(new_keys, old_keys), "Keys must be the same type"); + CUDF_EXPECTS(cudf::column_types_equal(new_keys, old_keys), + "Keys must be the same type", + cudf::data_type_error); // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] auto combined_keys = cudf::detail::concatenate( diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index dc575163c3f..e84828a4235 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -155,7 +156,9 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); - CUDF_EXPECTS(cudf::column_types_equal(keys_view, keys_to_remove), "keys types must match"); + CUDF_EXPECTS(cudf::column_types_equal(keys_view, keys_to_remove), + "keys types must match", + cudf::data_type_error); // locate keys to remove by searching the keys column auto const matches = cudf::detail::contains(keys_to_remove, keys_view, stream, mr); diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 2de43d7278c..1c06a9e54c9 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -85,7 +85,9 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls()) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS(cudf::column_types_equal(input.keys(), replacement.keys()), "keys must match"); + CUDF_EXPECTS(cudf::column_types_equal(input.keys(), replacement.keys()), + "keys must match", + cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); // first combine the keys so both input dictionaries have the same set diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index c48de765450..6afdbbdaea4 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -123,7 +124,8 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls"); auto keys = dictionary_column.keys(); - CUDF_EXPECTS(cudf::column_types_equal(keys, new_keys), "keys types must match"); + CUDF_EXPECTS( + cudf::column_types_equal(keys, new_keys), "keys types must match", cudf::data_type_error); // copy the keys -- use cudf::distinct to make sure there are no duplicates, // then sort the results. diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 794da3b9fbc..33ec78ba090 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -232,7 +232,8 @@ DLManagedTensor* to_dlpack(table_view const& input, // Ensure all columns are the same type CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), - "All columns required to have same data type"); + "All columns required to have same data type", + cudf::data_type_error); // Ensure none of the columns have nulls CUDF_EXPECTS( diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 603c7ccc9cb..4350e854e02 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -569,7 +570,9 @@ hash_join::compute_hash_join(cudf::table_view const& probe, std::make_unique>(0, stream, mr)); } - CUDF_EXPECTS(cudf::have_same_types(_build, probe), "Mismatch in joining column data types"); + CUDF_EXPECTS(cudf::have_same_types(_build, probe), + "Mismatch in joining column data types", + cudf::data_type_error); return probe_join_indices(probe, join, output_size, stream, mr); } diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 578288dfa3e..21114e0de1a 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -210,7 +210,8 @@ std::unique_ptr label_bins(column_view const& input, CUDF_FUNC_RANGE() CUDF_EXPECTS( cudf::column_types_equal(input, left_edges) && cudf::column_types_equal(input, right_edges), - "The input and edge columns must have the same types."); + "The input and edge columns must have the same types.", + cudf::data_type_error); CUDF_EXPECTS(left_edges.size() == right_edges.size(), "The left and right edge columns must be of the same length."); CUDF_EXPECTS(!left_edges.has_nulls() && !right_edges.has_nulls(), diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index 9de567fc321..893d5907f5f 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -203,9 +204,11 @@ std::unique_ptr concatenate_rows(table_view const& input, std::all_of(input.begin(), input.end(), [](column_view const& col) { return col.type().id() == cudf::type_id::LIST; }), - "All columns of the input table must be of lists column type."); + "All columns of the input table must be of lists column type.", + cudf::data_type_error); CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), - "The types of entries in the input columns must be the same."); + "The types of entries in the input columns must be the same.", + cudf::data_type_error); auto const num_rows = input.num_rows(); auto const num_cols = input.num_columns(); diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index 7427619a5c0..d9fa0be76eb 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -139,7 +140,9 @@ std::unique_ptr sequences(column_view const& starts, "starts and sizes input columns must not have nulls."); CUDF_EXPECTS(starts.size() == sizes.size(), "starts and sizes input columns must have the same number of rows."); - CUDF_EXPECTS(cudf::is_index_type(sizes.type()), "Input sizes column must be of integer types."); + CUDF_EXPECTS(cudf::is_index_type(sizes.type()), + "Input sizes column must be of integer types.", + cudf::data_type_error); if (steps) { auto const& steps_cv = steps.value(); @@ -147,7 +150,8 @@ std::unique_ptr sequences(column_view const& starts, CUDF_EXPECTS(starts.size() == steps_cv.size(), "starts and steps input columns must have the same number of rows."); CUDF_EXPECTS(cudf::column_types_equal(starts, steps_cv), - "starts and steps input columns must have the same type."); + "starts and steps input columns must have the same type.", + cudf::data_type_error); } auto const n_lists = starts.size(); diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 8922b68b793..b6167e15565 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -413,7 +413,8 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::column_types_equal(input, replacement), "Data type mismatch"); + CUDF_EXPECTS( + cudf::column_types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "Column size mismatch"); if (input.is_empty()) { return cudf::empty_like(input); } diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 90e2ec1dd7c..fe47234ac4c 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -305,7 +305,8 @@ std::unique_ptr find_and_replace_all(cudf::column_view const& inpu CUDF_EXPECTS(cudf::column_types_equal(input_col, values_to_replace) && cudf::column_types_equal(input_col, replacement_values), - "Columns type mismatch"); + "Columns type mismatch", + cudf::data_type_error); CUDF_EXPECTS(not values_to_replace.has_nulls(), "values_to_replace must not have nulls"); if (input_col.is_empty() or values_to_replace.is_empty() or replacement_values.is_empty()) { diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index 80c7f7e8821..407d02cf8f2 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -100,7 +101,8 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, CUDF_EXPECTS(op == aggregation::LEAD || op == aggregation::LAG, "Unexpected aggregation type in compute_lead_lag_for_nested"); CUDF_EXPECTS(cudf::column_types_equal(input, default_outputs), - "Defaults column type must match input column."); // Because LEAD/LAG. + "Defaults column type must match input column.", + cudf::data_type_error); // Because LEAD/LAG. CUDF_EXPECTS(default_outputs.is_empty() || (input.size() == default_outputs.size()), "Number of defaults must match input column."); diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 2c00d278f04..9bec5e0f7fe 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -62,7 +62,8 @@ std::pair, table_view> one_hot_encode(column_view const& rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(cudf::column_types_equal(input, categories), - "Mismatch type between input and categories."); + "Mismatch type between input and categories.", + cudf::data_type_error); if (categories.is_empty()) { return {make_empty_column(type_id::BOOL8), table_view{}}; } From 8e02603da4ae7b196bc8d63eb3f16e14ce6893e7 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 17:35:51 -0500 Subject: [PATCH 08/30] Fix tests. --- cpp/tests/dictionary/add_keys_test.cpp | 3 ++- cpp/tests/dictionary/remove_keys_test.cpp | 3 ++- cpp/tests/dictionary/set_keys_test.cpp | 3 ++- cpp/tests/interop/dlpack_test.cpp | 3 ++- cpp/tests/labeling/label_bins_tests.cpp | 7 ++++--- cpp/tests/lists/combine/concatenate_rows_tests.cpp | 5 +++-- cpp/tests/lists/sequences_tests.cpp | 7 ++++--- cpp/tests/replace/replace_nulls_tests.cpp | 6 +++--- cpp/tests/replace/replace_tests.cpp | 3 ++- cpp/tests/transform/one_hot_encode_tests.cpp | 5 +++-- 10 files changed, 27 insertions(+), 18 deletions(-) diff --git a/cpp/tests/dictionary/add_keys_test.cpp b/cpp/tests/dictionary/add_keys_test.cpp index 1314375f383..46bf5468922 100644 --- a/cpp/tests/dictionary/add_keys_test.cpp +++ b/cpp/tests/dictionary/add_keys_test.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -83,7 +84,7 @@ TEST_F(DictionaryAddKeysTest, Errors) auto dictionary = cudf::dictionary::encode(input); cudf::test::fixed_width_column_wrapper new_keys{1.0, 2.0, 3.0}; - EXPECT_THROW(cudf::dictionary::add_keys(dictionary->view(), new_keys), cudf::logic_error); + EXPECT_THROW(cudf::dictionary::add_keys(dictionary->view(), new_keys), cudf::data_type_error); cudf::test::fixed_width_column_wrapper null_keys{{1, 2, 3}, {1, 0, 1}}; EXPECT_THROW(cudf::dictionary::add_keys(dictionary->view(), null_keys), cudf::logic_error); } diff --git a/cpp/tests/dictionary/remove_keys_test.cpp b/cpp/tests/dictionary/remove_keys_test.cpp index 13fe3efd0f4..9950a39d630 100644 --- a/cpp/tests/dictionary/remove_keys_test.cpp +++ b/cpp/tests/dictionary/remove_keys_test.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -119,7 +120,7 @@ TEST_F(DictionaryRemoveKeysTest, Errors) auto const dictionary = cudf::dictionary::encode(input); cudf::test::fixed_width_column_wrapper del_keys{1.0, 2.0, 3.0}; - EXPECT_THROW(cudf::dictionary::remove_keys(dictionary->view(), del_keys), cudf::logic_error); + EXPECT_THROW(cudf::dictionary::remove_keys(dictionary->view(), del_keys), cudf::data_type_error); cudf::test::fixed_width_column_wrapper null_keys{{1, 2, 3}, {1, 0, 1}}; EXPECT_THROW(cudf::dictionary::remove_keys(dictionary->view(), null_keys), cudf::logic_error); } diff --git a/cpp/tests/dictionary/set_keys_test.cpp b/cpp/tests/dictionary/set_keys_test.cpp index d0c37493cf8..5c9ec3567fe 100644 --- a/cpp/tests/dictionary/set_keys_test.cpp +++ b/cpp/tests/dictionary/set_keys_test.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -82,7 +83,7 @@ TEST_F(DictionarySetKeysTest, Errors) auto dictionary = cudf::dictionary::encode(input); cudf::test::fixed_width_column_wrapper new_keys{1.0, 2.0, 3.0}; - EXPECT_THROW(cudf::dictionary::set_keys(dictionary->view(), new_keys), cudf::logic_error); + EXPECT_THROW(cudf::dictionary::set_keys(dictionary->view(), new_keys), cudf::data_type_error); cudf::test::fixed_width_column_wrapper null_keys{{1, 2, 3}, {1, 0, 1}}; EXPECT_THROW(cudf::dictionary::set_keys(dictionary->view(), null_keys), cudf::logic_error); } diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp index 895887ee348..ecc8558243d 100644 --- a/cpp/tests/interop/dlpack_test.cpp +++ b/cpp/tests/interop/dlpack_test.cpp @@ -20,6 +20,7 @@ #include #include +#include #include @@ -98,7 +99,7 @@ TEST_F(DLPackUntypedTests, MultipleTypesToDlpack) cudf::test::fixed_width_column_wrapper col1({1, 2, 3, 4}); cudf::test::fixed_width_column_wrapper col2({1, 2, 3, 4}); cudf::table_view input({col1, col2}); - EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error); + EXPECT_THROW(cudf::to_dlpack(input), cudf::data_type_error); } TEST_F(DLPackUntypedTests, InvalidNullsToDlpack) diff --git a/cpp/tests/labeling/label_bins_tests.cpp b/cpp/tests/labeling/label_bins_tests.cpp index 2ac6ad5dd0d..1a9e74df9be 100644 --- a/cpp/tests/labeling/label_bins_tests.cpp +++ b/cpp/tests/labeling/label_bins_tests.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,7 @@ TEST(BinColumnErrorTests, TestInvalidLeft) EXPECT_THROW( cudf::label_bins(input, left_edges, cudf::inclusive::YES, right_edges, cudf::inclusive::NO), - cudf::logic_error); + cudf::data_type_error); }; // Right edges type check. @@ -76,7 +77,7 @@ TEST(BinColumnErrorTests, TestInvalidRight) EXPECT_THROW( cudf::label_bins(input, left_edges, cudf::inclusive::YES, right_edges, cudf::inclusive::NO), - cudf::logic_error); + cudf::data_type_error); }; // Input type check. @@ -88,7 +89,7 @@ TEST(BinColumnErrorTests, TestInvalidInput) EXPECT_THROW( cudf::label_bins(input, left_edges, cudf::inclusive::YES, right_edges, cudf::inclusive::NO), - cudf::logic_error); + cudf::data_type_error); }; // Number of left and right edges must match. diff --git a/cpp/tests/lists/combine/concatenate_rows_tests.cpp b/cpp/tests/lists/combine/concatenate_rows_tests.cpp index 008003a08a1..bf088eb855a 100644 --- a/cpp/tests/lists/combine/concatenate_rows_tests.cpp +++ b/cpp/tests/lists/combine/concatenate_rows_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include using namespace cudf::test::iterators; @@ -53,7 +54,7 @@ TEST_F(ListConcatenateRowsTest, InvalidInput) auto const col1 = IntListsCol{}.release(); auto const col2 = StrListsCol{}.release(); EXPECT_THROW(cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}}), - cudf::logic_error); + cudf::data_type_error); } } diff --git a/cpp/tests/lists/sequences_tests.cpp b/cpp/tests/lists/sequences_tests.cpp index e97600a76d3..74545903eb3 100644 --- a/cpp/tests/lists/sequences_tests.cpp +++ b/cpp/tests/lists/sequences_tests.cpp @@ -22,6 +22,7 @@ #include #include +#include using namespace cudf::test::iterators; @@ -200,8 +201,8 @@ TEST_F(NumericSequencesTest, InvalidSizesInput) auto const steps = IntsCol{}; auto const sizes = FWDCol{}; - EXPECT_THROW(cudf::lists::sequences(starts, sizes), cudf::logic_error); - EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::logic_error); + EXPECT_THROW(cudf::lists::sequences(starts, sizes), cudf::data_type_error); + EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::data_type_error); } TEST_F(NumericSequencesTest, MismatchedColumnSizesInput) @@ -220,7 +221,7 @@ TEST_F(NumericSequencesTest, MismatchedColumnTypesInput) auto const steps = FWDCol{1, 2, 3}; auto const sizes = IntsCol{1, 2, 3}; - EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::logic_error); + EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::data_type_error); } TEST_F(NumericSequencesTest, InputHasNulls) diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index defc00a1fde..9603ea44a76 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -58,7 +58,7 @@ TEST_F(ReplaceErrorTest, TypeMismatch) cudf::test::fixed_width_column_wrapper values_to_replace_column{ {10, 11, 12, 13, 14, 15, 16, 17}}; - EXPECT_THROW(cudf::replace_nulls(input_column, values_to_replace_column), cudf::logic_error); + EXPECT_THROW(cudf::replace_nulls(input_column, values_to_replace_column), cudf::data_type_error); } // Error: column type mismatch @@ -659,14 +659,14 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsError) cudf::test::fixed_width_column_wrapper replacement_w({1, 2, 3, 4}); auto replacement = cudf::dictionary::encode(replacement_w); - EXPECT_THROW(cudf::replace_nulls(input->view(), replacement->view()), cudf::logic_error); + EXPECT_THROW(cudf::replace_nulls(input->view(), replacement->view()), cudf::data_type_error); EXPECT_THROW(cudf::replace_nulls(input->view(), cudf::string_scalar("x")), cudf::data_type_error); cudf::test::fixed_width_column_wrapper input_one_w({1}, {0}); auto input_one = cudf::dictionary::encode(input_one_w); auto dict_input = cudf::dictionary_column_view(input_one->view()); auto dict_repl = cudf::dictionary_column_view(replacement->view()); - EXPECT_THROW(cudf::replace_nulls(input->view(), replacement->view()), cudf::logic_error); + EXPECT_THROW(cudf::replace_nulls(input->view(), replacement->view()), cudf::data_type_error); } TEST_F(ReplaceDictionaryTest, ReplaceNullsEmpty) diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index 613034efc12..1858cd7782e 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,7 @@ TEST_F(ReplaceErrorTest, TypeMismatch) EXPECT_THROW( cudf::find_and_replace_all(input_column, values_to_replace_column, replacement_values_column), - cudf::logic_error); + cudf::data_type_error); } // Error: nulls in old-values diff --git a/cpp/tests/transform/one_hot_encode_tests.cpp b/cpp/tests/transform/one_hot_encode_tests.cpp index 1015370fe4b..8384cb3480b 100644 --- a/cpp/tests/transform/one_hot_encode_tests.cpp +++ b/cpp/tests/transform/one_hot_encode_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -198,7 +199,7 @@ TEST_F(OneHotEncodingTest, MismatchTypes) auto input = cudf::test::strings_column_wrapper{"xx", "yy", "xx"}; auto category = cudf::test::fixed_width_column_wrapper{1}; - EXPECT_THROW(cudf::one_hot_encode(input, category), cudf::logic_error); + EXPECT_THROW(cudf::one_hot_encode(input, category), cudf::data_type_error); } TEST_F(OneHotEncodingTest, List) From a1bbecbb410a94f65d04ec4caca287938f8eaa67 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 17:57:01 -0500 Subject: [PATCH 09/30] Fix remaining type checks that should use new utilities. --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 9 +++++---- cpp/src/strings/slice.cu | 14 ++++++++++---- cpp/src/table/table_view.cpp | 4 ++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index ce9840050a9..95e89179e34 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -921,13 +921,14 @@ Use the `CUDF_EXPECTS` macro to enforce runtime conditions necessary for correct Example usage: ```c++ -CUDF_EXPECTS(lhs.type() == rhs.type(), "Column type mismatch"); +CUDF_EXPECTS(cudf::column_types_equal(lhs, rhs), "Column type mismatch", cudf::data_type_error); ``` The first argument is the conditional expression expected to resolve to `true` under normal -conditions. If the conditional evaluates to `false`, then an error has occurred and an instance of -`cudf::logic_error` is thrown. The second argument to `CUDF_EXPECTS` is a short description of the -error that has occurred and is used for the exception's `what()` message. +conditions. The second argument to `CUDF_EXPECTS` is a short description of the error that has +occurred and is used for the exception's `what()` message. If the conditional evaluates to +`false`, then an error has occurred and an instance of the exception class in the third argument +(or the default, `cudf::logic_error`) is thrown. There are times where a particular code path, if reached, should indicate an error no matter what. For example, often the `default` case of a `switch` statement represents an invalid alternative. diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index 98f3c9cae0d..7bef84ce916 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include @@ -226,13 +228,17 @@ std::unique_ptr slice_strings(strings_column_view const& strings, "Parameter starts must have the same number of rows as strings."); CUDF_EXPECTS(stops_column.size() == strings_count, "Parameter stops must have the same number of rows as strings."); - CUDF_EXPECTS(starts_column.type() == stops_column.type(), - "Parameters starts and stops must be of the same type."); + CUDF_EXPECTS(cudf::column_types_equal(starts_column, stops_column), + "Parameters starts and stops must be of the same type.", + cudf::data_type_error); CUDF_EXPECTS(starts_column.null_count() == 0, "Parameter starts must not contain nulls."); CUDF_EXPECTS(stops_column.null_count() == 0, "Parameter stops must not contain nulls."); CUDF_EXPECTS(starts_column.type().id() != data_type{type_id::BOOL8}.id(), - "Positions values must not be bool type."); - CUDF_EXPECTS(is_fixed_width(starts_column.type()), "Positions values must be fixed width type."); + "Positions values must not be bool type.", + cudf::data_type_error); + CUDF_EXPECTS(is_fixed_width(starts_column.type()), + "Positions values must be fixed width type.", + cudf::data_type_error); auto strings_column = column_device_view::create(strings.parent(), stream); auto starts_iter = cudf::detail::indexalator_factory::make_input_iterator(starts_column); diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index bcbf2d44139..dfbf29f2db5 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -164,7 +164,7 @@ bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) return std::all_of(thrust::counting_iterator(0), thrust::counting_iterator(lhs.num_columns()), [lhs, rhs](auto const i) { - return lhs.column(i).type() == rhs.column(i).type() and + return cudf::column_types_equal(lhs.column(i), rhs.column(i)) and cudf::is_relationally_comparable(lhs.column(i).type()); }); } From 0de9f9b27120e3cb8b2f9cc15f8dc1d262deb2b6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 18:04:30 -0500 Subject: [PATCH 10/30] Add docs section on comparing data types. --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 95e89179e34..39bf2805d0f 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1027,6 +1027,16 @@ types such as numeric types and timestamps/durations, adding support for nested Enabling an algorithm differently for different types uses either template specialization or SFINAE, as discussed in [Specializing Type-Dispatched Code Paths](#specializing-type-dispatched-code-paths). +## Comparing Data Types + +When comparing the data types of two columns or scalars, do not directly compare +`a.type() == b.type()`. Nested types such as lists of structs of integers will not be handled +properly if only the top level type is compared. Instead, use one of the following helpers: + + * `cudf::column_types_equal` for comparing the types of two columns + * `cudf::column_scalar_types_equal` for comparing the types of a column and a scalar + * `cudf::scalar_types_equal` for comparing the types of two scalars + # Type Dispatcher libcudf stores data (for columns and scalars) "type erased" in `void*` device memory. This From fc7b821244730519470e432f750a0b4c3fa8a5be Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Mar 2024 18:12:08 -0500 Subject: [PATCH 11/30] Fix typo. --- cpp/include/cudf/utilities/type_checks.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 0e564dd4686..a9987cf2e6c 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -65,8 +65,8 @@ bool column_scalar_types_equal(column_view const& col, scalar const& slr); * - For struct types, the type of each field are compared in order. * - For all other types, the `id` of `data_type` is compared. * - * @param lhs The first `scalar to compare - * @param rhs The second `scalar to compare + * @param lhs The first `scalar` to compare + * @param rhs The second `scalar` to compare * @return true if scalar types match */ bool scalar_types_equal(scalar const& lhs, scalar const& rhs); From faa92c8261d7fd2791cd303bd07f33e3ca11abed Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 1 Apr 2024 18:11:05 -0500 Subject: [PATCH 12/30] Use cudf::is_fixed_point. Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/src/utilities/type_checks.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 2c2a121754a..15162a26273 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -155,8 +155,7 @@ bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { // Check if the columns have fixed point types. This is the only case where // type equality and equivalence differ. - if (lhs.type().id() == type_id::DECIMAL32 || lhs.type().id() == type_id::DECIMAL64 || - lhs.type().id() == type_id::DECIMAL128) { + if (cudf::is_fixed_point(lhs.type()) { return lhs.type().id() == rhs.type().id(); } return column_types_equal(lhs, rhs); From 65562660a61ac9528dd0dfc6a91e334480303f25 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 1 Apr 2024 16:27:25 -0700 Subject: [PATCH 13/30] Fix missing paren. --- cpp/src/utilities/type_checks.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 15162a26273..189a6bbb8b9 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -155,9 +155,7 @@ bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { // Check if the columns have fixed point types. This is the only case where // type equality and equivalence differ. - if (cudf::is_fixed_point(lhs.type()) { - return lhs.type().id() == rhs.type().id(); - } + if (cudf::is_fixed_point(lhs.type())) { return lhs.type().id() == rhs.type().id(); } return column_types_equal(lhs, rhs); } From 6531a53a8a605f6a9c6a3ec8123c15fecc42c4a3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 4 Apr 2024 10:42:09 -0500 Subject: [PATCH 14/30] list column, not lists column Co-authored-by: Lawrence Mitchell --- cpp/src/lists/combine/concatenate_rows.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index 893d5907f5f..d109660a5ab 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -204,7 +204,7 @@ std::unique_ptr concatenate_rows(table_view const& input, std::all_of(input.begin(), input.end(), [](column_view const& col) { return col.type().id() == cudf::type_id::LIST; }), - "All columns of the input table must be of lists column type.", + "All columns of the input table must be of list column type.", cudf::data_type_error); CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), "The types of entries in the input columns must be the same.", From 454d41338f9d42806bee5fdbf66923e53dc7a9aa Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 9 Apr 2024 11:17:06 -0700 Subject: [PATCH 15/30] Rename functions to be overloads of cudf::types_equal, update and expand tests. --- cpp/include/cudf/detail/scatter.cuh | 2 +- cpp/include/cudf/lists/detail/scatter.cuh | 2 +- cpp/include/cudf/utilities/type_checks.hpp | 51 +++-- cpp/src/copying/concatenate.cu | 2 +- cpp/src/copying/copy.cu | 20 +- cpp/src/copying/copy_range.cu | 8 +- cpp/src/copying/scatter.cu | 7 +- cpp/src/copying/shift.cu | 2 +- cpp/src/dictionary/add_keys.cu | 5 +- cpp/src/dictionary/remove_keys.cu | 5 +- cpp/src/dictionary/replace.cu | 7 +- cpp/src/dictionary/search.cu | 4 +- cpp/src/dictionary/set_keys.cu | 3 +- cpp/src/filling/fill.cu | 15 +- cpp/src/filling/sequence.cu | 2 +- cpp/src/groupby/groupby.cu | 14 +- cpp/src/interop/dlpack.cpp | 2 +- cpp/src/labeling/label_bins.cu | 7 +- cpp/src/lists/combine/concatenate_rows.cu | 2 +- cpp/src/lists/contains.cu | 2 +- cpp/src/lists/sequences.cu | 2 +- cpp/src/lists/set_operations.cu | 2 +- cpp/src/reductions/reductions.cpp | 2 +- cpp/src/reductions/segmented/reductions.cpp | 7 +- cpp/src/replace/clamp.cu | 11 +- cpp/src/replace/nulls.cu | 12 +- cpp/src/replace/replace.cu | 4 +- cpp/src/rolling/detail/lead_lag_nested.cuh | 2 +- cpp/src/search/contains_scalar.cu | 4 +- cpp/src/strings/slice.cu | 2 +- cpp/src/table/row_operators.cu | 2 +- cpp/src/table/table_view.cpp | 15 +- cpp/src/transform/one_hot_encode.cu | 2 +- cpp/src/utilities/type_checks.cpp | 53 +++-- .../utilities_tests/type_check_tests.cpp | 197 ++++++++++-------- 35 files changed, 253 insertions(+), 226 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index bb130737931..187367cfd50 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -214,7 +214,7 @@ struct column_scatterer_impl { // check the keys match dictionary_column_view const source(source_in); dictionary_column_view const target(target_in); - CUDF_EXPECTS(cudf::column_types_equal(source.keys(), target.keys()), + CUDF_EXPECTS(cudf::types_equal(source.keys(), target.keys()), "scatter dictionary keys must be the same type", cudf::data_type_error); diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 5fc52ff1c04..395dde94d53 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -100,7 +100,7 @@ std::unique_ptr scatter_impl(rmm::device_uvector cons rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types."); + CUDF_EXPECTS(types_equal(source, target), "Mismatched column types."); auto const child_column_type = lists_column_view(target).child().type(); diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index a9987cf2e6c..1d8a64e7112 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -37,12 +37,29 @@ namespace cudf { * @param rhs The second `column_view` to compare * @return true if column types match */ -bool column_types_equal(column_view const& lhs, column_view const& rhs); +bool types_equal(column_view const& lhs, column_view const& rhs); /** * @brief Compares the type of a `column_view` and a `scalar` * - * This function returns true if the type of `col` equals that of `slr`. + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For dictionary column types, the type of the keys are compared to the + * scalar type. + * - For lists types, the type of child columns are compared recursively. + * - For struct types, the type of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The `column_view` to compare + * @param rhs The `scalar` to compare + * @return true if column/scalar types match + */ +bool types_equal(column_view const& lhs, scalar const& rhs); + +/** + * @brief Compares the type of a `scalar` and a `column_view` + * + * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is compared. * - For dictionary column types, the type of the keys are compared to the * scalar type. @@ -50,11 +67,11 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs); * - For struct types, the type of each field are compared in order. * - For all other types, the `id` of `data_type` is compared. * - * @param col The `column_view` to compare - * @param slr The `scalar` to compare + * @param lhs The `scalar` to compare + * @param rhs The `column_view` to compare * @return true if column/scalar types match */ -bool column_scalar_types_equal(column_view const& col, scalar const& slr); +bool types_equal(scalar const& lhs, column_view const& rhs); /** * @brief Compares the type of two `scalar`s @@ -69,7 +86,7 @@ bool column_scalar_types_equal(column_view const& col, scalar const& slr); * @param rhs The second `scalar` to compare * @return true if scalar types match */ -bool scalar_types_equal(scalar const& lhs, scalar const& rhs); +bool types_equal(scalar const& lhs, scalar const& rhs); /** * @brief Compare the type IDs of two `column_view`s @@ -80,25 +97,25 @@ bool scalar_types_equal(scalar const& lhs, scalar const& rhs); * @param rhs The second `column_view` to compare * @return true if column types match */ -bool column_types_equivalent(column_view const& lhs, column_view const& rhs); +bool types_equivalent(column_view const& lhs, column_view const& rhs); /** - * @brief Compare the types of a range of `column_views` + * @brief Compare the types of a range of `column_view` or `scalar` objects * - * This function returns true if cudf::column_types_equal is true for every - * pair of consecutive columns in the range. + * This function returns true if cudf::types_equal is true for every pair of + * consecutive objects (`column_view` or `scalar`) in the range. * * @tparam ForwardIt Forward iterator - * @param first The first column - * @param last The last column - * @return true if all column types match + * @param first The first iterator + * @param last The last iterator + * @return true if all types match */ template -inline bool all_column_types_equal(ForwardIt first, ForwardIt last) +inline bool all_types_equal(ForwardIt first, ForwardIt last) { - return std::all_of(std::next(first), last, [want = *first](auto const& c) { - return cudf::column_types_equal(want, c); - }); + return first == last || std::all_of(std::next(first), last, [want = *first](auto const& c) { + return cudf::types_equal(want, c); + }); } } // namespace cudf diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 84016fb5c93..33514df9562 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -462,7 +462,7 @@ void traverse_children::operator()(host_span */ void bounds_and_type_check(host_span cols, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(cudf::all_column_types_equal(cols.begin(), cols.end()), + CUDF_EXPECTS(cudf::all_types_equal(cols.begin(), cols.end()), "Type mismatch in columns to concatenate.", cudf::data_type_error); diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 675211992df..7ad0da5f09e 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -364,9 +364,8 @@ std::unique_ptr copy_if_else(column_view const& lhs, std::invalid_argument); CUDF_EXPECTS( lhs.size() == rhs.size(), "Both columns must be of the same size", std::invalid_argument); - CUDF_EXPECTS(cudf::column_types_equal(lhs, rhs), - "Both inputs must be of the same type", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -380,9 +379,8 @@ std::unique_ptr copy_if_else(scalar const& lhs, CUDF_EXPECTS(boolean_mask.size() == rhs.size(), "Boolean mask column must be the same size as rhs column", std::invalid_argument); - CUDF_EXPECTS(cudf::column_scalar_types_equal(rhs, lhs), - "Both inputs must be of the same type", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(rhs, lhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, !lhs.is_valid(stream), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -396,9 +394,8 @@ std::unique_ptr copy_if_else(column_view const& lhs, CUDF_EXPECTS(boolean_mask.size() == lhs.size(), "Boolean mask column must be the same size as lhs column", std::invalid_argument); - CUDF_EXPECTS(cudf::column_scalar_types_equal(lhs, rhs), - "Both inputs must be of the same type", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), !rhs.is_valid(stream), boolean_mask, stream, mr); } @@ -409,9 +406,8 @@ std::unique_ptr copy_if_else(scalar const& lhs, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::scalar_types_equal(lhs, rhs), - "Both inputs must be of the same type", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else( lhs, rhs, !lhs.is_valid(stream), !rhs.is_valid(stream), boolean_mask, stream, mr); } diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 01f854b23a9..5e7586bb15a 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -147,7 +147,7 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator() copy_range(column_view const& source, (target_begin <= target.size() - (source_end - source_begin)), "Range is out of bounds.", std::out_of_range); - CUDF_EXPECTS( - cudf::column_types_equal(target, source), "Data type mismatch.", cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(target, source), "Data type mismatch.", cudf::data_type_error); return cudf::type_dispatcher( target.type(), diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 089adb316a7..3e446d9b097 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -113,7 +113,7 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - CUDF_EXPECTS(cudf::column_scalar_types_equal(target, source.get()), + CUDF_EXPECTS(cudf::types_equal(target, source.get()), "scalar and column types must match", cudf::data_type_error); @@ -146,7 +146,7 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - CUDF_EXPECTS(cudf::column_scalar_types_equal(target, source.get()), + CUDF_EXPECTS(cudf::types_equal(target, source.get()), "scalar and column types must match", cudf::data_type_error); @@ -490,8 +490,7 @@ std::unique_ptr
boolean_mask_scatter( CUDF_EXPECTS(std::all_of(thrust::counting_iterator(0), thrust::counting_iterator(target.num_columns()), [&input, &target](auto index) { - return cudf::column_scalar_types_equal(target.column(index), - input[index].get()); + return cudf::types_equal(target.column(index), input[index].get()); }), "Type mismatch in input scalar and target column", cudf::data_type_error); diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index cd2c2f4be09..5c34369e56b 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -158,7 +158,7 @@ std::unique_ptr shift(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::column_scalar_types_equal(input, fill_value), + CUDF_EXPECTS(cudf::types_equal(input, fill_value), "shift requires each fill value type to match the corresponding column type.", cudf::data_type_error); diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index a1eb7d55e14..7d92d3d7e4a 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -55,9 +55,8 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls"); auto old_keys = dictionary_column.keys(); // [a,b,c,d,f] - CUDF_EXPECTS(cudf::column_types_equal(new_keys, old_keys), - "Keys must be the same type", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(new_keys, old_keys), "Keys must be the same type", cudf::data_type_error); // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] auto combined_keys = cudf::detail::concatenate( diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index e84828a4235..95516efdbf0 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -156,9 +156,8 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); - CUDF_EXPECTS(cudf::column_types_equal(keys_view, keys_to_remove), - "keys types must match", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(keys_view, keys_to_remove), "keys types must match", cudf::data_type_error); // locate keys to remove by searching the keys column auto const matches = cudf::detail::contains(keys_to_remove, keys_view, stream, mr); diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 1c06a9e54c9..02028274cb6 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -85,9 +85,8 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls()) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS(cudf::column_types_equal(input.keys(), replacement.keys()), - "keys must match", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(input.keys(), replacement.keys()), "keys must match", cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); // first combine the keys so both input dictionaries have the same set @@ -122,7 +121,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, if (!input.has_nulls() || !replacement.is_valid(stream)) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS(cudf::column_scalar_types_equal(input.parent(), replacement), + CUDF_EXPECTS(cudf::types_equal(input.parent(), replacement), "keys must match scalar type", cudf::data_type_error); diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 4ad027a0904..ede34962564 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -76,7 +76,7 @@ struct find_index_fn { if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); } - CUDF_EXPECTS(cudf::column_scalar_types_equal(input.parent(), key), + CUDF_EXPECTS(cudf::types_equal(input.parent(), key), "search key type must match dictionary keys type", cudf::data_type_error); @@ -120,7 +120,7 @@ struct find_insert_index_fn { if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); } - CUDF_EXPECTS(cudf::column_scalar_types_equal(input.parent(), key), + CUDF_EXPECTS(cudf::types_equal(input.parent(), key), "search key type must match dictionary keys type", cudf::data_type_error); diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 6afdbbdaea4..0cda45cff62 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -124,8 +124,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls"); auto keys = dictionary_column.keys(); - CUDF_EXPECTS( - cudf::column_types_equal(keys, new_keys), "keys types must match", cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(keys, new_keys), "keys types must match", cudf::data_type_error); // copy the keys -- use cudf::distinct to make sure there are no duplicates, // then sort the results. diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index c62ea44fbe6..682d116b09f 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -110,8 +110,7 @@ struct out_of_place_fill_range_dispatch { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS( - cudf::column_scalar_types_equal(input, value), "Data type mismatch.", cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(input, value), "Data type mismatch.", cudf::data_type_error); auto p_ret = std::make_unique(input, stream, mr); if (end != begin) { // otherwise no fill @@ -138,8 +137,7 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator(); auto p_scalar = static_cast(&value); return cudf::strings::detail::fill( @@ -155,9 +153,8 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator()(input, stream, mr); cudf::dictionary_column_view const target(input); - CUDF_EXPECTS(cudf::column_scalar_types_equal(target.parent(), value), - "Data type mismatch.", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(target.parent(), value), "Data type mismatch.", cudf::data_type_error); // if the scalar is invalid, then just copy the column and fill the null mask if (!value.is_valid(stream)) { @@ -223,9 +220,7 @@ void fill_in_place(mutable_column_view& destination, "Range is out of bounds."); CUDF_EXPECTS(destination.nullable() || value.is_valid(stream), "destination should be nullable or value should be non-null."); - CUDF_EXPECTS(cudf::column_scalar_types_equal(destination, value), - "Data type mismatch.", - cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(destination, value), "Data type mismatch.", cudf::data_type_error); if (end != begin) { // otherwise no-op cudf::type_dispatcher( diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index cb65632ee55..79538ad01a9 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -128,7 +128,7 @@ std::unique_ptr sequence(size_type size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::scalar_types_equal(init, step), + CUDF_EXPECTS(cudf::types_equal(init, step), "init and step must be of the same type.", cudf::data_type_error); CUDF_EXPECTS(size >= 0, "size must be >= 0"); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 4c25d206c71..e568be99986 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -312,14 +312,12 @@ std::pair, std::unique_ptr
> groupby::shift( CUDF_FUNC_RANGE(); CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), "Mismatch number of fill_values and columns."); - CUDF_EXPECTS(std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.num_columns()), - [&](auto i) { - return cudf::column_scalar_types_equal(values.column(i), - fill_values[i].get()); - }), - "values and fill_value should have the same type.", - cudf::data_type_error); + CUDF_EXPECTS( + std::all_of(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.num_columns()), + [&](auto i) { return cudf::types_equal(values.column(i), fill_values[i].get()); }), + "values and fill_value should have the same type.", + cudf::data_type_error); auto stream = cudf::get_default_stream(); std::vector> results; diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 33ec78ba090..c70062138fc 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -231,7 +231,7 @@ DLManagedTensor* to_dlpack(table_view const& input, DLDataType const dltype = data_type_to_DLDataType(type); // Ensure all columns are the same type - CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), + CUDF_EXPECTS(cudf::all_types_equal(input.begin(), input.end()), "All columns required to have same data type", cudf::data_type_error); diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 21114e0de1a..fe0d2fb292e 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -208,10 +208,9 @@ std::unique_ptr label_bins(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE() - CUDF_EXPECTS( - cudf::column_types_equal(input, left_edges) && cudf::column_types_equal(input, right_edges), - "The input and edge columns must have the same types.", - cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(input, left_edges) && cudf::types_equal(input, right_edges), + "The input and edge columns must have the same types.", + cudf::data_type_error); CUDF_EXPECTS(left_edges.size() == right_edges.size(), "The left and right edge columns must be of the same length."); CUDF_EXPECTS(!left_edges.has_nulls() && !right_edges.has_nulls(), diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index d109660a5ab..f005d822e12 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -206,7 +206,7 @@ std::unique_ptr concatenate_rows(table_view const& input, [](column_view const& col) { return col.type().id() == cudf::type_id::LIST; }), "All columns of the input table must be of list column type.", cudf::data_type_error); - CUDF_EXPECTS(cudf::all_column_types_equal(input.begin(), input.end()), + CUDF_EXPECTS(cudf::all_types_equal(input.begin(), input.end()), "The types of entries in the input columns must be the same.", cudf::data_type_error); diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 2a7957fc180..88bbb2812ec 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -194,7 +194,7 @@ std::unique_ptr dispatch_index_of(lists_column_view const& lists, // comparisons. auto const child = lists.child(); - CUDF_EXPECTS(cudf::column_types_equal(child, search_keys), + CUDF_EXPECTS(cudf::types_equal(child, search_keys), "Type/Scale of search key does not match list column element type.", cudf::data_type_error); CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty."); diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index d9fa0be76eb..93ac17c6b1e 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -149,7 +149,7 @@ std::unique_ptr sequences(column_view const& starts, CUDF_EXPECTS(!steps_cv.has_nulls(), "steps input column must not have nulls."); CUDF_EXPECTS(starts.size() == steps_cv.size(), "starts and steps input columns must have the same number of rows."); - CUDF_EXPECTS(cudf::column_types_equal(starts, steps_cv), + CUDF_EXPECTS(cudf::types_equal(starts, steps_cv), "starts and steps input columns must have the same type.", cudf::data_type_error); } diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index 5735c84e3d3..dc8aa5d798e 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -51,7 +51,7 @@ namespace { void check_compatibility(lists_column_view const& lhs, lists_column_view const& rhs) { CUDF_EXPECTS(lhs.size() == rhs.size(), "The input lists column must have the same size."); - CUDF_EXPECTS(column_types_equal(lhs.child(), rhs.child()), + CUDF_EXPECTS(types_equal(lhs.child(), rhs.child()), "The input lists columns must have children having the same type structure"); } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 1e1dc4fd339..9bf1855bf02 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -155,7 +155,7 @@ std::unique_ptr reduce(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(!init.has_value() || cudf::column_scalar_types_equal(col, init.value().get()), + CUDF_EXPECTS(!init.has_value() || cudf::types_equal(col, init.value().get()), "column and initial value must be the same type", cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 037d29c3c09..06e23aa4e5a 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -112,10 +112,9 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS( - !init.has_value() || cudf::column_scalar_types_equal(segmented_values, init.value().get()), - "column and initial value must be the same type", - cudf::data_type_error); + CUDF_EXPECTS(!init.has_value() || cudf::types_equal(segmented_values, init.value().get()), + "column and initial value must be the same type", + cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 8cdf44fd9c8..b884f38b6fe 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -199,9 +199,8 @@ struct dispatch_clamp { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(column_scalar_types_equal(input, lo), - "mismatching types of scalar and input", - cudf::data_type_error); + CUDF_EXPECTS( + types_equal(input, lo), "mismatching types of scalar and input", cudf::data_type_error); auto lo_itr = make_optional_iterator(lo, nullate::YES{}); auto hi_itr = make_optional_iterator(hi, nullate::YES{}); @@ -326,11 +325,11 @@ std::unique_ptr clamp(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS( - cudf::scalar_types_equal(lo, hi), "mismatching types of limit scalars", cudf::data_type_error); - CUDF_EXPECTS(cudf::scalar_types_equal(lo_replace, hi_replace), + cudf::types_equal(lo, hi), "mismatching types of limit scalars", cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(lo_replace, hi_replace), "mismatching types of replace scalars", cudf::data_type_error); - CUDF_EXPECTS(cudf::scalar_types_equal(lo, lo_replace), + CUDF_EXPECTS(cudf::types_equal(lo, lo_replace), "mismatching types of limit and replace scalars", cudf::data_type_error); diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index b6167e15565..53ec10a122f 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -307,9 +307,8 @@ struct replace_nulls_scalar_kernel_forwarder { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::column_scalar_types_equal(input, replacement), - "Data type mismatch", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); std::unique_ptr output = cudf::detail::allocate_like( input, input.size(), cudf::mask_allocation_policy::NEVER, stream, mr); auto output_view = output->mutable_view(); @@ -345,9 +344,7 @@ std::unique_ptr replace_nulls_scalar_kernel_forwarder::operator()< rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::column_scalar_types_equal(input, replacement), - "Data type mismatch", - cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); cudf::strings_column_view input_s(input); auto const& repl = static_cast(replacement); return cudf::strings::detail::replace_nulls(input_s, repl, stream, mr); @@ -413,8 +410,7 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS( - cudf::column_types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); + CUDF_EXPECTS(cudf::types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "Column size mismatch"); if (input.is_empty()) { return cudf::empty_like(input); } diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index fe47234ac4c..f87d924c988 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -303,8 +303,8 @@ std::unique_ptr find_and_replace_all(cudf::column_view const& inpu CUDF_EXPECTS(values_to_replace.size() == replacement_values.size(), "values_to_replace and replacement_values size mismatch."); - CUDF_EXPECTS(cudf::column_types_equal(input_col, values_to_replace) && - cudf::column_types_equal(input_col, replacement_values), + CUDF_EXPECTS(cudf::types_equal(input_col, values_to_replace) && + cudf::types_equal(input_col, replacement_values), "Columns type mismatch", cudf::data_type_error); CUDF_EXPECTS(not values_to_replace.has_nulls(), "values_to_replace must not have nulls"); diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index 407d02cf8f2..f3544fc4804 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -100,7 +100,7 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, { CUDF_EXPECTS(op == aggregation::LEAD || op == aggregation::LAG, "Unexpected aggregation type in compute_lead_lag_for_nested"); - CUDF_EXPECTS(cudf::column_types_equal(input, default_outputs), + CUDF_EXPECTS(cudf::types_equal(input, default_outputs), "Defaults column type must match input column.", cudf::data_type_error); // Because LEAD/LAG. diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 1458a951c7a..69b4c9c2d39 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -64,7 +64,7 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { - CUDF_EXPECTS(cudf::column_scalar_types_equal(haystack, needle), + CUDF_EXPECTS(cudf::types_equal(haystack, needle), "Scalar and column types must match", cudf::data_type_error); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller @@ -91,7 +91,7 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { - CUDF_EXPECTS(cudf::column_scalar_types_equal(haystack, needle), + CUDF_EXPECTS(cudf::types_equal(haystack, needle), "Scalar and column types must match", cudf::data_type_error); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index 7bef84ce916..9fe8caab28c 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -228,7 +228,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, "Parameter starts must have the same number of rows as strings."); CUDF_EXPECTS(stops_column.size() == strings_count, "Parameter stops must have the same number of rows as strings."); - CUDF_EXPECTS(cudf::column_types_equal(starts_column, stops_column), + CUDF_EXPECTS(cudf::types_equal(starts_column, stops_column), "Parameters starts and stops must be of the same type.", cudf::data_type_error); CUDF_EXPECTS(starts_column.null_count() == 0, "Parameter starts must not contain nulls."); diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 71b437cb47d..90c8b2c87fb 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -380,7 +380,7 @@ void check_shape_compatibility(table_view const& lhs, table_view const& rhs) CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Cannot compare tables with different number of columns"); for (size_type i = 0; i < lhs.num_columns(); ++i) { - CUDF_EXPECTS(column_types_equivalent(lhs.column(i), rhs.column(i)), + CUDF_EXPECTS(types_equivalent(lhs.column(i), rhs.column(i)), "Cannot compare tables with different column types"); } } diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index dfbf29f2db5..63c85cd75d1 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -147,13 +147,12 @@ bool has_nested_nullable_columns(table_view const& input) bool have_same_types(table_view const& lhs, table_view const& rhs) { - return std::equal(lhs.begin(), - lhs.end(), - rhs.begin(), - rhs.end(), - [](column_view const& lcol, column_view const& rcol) { - return cudf::column_types_equal(lcol, rcol); - }); + return std::equal( + lhs.begin(), + lhs.end(), + rhs.begin(), + rhs.end(), + [](column_view const& lcol, column_view const& rcol) { return cudf::types_equal(lcol, rcol); }); } namespace detail { @@ -164,7 +163,7 @@ bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) return std::all_of(thrust::counting_iterator(0), thrust::counting_iterator(lhs.num_columns()), [lhs, rhs](auto const i) { - return cudf::column_types_equal(lhs.column(i), rhs.column(i)) and + return cudf::types_equal(lhs.column(i), rhs.column(i)) and cudf::is_relationally_comparable(lhs.column(i).type()); }); } diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 9bec5e0f7fe..49c368e93b4 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -61,7 +61,7 @@ std::pair, table_view> one_hot_encode(column_view const& rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::column_types_equal(input, categories), + CUDF_EXPECTS(cudf::types_equal(input, categories), "Mismatch type between input and categories.", cudf::data_type_error); diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 189a6bbb8b9..776c0293583 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -51,17 +51,18 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_view { if (rhs.type().id() != type_id::LIST) { return false; } auto const& ci = lists_column_view::child_column_index; - return column_types_equal(lhs.child(ci), rhs.child(ci)); + return types_equal(lhs.child(ci), rhs.child(ci)); } template <> bool columns_equal_fn::operator()(column_view const& lhs, column_view const& rhs) { if (rhs.type().id() != type_id::STRUCT) { return false; } - return lhs.num_children() == rhs.num_children() and - std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(lhs.num_children()), - [&](auto i) { return column_types_equal(lhs.child(i), rhs.child(i)); }); + return std::equal(lhs.child_begin(), + lhs.child_end(), + rhs.child_begin(), + rhs.child_end(), + [](auto const& lhs, auto const& rhs) { return types_equal(lhs, rhs); }); } struct column_scalar_equal_fn { @@ -78,7 +79,7 @@ bool column_scalar_equal_fn::operator()(column_view const& col, sc // It is not possible to have a scalar dictionary, so compare the dictionary // column keys type to the scalar type. auto col_keys = cudf::dictionary_column_view(col).keys(); - return column_scalar_types_equal(col_keys, slr); + return types_equal(col_keys, slr); } template <> @@ -87,7 +88,7 @@ bool column_scalar_equal_fn::operator()(column_view const& col, scala if (slr.type().id() != type_id::LIST) { return false; } auto const& ci = lists_column_view::child_column_index; auto const list_slr = static_cast(&slr); - return column_types_equal(col.child(ci), list_slr->view()); + return types_equal(col.child(ci), list_slr->view()); } template <> @@ -96,10 +97,11 @@ bool column_scalar_equal_fn::operator()(column_view const& col, sca if (slr.type().id() != type_id::STRUCT) { return false; } auto const struct_slr = static_cast(&slr); auto const slr_tbl = struct_slr->view(); - return col.num_children() == slr_tbl.num_columns() and - std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(col.num_children()), - [&](auto i) { return column_types_equal(col.child(i), slr_tbl.column(i)); }); + return std::equal(col.child_begin(), + col.child_end(), + slr_tbl.begin(), + slr_tbl.end(), + [](auto const& lhs, auto const& rhs) { return types_equal(lhs, rhs); }); } struct scalars_equal_fn { @@ -116,7 +118,7 @@ bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& rh if (rhs.type().id() != type_id::LIST) { return false; } auto const list_lhs = static_cast(&lhs); auto const list_rhs = static_cast(&rhs); - return column_types_equal(list_lhs->view(), list_rhs->view()); + return types_equal(list_lhs->view(), list_rhs->view()); } template <> @@ -125,38 +127,43 @@ bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& if (rhs.type().id() != type_id::STRUCT) { return false; } auto const tbl_lhs = static_cast(&lhs)->view(); auto const tbl_rhs = static_cast(&rhs)->view(); - return tbl_lhs.num_columns() == tbl_rhs.num_columns() and - std::all_of( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(tbl_lhs.num_columns()), - [&](auto i) { return column_types_equal(tbl_lhs.column(i), tbl_rhs.column(i)); }); + return std::equal(tbl_lhs.begin(), + tbl_lhs.end(), + tbl_rhs.begin(), + tbl_rhs.end(), + [](auto const& lhs, auto const& rhs) { return types_equal(lhs, rhs); }); } }; // namespace // Implementation note: avoid using double dispatch for this function // as it increases code paths to NxN for N types. -bool column_types_equal(column_view const& lhs, column_view const& rhs) +bool types_equal(column_view const& lhs, column_view const& rhs) { return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } -bool column_scalar_types_equal(column_view const& col, scalar const& slr) +bool types_equal(column_view const& lhs, scalar const& rhs) { - return type_dispatcher(col.type(), column_scalar_equal_fn{}, col, slr); + return type_dispatcher(lhs.type(), column_scalar_equal_fn{}, lhs, rhs); } -bool scalar_types_equal(scalar const& lhs, scalar const& rhs) +bool types_equal(scalar const& lhs, column_view const& rhs) +{ + return type_dispatcher(rhs.type(), column_scalar_equal_fn{}, rhs, lhs); +} + +bool types_equal(scalar const& lhs, scalar const& rhs) { return type_dispatcher(lhs.type(), scalars_equal_fn{}, lhs, rhs); } -bool column_types_equivalent(column_view const& lhs, column_view const& rhs) +bool types_equivalent(column_view const& lhs, column_view const& rhs) { // Check if the columns have fixed point types. This is the only case where // type equality and equivalence differ. if (cudf::is_fixed_point(lhs.type())) { return lhs.type().id() == rhs.type().id(); } - return column_types_equal(lhs, rhs); + return types_equal(lhs, rhs); } } // namespace cudf diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index 9c23798fce6..b6ac779ef42 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -19,13 +19,11 @@ #include #include +#include #include #include #include -namespace cudf { -namespace test { - template struct ColumnTypeCheckTestTyped : public cudf::test::BaseFixture {}; @@ -35,56 +33,56 @@ TYPED_TEST_SUITE(ColumnTypeCheckTestTyped, cudf::test::FixedWidthTypes); TYPED_TEST(ColumnTypeCheckTestTyped, SameFixedWidth) { - fixed_width_column_wrapper lhs{1, 1}, rhs{2}; - EXPECT_TRUE(column_types_equal(lhs, rhs)); + cudf::test::fixed_width_column_wrapper lhs{1, 1}, rhs{2}; + EXPECT_TRUE(cudf::types_equal(lhs, rhs)); } TEST_F(ColumnTypeCheckTest, SameString) { - strings_column_wrapper lhs{{'a', 'a'}}, rhs{{'b'}}; - EXPECT_TRUE(column_types_equal(lhs, rhs)); + cudf::test::strings_column_wrapper lhs{{'a', 'a'}}, rhs{{'b'}}; + EXPECT_TRUE(cudf::types_equal(lhs, rhs)); - strings_column_wrapper lhs2{}, rhs2{{'b'}}; - EXPECT_TRUE(column_types_equal(lhs2, rhs2)); + cudf::test::strings_column_wrapper lhs2{}, rhs2{{'b'}}; + EXPECT_TRUE(cudf::types_equal(lhs2, rhs2)); - strings_column_wrapper lhs3{}, rhs3{}; - EXPECT_TRUE(column_types_equal(lhs3, rhs3)); + cudf::test::strings_column_wrapper lhs3{}, rhs3{}; + EXPECT_TRUE(cudf::types_equal(lhs3, rhs3)); } TEST_F(ColumnTypeCheckTest, SameList) { - using LCW = lists_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; LCW lhs{}, rhs{}; - EXPECT_TRUE(column_types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::types_equal(lhs, rhs)); LCW lhs2{{1, 2, 3}}, rhs2{{4, 5}}; - EXPECT_TRUE(column_types_equal(lhs2, rhs2)); + EXPECT_TRUE(cudf::types_equal(lhs2, rhs2)); LCW lhs3{{LCW{1}, LCW{2, 3}}}, rhs3{{LCW{4, 5}}}; - EXPECT_TRUE(column_types_equal(lhs3, rhs3)); + EXPECT_TRUE(cudf::types_equal(lhs3, rhs3)); LCW lhs4{{LCW{1}, LCW{}, LCW{2, 3}}}, rhs4{{LCW{4, 5}, LCW{}}}; - EXPECT_TRUE(column_types_equal(lhs4, rhs4)); + EXPECT_TRUE(cudf::types_equal(lhs4, rhs4)); } TYPED_TEST(ColumnTypeCheckTestTyped, SameDictionary) { - using DCW = dictionary_column_wrapper; + using DCW = cudf::test::dictionary_column_wrapper; DCW lhs{1, 1, 2, 3}, rhs{5, 5}; - EXPECT_TRUE(column_types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::types_equal(lhs, rhs)); DCW lhs2{}, rhs2{}; - EXPECT_TRUE(column_types_equal(lhs2, rhs2)); + EXPECT_TRUE(cudf::types_equal(lhs2, rhs2)); } TEST_F(ColumnTypeCheckTest, SameStruct) { - using SCW = structs_column_wrapper; - using FCW = fixed_width_column_wrapper; - using StringCW = strings_column_wrapper; - using LCW = lists_column_wrapper; - using DCW = dictionary_column_wrapper; + using SCW = cudf::test::structs_column_wrapper; + using FCW = cudf::test::fixed_width_column_wrapper; + using StringCW = cudf::test::strings_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; + using DCW = cudf::test::dictionary_column_wrapper; FCW lf1{1, 2, 3}, rf1{0, 1}; StringCW lf2{"a", "bb", ""}, rf2{"cc", "d"}; @@ -92,127 +90,158 @@ TEST_F(ColumnTypeCheckTest, SameStruct) DCW lf4{5, 5, 5}, rf4{9, 9}; SCW lhs{lf1, lf2, lf3, lf4}, rhs{rf1, rf2, rf3, rf4}; - EXPECT_TRUE(column_types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::types_equal(lhs, rhs)); } TEST_F(ColumnTypeCheckTest, DifferentBasics) { - fixed_width_column_wrapper lhs1{1, 1}; - strings_column_wrapper rhs1{"a", "bb"}; + cudf::test::fixed_width_column_wrapper lhs1{1, 1}; + cudf::test::strings_column_wrapper rhs1{"a", "bb"}; - EXPECT_FALSE(column_types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); - lists_column_wrapper lhs2{{"hello"}, {"world", "!"}}; - strings_column_wrapper rhs2{"", "kk"}; + cudf::test::lists_column_wrapper lhs2{{"hello"}, {"world", "!"}}; + cudf::test::strings_column_wrapper rhs2{"", "kk"}; - EXPECT_FALSE(column_types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); - fixed_width_column_wrapper lhs3{1, 1}; - dictionary_column_wrapper rhs3{2, 2}; + cudf::test::fixed_width_column_wrapper lhs3{1, 1}; + cudf::test::dictionary_column_wrapper rhs3{2, 2}; - EXPECT_FALSE(column_types_equal(lhs3, rhs3)); + EXPECT_FALSE(cudf::types_equal(lhs3, rhs3)); - lists_column_wrapper lhs4{{8, 8, 8}, {10, 10}}; - structs_column_wrapper rhs4{rhs2, rhs3}; + cudf::test::lists_column_wrapper lhs4{{8, 8, 8}, {10, 10}}; + cudf::test::structs_column_wrapper rhs4{rhs2, rhs3}; - EXPECT_FALSE(column_types_equal(lhs4, rhs4)); + EXPECT_FALSE(cudf::types_equal(lhs4, rhs4)); } TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) { - fixed_width_column_wrapper lhs1{1, 1}; - fixed_width_column_wrapper rhs1{2}; + cudf::test::fixed_width_column_wrapper lhs1{1, 1}; + cudf::test::fixed_width_column_wrapper rhs1{2}; - EXPECT_FALSE(column_types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); - fixed_width_column_wrapper lhs2{1, 1}; - fixed_width_column_wrapper rhs2{2}; + cudf::test::fixed_width_column_wrapper lhs2{1, 1}; + cudf::test::fixed_width_column_wrapper rhs2{2}; - EXPECT_FALSE(column_types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); - fixed_width_column_wrapper lhs3{1, 1}; - fixed_width_column_wrapper rhs3{2}; + cudf::test::fixed_width_column_wrapper lhs3{1, 1}; + cudf::test::fixed_width_column_wrapper rhs3{2}; - EXPECT_FALSE(column_types_equal(lhs3, rhs3)); + EXPECT_FALSE(cudf::types_equal(lhs3, rhs3)); - fixed_width_column_wrapper lhs4{}; - fixed_width_column_wrapper rhs4{42}; + cudf::test::fixed_width_column_wrapper lhs4{}; + cudf::test::fixed_width_column_wrapper rhs4{42}; - EXPECT_FALSE(column_types_equal(lhs4, rhs4)); + EXPECT_FALSE(cudf::types_equal(lhs4, rhs4)); // Same rep, different scale - fixed_point_column_wrapper lhs5({10000}, numeric::scale_type{-3}); - fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); + cudf::test::fixed_point_column_wrapper lhs5({10000}, numeric::scale_type{-3}); + cudf::test::fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); - EXPECT_FALSE(column_types_equal(lhs5, rhs5)); - EXPECT_TRUE(column_types_equivalent(lhs5, rhs5)); + EXPECT_FALSE(cudf::types_equal(lhs5, rhs5)); + EXPECT_TRUE(cudf::types_equivalent(lhs5, rhs5)); // Different rep, same scale - fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1}); - fixed_point_column_wrapper rhs6({4200}, numeric::scale_type{-1}); + cudf::test::fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1}); + cudf::test::fixed_point_column_wrapper rhs6({4200}, numeric::scale_type{-1}); - EXPECT_FALSE(column_types_equal(lhs6, rhs6)); + EXPECT_FALSE(cudf::types_equal(lhs6, rhs6)); } TEST_F(ColumnTypeCheckTest, DifferentDictionary) { - dictionary_column_wrapper lhs1{1, 1, 1, 2, 2, 3}; - dictionary_column_wrapper rhs1{0, 0, 42, 42}; + cudf::test::dictionary_column_wrapper lhs1{1, 1, 1, 2, 2, 3}; + cudf::test::dictionary_column_wrapper rhs1{0, 0, 42, 42}; - EXPECT_FALSE(column_types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); - dictionary_column_wrapper lhs2{3.14, 3.14, 5.00}; - dictionary_column_wrapper rhs2{0, 0, 42, 42}; + cudf::test::dictionary_column_wrapper lhs2{3.14, 3.14, 5.00}; + cudf::test::dictionary_column_wrapper rhs2{0, 0, 42, 42}; - EXPECT_FALSE(column_types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); - dictionary_column_wrapper lhs3{1, 1, 1, 2, 2, 3}; - dictionary_column_wrapper rhs3{8, 8}; + cudf::test::dictionary_column_wrapper lhs3{1, 1, 1, 2, 2, 3}; + cudf::test::dictionary_column_wrapper rhs3{8, 8}; - EXPECT_FALSE(column_types_equal(lhs3, rhs3)); + EXPECT_FALSE(cudf::types_equal(lhs3, rhs3)); - dictionary_column_wrapper lhs4{1, 1, 2, 3}, rhs4{}; - EXPECT_FALSE(column_types_equal(lhs4, rhs4)); + cudf::test::dictionary_column_wrapper lhs4{1, 1, 2, 3}, rhs4{}; + EXPECT_FALSE(cudf::types_equal(lhs4, rhs4)); } TEST_F(ColumnTypeCheckTest, DifferentLists) { - using LCW_i = lists_column_wrapper; - using LCW_f = lists_column_wrapper; + using LCW_i = cudf::test::lists_column_wrapper; + using LCW_f = cudf::test::lists_column_wrapper; // Different nested level LCW_i lhs1{LCW_i{1, 1, 2, 3}, LCW_i{}, LCW_i{42, 42}}; LCW_i rhs1{LCW_i{LCW_i{8, 8, 8}, LCW_i{9, 9}}, LCW_i{LCW_i{42, 42}}}; - EXPECT_FALSE(column_types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); // Different base column type LCW_i lhs2{LCW_i{1, 1, 2, 3}, LCW_i{}, LCW_i{42, 42}}; LCW_f rhs2{LCW_f{9.0, 9.1}, LCW_f{3.14}, LCW_f{}}; - EXPECT_FALSE(column_types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); } TEST_F(ColumnTypeCheckTest, DifferentStructs) { - fixed_width_column_wrapper lf1{1, 1, 1}; - fixed_width_column_wrapper rf1{2, 2}; + cudf::test::fixed_width_column_wrapper lf1{1, 1, 1}; + cudf::test::fixed_width_column_wrapper rf1{2, 2}; + + cudf::test::structs_column_wrapper lhs1{lf1}; + cudf::test::structs_column_wrapper rhs1{rf1}; - structs_column_wrapper lhs1{lf1}; - structs_column_wrapper rhs1{rf1}; + EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); - EXPECT_FALSE(column_types_equal(lhs1, rhs1)); + cudf::test::fixed_width_column_wrapper lf2{1, 1, 1}; + cudf::test::fixed_width_column_wrapper rf2{2, 2}; - fixed_width_column_wrapper lf2{1, 1, 1}; - fixed_width_column_wrapper rf2{2, 2}; + cudf::test::strings_column_wrapper lf3{"a", "b", "c"}; - strings_column_wrapper lf3{"a", "b", "c"}; + cudf::test::structs_column_wrapper lhs2{lf2, lf3}; + cudf::test::structs_column_wrapper rhs2{rf2}; - structs_column_wrapper lhs2{lf2, lf3}; - structs_column_wrapper rhs2{rf2}; + EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); +} - EXPECT_FALSE(column_types_equal(lhs2, rhs2)); +TYPED_TEST(ColumnTypeCheckTestTyped, AllTypesEqual) +{ + { + // An empty table + cudf::table_view tbl{}; + EXPECT_TRUE(cudf::all_types_equal(tbl.begin(), tbl.end())); + } + + { + // A table with one column + cudf::test::fixed_width_column_wrapper col1{1, 2, 3}; + cudf::table_view tbl{{col1}}; + EXPECT_TRUE(cudf::all_types_equal(tbl.begin(), tbl.end())); + } + + { + // A table with all the same types + cudf::test::fixed_width_column_wrapper col1{1, 2, 3}; + cudf::test::fixed_width_column_wrapper col2{4, 5, 6}; + cudf::test::fixed_width_column_wrapper col3{7, 8, 9}; + cudf::table_view tbl{{col1, col2, col3}}; + EXPECT_TRUE(cudf::all_types_equal(tbl.begin(), tbl.end())); + } } -} // namespace test -} // namespace cudf +TEST_F(ColumnTypeCheckTest, AllTypesNotEqual) +{ + // A table with different types + cudf::test::fixed_width_column_wrapper col1{1, 2, 3}; + cudf::test::fixed_width_column_wrapper col2{3.14, 1.57, 2.71}; + cudf::table_view tbl{{col1, col2}}; + EXPECT_FALSE(cudf::all_types_equal(tbl.begin(), tbl.end())); +} From c91df106f55b68068e9f1b4cf977fbe5257ea9b3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 9 Apr 2024 11:57:43 -0700 Subject: [PATCH 16/30] Rename function. --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 8 ++------ cpp/src/dictionary/detail/concatenate.cu | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 39bf2805d0f..0a89a7c082b 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -921,7 +921,7 @@ Use the `CUDF_EXPECTS` macro to enforce runtime conditions necessary for correct Example usage: ```c++ -CUDF_EXPECTS(cudf::column_types_equal(lhs, rhs), "Column type mismatch", cudf::data_type_error); +CUDF_EXPECTS(cudf::types_equal(lhs, rhs), "Type mismatch", cudf::data_type_error); ``` The first argument is the conditional expression expected to resolve to `true` under normal @@ -1031,11 +1031,7 @@ as discussed in [Specializing Type-Dispatched Code Paths](#specializing-type-dis When comparing the data types of two columns or scalars, do not directly compare `a.type() == b.type()`. Nested types such as lists of structs of integers will not be handled -properly if only the top level type is compared. Instead, use one of the following helpers: - - * `cudf::column_types_equal` for comparing the types of two columns - * `cudf::column_scalar_types_equal` for comparing the types of a column and a scalar - * `cudf::scalar_types_equal` for comparing the types of two scalars +properly if only the top level type is compared. Instead, use the `cudf::types_equal` function. # Type Dispatcher diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 671df3df9e4..c35ddbad29e 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -220,8 +220,7 @@ std::unique_ptr concatenate(host_span columns, // empty column may not have keys so we create an empty column_view place-holder if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0}; auto keys = dict_view.keys(); - // TODO: Compare using cudf::column_types_equal to ensure nested types are - // handled correctly. + // TODO: Use cudf::types_equal to ensure nested types are handled correctly. CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match"); return keys; }); From c7422538c1ebc4e31cef9ebf27eca631ef11f4b7 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 10 Apr 2024 14:07:04 -0700 Subject: [PATCH 17/30] Rename to have_same_types, apply PR feedback. --- .../developer_guide/DEVELOPER_GUIDE.md | 4 +- cpp/include/cudf/detail/scatter.cuh | 2 +- cpp/include/cudf/lists/detail/scatter.cuh | 2 +- cpp/include/cudf/utilities/type_checks.hpp | 14 ++-- cpp/src/copying/concatenate.cu | 2 +- cpp/src/copying/copy.cu | 8 +-- cpp/src/copying/copy_range.cu | 6 +- cpp/src/copying/scatter.cu | 6 +- cpp/src/copying/shift.cu | 2 +- cpp/src/dictionary/add_keys.cu | 2 +- cpp/src/dictionary/detail/concatenate.cu | 2 +- cpp/src/dictionary/remove_keys.cu | 5 +- cpp/src/dictionary/replace.cu | 7 +- cpp/src/dictionary/search.cu | 4 +- cpp/src/dictionary/set_keys.cu | 3 +- cpp/src/filling/fill.cu | 9 +-- cpp/src/filling/sequence.cu | 2 +- cpp/src/groupby/groupby.cu | 13 ++-- cpp/src/interop/dlpack.cpp | 2 +- cpp/src/labeling/label_bins.cu | 7 +- cpp/src/lists/combine/concatenate_rows.cu | 2 +- cpp/src/lists/contains.cu | 2 +- cpp/src/lists/sequences.cu | 2 +- cpp/src/lists/set_operations.cu | 2 +- cpp/src/reductions/reductions.cpp | 2 +- cpp/src/reductions/segmented/reductions.cpp | 2 +- cpp/src/replace/clamp.cu | 8 +-- cpp/src/replace/nulls.cu | 8 ++- cpp/src/replace/replace.cu | 4 +- cpp/src/rolling/detail/lead_lag_nested.cuh | 2 +- cpp/src/search/contains_scalar.cu | 4 +- cpp/src/strings/slice.cu | 2 +- cpp/src/table/row_operators.cu | 2 +- cpp/src/table/table_view.cpp | 15 +++-- cpp/src/transform/one_hot_encode.cu | 2 +- cpp/src/utilities/type_checks.cpp | 24 +++---- .../utilities_tests/type_check_tests.cpp | 66 +++++++++---------- 37 files changed, 130 insertions(+), 121 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 0a89a7c082b..571e5970337 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -921,7 +921,7 @@ Use the `CUDF_EXPECTS` macro to enforce runtime conditions necessary for correct Example usage: ```c++ -CUDF_EXPECTS(cudf::types_equal(lhs, rhs), "Type mismatch", cudf::data_type_error); +CUDF_EXPECTS(cudf::have_same_types(lhs, rhs), "Type mismatch", cudf::data_type_error); ``` The first argument is the conditional expression expected to resolve to `true` under normal @@ -1031,7 +1031,7 @@ as discussed in [Specializing Type-Dispatched Code Paths](#specializing-type-dis When comparing the data types of two columns or scalars, do not directly compare `a.type() == b.type()`. Nested types such as lists of structs of integers will not be handled -properly if only the top level type is compared. Instead, use the `cudf::types_equal` function. +properly if only the top level type is compared. Instead, use the `cudf::have_same_types` function. # Type Dispatcher diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 187367cfd50..f0dd7dd1a50 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -214,7 +214,7 @@ struct column_scatterer_impl { // check the keys match dictionary_column_view const source(source_in); dictionary_column_view const target(target_in); - CUDF_EXPECTS(cudf::types_equal(source.keys(), target.keys()), + CUDF_EXPECTS(cudf::have_same_types(source.keys(), target.keys()), "scatter dictionary keys must be the same type", cudf::data_type_error); diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 395dde94d53..e31183ed2ca 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -100,7 +100,7 @@ std::unique_ptr scatter_impl(rmm::device_uvector cons rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(types_equal(source, target), "Mismatched column types."); + CUDF_EXPECTS(have_same_types(source, target), "Mismatched column types."); auto const child_column_type = lists_column_view(target).child().type(); diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 1d8a64e7112..9e0b079bb44 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -37,7 +37,7 @@ namespace cudf { * @param rhs The second `column_view` to compare * @return true if column types match */ -bool types_equal(column_view const& lhs, column_view const& rhs); +bool have_same_types(column_view const& lhs, column_view const& rhs); /** * @brief Compares the type of a `column_view` and a `scalar` @@ -54,7 +54,7 @@ bool types_equal(column_view const& lhs, column_view const& rhs); * @param rhs The `scalar` to compare * @return true if column/scalar types match */ -bool types_equal(column_view const& lhs, scalar const& rhs); +bool have_same_types(column_view const& lhs, scalar const& rhs); /** * @brief Compares the type of a `scalar` and a `column_view` @@ -71,7 +71,7 @@ bool types_equal(column_view const& lhs, scalar const& rhs); * @param rhs The `column_view` to compare * @return true if column/scalar types match */ -bool types_equal(scalar const& lhs, column_view const& rhs); +bool have_same_types(scalar const& lhs, column_view const& rhs); /** * @brief Compares the type of two `scalar`s @@ -86,7 +86,7 @@ bool types_equal(scalar const& lhs, column_view const& rhs); * @param rhs The second `scalar` to compare * @return true if scalar types match */ -bool types_equal(scalar const& lhs, scalar const& rhs); +bool have_same_types(scalar const& lhs, scalar const& rhs); /** * @brief Compare the type IDs of two `column_view`s @@ -102,7 +102,7 @@ bool types_equivalent(column_view const& lhs, column_view const& rhs); /** * @brief Compare the types of a range of `column_view` or `scalar` objects * - * This function returns true if cudf::types_equal is true for every pair of + * This function returns true if cudf::have_same_types is true for every pair of * consecutive objects (`column_view` or `scalar`) in the range. * * @tparam ForwardIt Forward iterator @@ -111,10 +111,10 @@ bool types_equivalent(column_view const& lhs, column_view const& rhs); * @return true if all types match */ template -inline bool all_types_equal(ForwardIt first, ForwardIt last) +inline bool all_have_same_types(ForwardIt first, ForwardIt last) { return first == last || std::all_of(std::next(first), last, [want = *first](auto const& c) { - return cudf::types_equal(want, c); + return cudf::have_same_types(want, c); }); } diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 33514df9562..ae240eecfb4 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -462,7 +462,7 @@ void traverse_children::operator()(host_span */ void bounds_and_type_check(host_span cols, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(cudf::all_types_equal(cols.begin(), cols.end()), + CUDF_EXPECTS(cudf::all_have_same_types(cols.begin(), cols.end()), "Type mismatch in columns to concatenate.", cudf::data_type_error); diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 7ad0da5f09e..189a99426b1 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -365,7 +365,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, CUDF_EXPECTS( lhs.size() == rhs.size(), "Both columns must be of the same size", std::invalid_argument); CUDF_EXPECTS( - cudf::types_equal(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -380,7 +380,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, "Boolean mask column must be the same size as rhs column", std::invalid_argument); CUDF_EXPECTS( - cudf::types_equal(rhs, lhs), "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(rhs, lhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, !lhs.is_valid(stream), rhs.has_nulls(), boolean_mask, stream, mr); } @@ -395,7 +395,7 @@ std::unique_ptr copy_if_else(column_view const& lhs, "Boolean mask column must be the same size as lhs column", std::invalid_argument); CUDF_EXPECTS( - cudf::types_equal(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else(lhs, rhs, lhs.has_nulls(), !rhs.is_valid(stream), boolean_mask, stream, mr); } @@ -407,7 +407,7 @@ std::unique_ptr copy_if_else(scalar const& lhs, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS( - cudf::types_equal(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); + cudf::have_same_types(lhs, rhs), "Both inputs must be of the same type", cudf::data_type_error); return copy_if_else( lhs, rhs, !lhs.is_valid(stream), !rhs.is_valid(stream), boolean_mask, stream, mr); } diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 5e7586bb15a..89ce16c28a1 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -147,7 +147,7 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator() copy_range(column_view const& source, (target_begin <= target.size() - (source_end - source_begin)), "Range is out of bounds.", std::out_of_range); - CUDF_EXPECTS(cudf::types_equal(target, source), "Data type mismatch.", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(target, source), "Data type mismatch.", cudf::data_type_error); return cudf::type_dispatcher( target.type(), diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 3e446d9b097..20e7f1c80a7 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -113,7 +113,7 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - CUDF_EXPECTS(cudf::types_equal(target, source.get()), + CUDF_EXPECTS(cudf::have_same_types(target, source.get()), "scalar and column types must match", cudf::data_type_error); @@ -146,7 +146,7 @@ struct column_scalar_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - CUDF_EXPECTS(cudf::types_equal(target, source.get()), + CUDF_EXPECTS(cudf::have_same_types(target, source.get()), "scalar and column types must match", cudf::data_type_error); @@ -490,7 +490,7 @@ std::unique_ptr
boolean_mask_scatter( CUDF_EXPECTS(std::all_of(thrust::counting_iterator(0), thrust::counting_iterator(target.num_columns()), [&input, &target](auto index) { - return cudf::types_equal(target.column(index), input[index].get()); + return cudf::have_same_types(target.column(index), input[index].get()); }), "Type mismatch in input scalar and target column", cudf::data_type_error); diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 5c34369e56b..c83e9723feb 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -158,7 +158,7 @@ std::unique_ptr shift(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::types_equal(input, fill_value), + CUDF_EXPECTS(cudf::have_same_types(input, fill_value), "shift requires each fill value type to match the corresponding column type.", cudf::data_type_error); diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index 7d92d3d7e4a..e253042784a 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -56,7 +56,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls"); auto old_keys = dictionary_column.keys(); // [a,b,c,d,f] CUDF_EXPECTS( - cudf::types_equal(new_keys, old_keys), "Keys must be the same type", cudf::data_type_error); + cudf::have_same_types(new_keys, old_keys), "Keys must be the same type", cudf::data_type_error); // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] auto combined_keys = cudf::detail::concatenate( diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index c35ddbad29e..f66286d1b30 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -220,7 +220,7 @@ std::unique_ptr concatenate(host_span columns, // empty column may not have keys so we create an empty column_view place-holder if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0}; auto keys = dict_view.keys(); - // TODO: Use cudf::types_equal to ensure nested types are handled correctly. + // TODO: Use cudf::have_same_types to ensure nested types are handled correctly. CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match"); return keys; }); diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 95516efdbf0..1bf113b8ba1 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -156,8 +156,9 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); - CUDF_EXPECTS( - cudf::types_equal(keys_view, keys_to_remove), "keys types must match", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(keys_view, keys_to_remove), + "keys types must match", + cudf::data_type_error); // locate keys to remove by searching the keys column auto const matches = cudf::detail::contains(keys_to_remove, keys_view, stream, mr); diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 02028274cb6..a8a2da639d7 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -85,8 +85,9 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, { if (input.is_empty()) { return cudf::empty_like(input.parent()); } if (!input.has_nulls()) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS( - cudf::types_equal(input.keys(), replacement.keys()), "keys must match", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(input.keys(), replacement.keys()), + "keys must match", + cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); // first combine the keys so both input dictionaries have the same set @@ -121,7 +122,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, if (!input.has_nulls() || !replacement.is_valid(stream)) { return std::make_unique(input.parent(), stream, mr); } - CUDF_EXPECTS(cudf::types_equal(input.parent(), replacement), + CUDF_EXPECTS(cudf::have_same_types(input.parent(), replacement), "keys must match scalar type", cudf::data_type_error); diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index ede34962564..acc39045367 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -76,7 +76,7 @@ struct find_index_fn { if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); } - CUDF_EXPECTS(cudf::types_equal(input.parent(), key), + CUDF_EXPECTS(cudf::have_same_types(input.parent(), key), "search key type must match dictionary keys type", cudf::data_type_error); @@ -120,7 +120,7 @@ struct find_insert_index_fn { if (!key.is_valid(stream)) { return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, 0, false, stream, mr); } - CUDF_EXPECTS(cudf::types_equal(input.parent(), key), + CUDF_EXPECTS(cudf::have_same_types(input.parent(), key), "search key type must match dictionary keys type", cudf::data_type_error); diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 0cda45cff62..3d810266557 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -124,7 +124,8 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column { CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls"); auto keys = dictionary_column.keys(); - CUDF_EXPECTS(cudf::types_equal(keys, new_keys), "keys types must match", cudf::data_type_error); + CUDF_EXPECTS( + cudf::have_same_types(keys, new_keys), "keys types must match", cudf::data_type_error); // copy the keys -- use cudf::distinct to make sure there are no duplicates, // then sort the results. diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 682d116b09f..85acff6fcf4 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -110,7 +110,7 @@ struct out_of_place_fill_range_dispatch { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::types_equal(input, value), "Data type mismatch.", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(input, value), "Data type mismatch.", cudf::data_type_error); auto p_ret = std::make_unique(input, stream, mr); if (end != begin) { // otherwise no fill @@ -137,7 +137,7 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator(); auto p_scalar = static_cast(&value); return cudf::strings::detail::fill( @@ -154,7 +154,7 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator()(input, stream, mr); cudf::dictionary_column_view const target(input); CUDF_EXPECTS( - cudf::types_equal(target.parent(), value), "Data type mismatch.", cudf::data_type_error); + cudf::have_same_types(target.parent(), value), "Data type mismatch.", cudf::data_type_error); // if the scalar is invalid, then just copy the column and fill the null mask if (!value.is_valid(stream)) { @@ -220,7 +220,8 @@ void fill_in_place(mutable_column_view& destination, "Range is out of bounds."); CUDF_EXPECTS(destination.nullable() || value.is_valid(stream), "destination should be nullable or value should be non-null."); - CUDF_EXPECTS(cudf::types_equal(destination, value), "Data type mismatch.", cudf::data_type_error); + CUDF_EXPECTS( + cudf::have_same_types(destination, value), "Data type mismatch.", cudf::data_type_error); if (end != begin) { // otherwise no-op cudf::type_dispatcher( diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 79538ad01a9..2cf4217c0ab 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -128,7 +128,7 @@ std::unique_ptr sequence(size_type size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::types_equal(init, step), + CUDF_EXPECTS(cudf::have_same_types(init, step), "init and step must be of the same type.", cudf::data_type_error); CUDF_EXPECTS(size >= 0, "size must be >= 0"); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index e568be99986..97922874b97 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -312,12 +312,13 @@ std::pair, std::unique_ptr
> groupby::shift( CUDF_FUNC_RANGE(); CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), "Mismatch number of fill_values and columns."); - CUDF_EXPECTS( - std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.num_columns()), - [&](auto i) { return cudf::types_equal(values.column(i), fill_values[i].get()); }), - "values and fill_value should have the same type.", - cudf::data_type_error); + CUDF_EXPECTS(std::all_of(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(values.num_columns()), + [&](auto i) { + return cudf::have_same_types(values.column(i), fill_values[i].get()); + }), + "values and fill_value should have the same type.", + cudf::data_type_error); auto stream = cudf::get_default_stream(); std::vector> results; diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index c70062138fc..085ecb45f75 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -231,7 +231,7 @@ DLManagedTensor* to_dlpack(table_view const& input, DLDataType const dltype = data_type_to_DLDataType(type); // Ensure all columns are the same type - CUDF_EXPECTS(cudf::all_types_equal(input.begin(), input.end()), + CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), "All columns required to have same data type", cudf::data_type_error); diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index fe0d2fb292e..fd0c2cf6372 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -208,9 +208,10 @@ std::unique_ptr label_bins(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE() - CUDF_EXPECTS(cudf::types_equal(input, left_edges) && cudf::types_equal(input, right_edges), - "The input and edge columns must have the same types.", - cudf::data_type_error); + CUDF_EXPECTS( + cudf::have_same_types(input, left_edges) && cudf::have_same_types(input, right_edges), + "The input and edge columns must have the same types.", + cudf::data_type_error); CUDF_EXPECTS(left_edges.size() == right_edges.size(), "The left and right edge columns must be of the same length."); CUDF_EXPECTS(!left_edges.has_nulls() && !right_edges.has_nulls(), diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index f005d822e12..1c23c4c6c84 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -206,7 +206,7 @@ std::unique_ptr concatenate_rows(table_view const& input, [](column_view const& col) { return col.type().id() == cudf::type_id::LIST; }), "All columns of the input table must be of list column type.", cudf::data_type_error); - CUDF_EXPECTS(cudf::all_types_equal(input.begin(), input.end()), + CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), "The types of entries in the input columns must be the same.", cudf::data_type_error); diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 88bbb2812ec..8bb5b55984a 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -194,7 +194,7 @@ std::unique_ptr dispatch_index_of(lists_column_view const& lists, // comparisons. auto const child = lists.child(); - CUDF_EXPECTS(cudf::types_equal(child, search_keys), + CUDF_EXPECTS(cudf::have_same_types(child, search_keys), "Type/Scale of search key does not match list column element type.", cudf::data_type_error); CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty."); diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index 93ac17c6b1e..3aab19dad0b 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -149,7 +149,7 @@ std::unique_ptr sequences(column_view const& starts, CUDF_EXPECTS(!steps_cv.has_nulls(), "steps input column must not have nulls."); CUDF_EXPECTS(starts.size() == steps_cv.size(), "starts and steps input columns must have the same number of rows."); - CUDF_EXPECTS(cudf::types_equal(starts, steps_cv), + CUDF_EXPECTS(cudf::have_same_types(starts, steps_cv), "starts and steps input columns must have the same type.", cudf::data_type_error); } diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index dc8aa5d798e..6395ae2d72b 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -51,7 +51,7 @@ namespace { void check_compatibility(lists_column_view const& lhs, lists_column_view const& rhs) { CUDF_EXPECTS(lhs.size() == rhs.size(), "The input lists column must have the same size."); - CUDF_EXPECTS(types_equal(lhs.child(), rhs.child()), + CUDF_EXPECTS(have_same_types(lhs.child(), rhs.child()), "The input lists columns must have children having the same type structure"); } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 9bf1855bf02..0d98f58ab76 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -155,7 +155,7 @@ std::unique_ptr reduce(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(!init.has_value() || cudf::types_equal(col, init.value().get()), + CUDF_EXPECTS(!init.has_value() || cudf::have_same_types(col, init.value().get()), "column and initial value must be the same type", cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 06e23aa4e5a..f32465f3823 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -112,7 +112,7 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(!init.has_value() || cudf::types_equal(segmented_values, init.value().get()), + CUDF_EXPECTS(!init.has_value() || cudf::have_same_types(segmented_values, init.value().get()), "column and initial value must be the same type", cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index b884f38b6fe..82b8f90d1f4 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -200,7 +200,7 @@ struct dispatch_clamp { rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS( - types_equal(input, lo), "mismatching types of scalar and input", cudf::data_type_error); + have_same_types(input, lo), "mismatching types of scalar and input", cudf::data_type_error); auto lo_itr = make_optional_iterator(lo, nullate::YES{}); auto hi_itr = make_optional_iterator(hi, nullate::YES{}); @@ -325,11 +325,11 @@ std::unique_ptr clamp(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS( - cudf::types_equal(lo, hi), "mismatching types of limit scalars", cudf::data_type_error); - CUDF_EXPECTS(cudf::types_equal(lo_replace, hi_replace), + cudf::have_same_types(lo, hi), "mismatching types of limit scalars", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(lo_replace, hi_replace), "mismatching types of replace scalars", cudf::data_type_error); - CUDF_EXPECTS(cudf::types_equal(lo, lo_replace), + CUDF_EXPECTS(cudf::have_same_types(lo, lo_replace), "mismatching types of limit and replace scalars", cudf::data_type_error); diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 53ec10a122f..94018cc0314 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -308,7 +308,7 @@ struct replace_nulls_scalar_kernel_forwarder { rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS( - cudf::types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); + cudf::have_same_types(input, replacement), "Data type mismatch", cudf::data_type_error); std::unique_ptr output = cudf::detail::allocate_like( input, input.size(), cudf::mask_allocation_policy::NEVER, stream, mr); auto output_view = output->mutable_view(); @@ -344,7 +344,8 @@ std::unique_ptr replace_nulls_scalar_kernel_forwarder::operator()< rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); + CUDF_EXPECTS( + cudf::have_same_types(input, replacement), "Data type mismatch", cudf::data_type_error); cudf::strings_column_view input_s(input); auto const& repl = static_cast(replacement); return cudf::strings::detail::replace_nulls(input_s, repl, stream, mr); @@ -410,7 +411,8 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::types_equal(input, replacement), "Data type mismatch", cudf::data_type_error); + CUDF_EXPECTS( + cudf::have_same_types(input, replacement), "Data type mismatch", cudf::data_type_error); CUDF_EXPECTS(replacement.size() == input.size(), "Column size mismatch"); if (input.is_empty()) { return cudf::empty_like(input); } diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index f87d924c988..c9ff43618b3 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -303,8 +303,8 @@ std::unique_ptr find_and_replace_all(cudf::column_view const& inpu CUDF_EXPECTS(values_to_replace.size() == replacement_values.size(), "values_to_replace and replacement_values size mismatch."); - CUDF_EXPECTS(cudf::types_equal(input_col, values_to_replace) && - cudf::types_equal(input_col, replacement_values), + CUDF_EXPECTS(cudf::have_same_types(input_col, values_to_replace) && + cudf::have_same_types(input_col, replacement_values), "Columns type mismatch", cudf::data_type_error); CUDF_EXPECTS(not values_to_replace.has_nulls(), "values_to_replace must not have nulls"); diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index f3544fc4804..0a274b94a18 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -100,7 +100,7 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, { CUDF_EXPECTS(op == aggregation::LEAD || op == aggregation::LAG, "Unexpected aggregation type in compute_lead_lag_for_nested"); - CUDF_EXPECTS(cudf::types_equal(input, default_outputs), + CUDF_EXPECTS(cudf::have_same_types(input, default_outputs), "Defaults column type must match input column.", cudf::data_type_error); // Because LEAD/LAG. diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 69b4c9c2d39..e88acf68e28 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -64,7 +64,7 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { - CUDF_EXPECTS(cudf::types_equal(haystack, needle), + CUDF_EXPECTS(cudf::have_same_types(haystack, needle), "Scalar and column types must match", cudf::data_type_error); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller @@ -91,7 +91,7 @@ struct contains_scalar_dispatch { scalar const& needle, rmm::cuda_stream_view stream) const { - CUDF_EXPECTS(cudf::types_equal(haystack, needle), + CUDF_EXPECTS(cudf::have_same_types(haystack, needle), "Scalar and column types must match", cudf::data_type_error); // Don't need to check for needle validity. If it is invalid, it should be handled by the caller diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index 9fe8caab28c..a3de2af78c2 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -228,7 +228,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, "Parameter starts must have the same number of rows as strings."); CUDF_EXPECTS(stops_column.size() == strings_count, "Parameter stops must have the same number of rows as strings."); - CUDF_EXPECTS(cudf::types_equal(starts_column, stops_column), + CUDF_EXPECTS(cudf::have_same_types(starts_column, stops_column), "Parameters starts and stops must be of the same type.", cudf::data_type_error); CUDF_EXPECTS(starts_column.null_count() == 0, "Parameter starts must not contain nulls."); diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 90c8b2c87fb..33267111cc6 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -380,7 +380,7 @@ void check_shape_compatibility(table_view const& lhs, table_view const& rhs) CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Cannot compare tables with different number of columns"); for (size_type i = 0; i < lhs.num_columns(); ++i) { - CUDF_EXPECTS(types_equivalent(lhs.column(i), rhs.column(i)), + CUDF_EXPECTS(have_same_types(lhs.column(i), rhs.column(i)), "Cannot compare tables with different column types"); } } diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index 63c85cd75d1..c100fdfea76 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -147,12 +147,13 @@ bool has_nested_nullable_columns(table_view const& input) bool have_same_types(table_view const& lhs, table_view const& rhs) { - return std::equal( - lhs.begin(), - lhs.end(), - rhs.begin(), - rhs.end(), - [](column_view const& lcol, column_view const& rcol) { return cudf::types_equal(lcol, rcol); }); + return std::equal(lhs.begin(), + lhs.end(), + rhs.begin(), + rhs.end(), + [](column_view const& lcol, column_view const& rcol) { + return cudf::have_same_types(lcol, rcol); + }); } namespace detail { @@ -163,7 +164,7 @@ bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) return std::all_of(thrust::counting_iterator(0), thrust::counting_iterator(lhs.num_columns()), [lhs, rhs](auto const i) { - return cudf::types_equal(lhs.column(i), rhs.column(i)) and + return cudf::have_same_types(lhs.column(i), rhs.column(i)) and cudf::is_relationally_comparable(lhs.column(i).type()); }); } diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 49c368e93b4..38e9056034f 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -61,7 +61,7 @@ std::pair, table_view> one_hot_encode(column_view const& rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::types_equal(input, categories), + CUDF_EXPECTS(cudf::have_same_types(input, categories), "Mismatch type between input and categories.", cudf::data_type_error); diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 776c0293583..3ca219a2dbc 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -51,7 +51,7 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_view { if (rhs.type().id() != type_id::LIST) { return false; } auto const& ci = lists_column_view::child_column_index; - return types_equal(lhs.child(ci), rhs.child(ci)); + return have_same_types(lhs.child(ci), rhs.child(ci)); } template <> @@ -62,7 +62,7 @@ bool columns_equal_fn::operator()(column_view const& lhs, column_vi lhs.child_end(), rhs.child_begin(), rhs.child_end(), - [](auto const& lhs, auto const& rhs) { return types_equal(lhs, rhs); }); + [](auto const& lhs, auto const& rhs) { return have_same_types(lhs, rhs); }); } struct column_scalar_equal_fn { @@ -79,7 +79,7 @@ bool column_scalar_equal_fn::operator()(column_view const& col, sc // It is not possible to have a scalar dictionary, so compare the dictionary // column keys type to the scalar type. auto col_keys = cudf::dictionary_column_view(col).keys(); - return types_equal(col_keys, slr); + return have_same_types(col_keys, slr); } template <> @@ -88,7 +88,7 @@ bool column_scalar_equal_fn::operator()(column_view const& col, scala if (slr.type().id() != type_id::LIST) { return false; } auto const& ci = lists_column_view::child_column_index; auto const list_slr = static_cast(&slr); - return types_equal(col.child(ci), list_slr->view()); + return have_same_types(col.child(ci), list_slr->view()); } template <> @@ -101,7 +101,7 @@ bool column_scalar_equal_fn::operator()(column_view const& col, sca col.child_end(), slr_tbl.begin(), slr_tbl.end(), - [](auto const& lhs, auto const& rhs) { return types_equal(lhs, rhs); }); + [](auto const& lhs, auto const& rhs) { return have_same_types(lhs, rhs); }); } struct scalars_equal_fn { @@ -118,7 +118,7 @@ bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& rh if (rhs.type().id() != type_id::LIST) { return false; } auto const list_lhs = static_cast(&lhs); auto const list_rhs = static_cast(&rhs); - return types_equal(list_lhs->view(), list_rhs->view()); + return have_same_types(list_lhs->view(), list_rhs->view()); } template <> @@ -131,29 +131,29 @@ bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& tbl_lhs.end(), tbl_rhs.begin(), tbl_rhs.end(), - [](auto const& lhs, auto const& rhs) { return types_equal(lhs, rhs); }); + [](auto const& lhs, auto const& rhs) { return have_same_types(lhs, rhs); }); } }; // namespace // Implementation note: avoid using double dispatch for this function // as it increases code paths to NxN for N types. -bool types_equal(column_view const& lhs, column_view const& rhs) +bool have_same_types(column_view const& lhs, column_view const& rhs) { return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } -bool types_equal(column_view const& lhs, scalar const& rhs) +bool have_same_types(column_view const& lhs, scalar const& rhs) { return type_dispatcher(lhs.type(), column_scalar_equal_fn{}, lhs, rhs); } -bool types_equal(scalar const& lhs, column_view const& rhs) +bool have_same_types(scalar const& lhs, column_view const& rhs) { return type_dispatcher(rhs.type(), column_scalar_equal_fn{}, rhs, lhs); } -bool types_equal(scalar const& lhs, scalar const& rhs) +bool have_same_types(scalar const& lhs, scalar const& rhs) { return type_dispatcher(lhs.type(), scalars_equal_fn{}, lhs, rhs); } @@ -163,7 +163,7 @@ bool types_equivalent(column_view const& lhs, column_view const& rhs) // Check if the columns have fixed point types. This is the only case where // type equality and equivalence differ. if (cudf::is_fixed_point(lhs.type())) { return lhs.type().id() == rhs.type().id(); } - return types_equal(lhs, rhs); + return have_same_types(lhs, rhs); } } // namespace cudf diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index b6ac779ef42..2089734b4a4 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -34,19 +34,19 @@ TYPED_TEST_SUITE(ColumnTypeCheckTestTyped, cudf::test::FixedWidthTypes); TYPED_TEST(ColumnTypeCheckTestTyped, SameFixedWidth) { cudf::test::fixed_width_column_wrapper lhs{1, 1}, rhs{2}; - EXPECT_TRUE(cudf::types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::have_same_types(lhs, rhs)); } TEST_F(ColumnTypeCheckTest, SameString) { cudf::test::strings_column_wrapper lhs{{'a', 'a'}}, rhs{{'b'}}; - EXPECT_TRUE(cudf::types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::have_same_types(lhs, rhs)); cudf::test::strings_column_wrapper lhs2{}, rhs2{{'b'}}; - EXPECT_TRUE(cudf::types_equal(lhs2, rhs2)); + EXPECT_TRUE(cudf::have_same_types(lhs2, rhs2)); cudf::test::strings_column_wrapper lhs3{}, rhs3{}; - EXPECT_TRUE(cudf::types_equal(lhs3, rhs3)); + EXPECT_TRUE(cudf::have_same_types(lhs3, rhs3)); } TEST_F(ColumnTypeCheckTest, SameList) @@ -54,26 +54,26 @@ TEST_F(ColumnTypeCheckTest, SameList) using LCW = cudf::test::lists_column_wrapper; LCW lhs{}, rhs{}; - EXPECT_TRUE(cudf::types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::have_same_types(lhs, rhs)); LCW lhs2{{1, 2, 3}}, rhs2{{4, 5}}; - EXPECT_TRUE(cudf::types_equal(lhs2, rhs2)); + EXPECT_TRUE(cudf::have_same_types(lhs2, rhs2)); LCW lhs3{{LCW{1}, LCW{2, 3}}}, rhs3{{LCW{4, 5}}}; - EXPECT_TRUE(cudf::types_equal(lhs3, rhs3)); + EXPECT_TRUE(cudf::have_same_types(lhs3, rhs3)); LCW lhs4{{LCW{1}, LCW{}, LCW{2, 3}}}, rhs4{{LCW{4, 5}, LCW{}}}; - EXPECT_TRUE(cudf::types_equal(lhs4, rhs4)); + EXPECT_TRUE(cudf::have_same_types(lhs4, rhs4)); } TYPED_TEST(ColumnTypeCheckTestTyped, SameDictionary) { using DCW = cudf::test::dictionary_column_wrapper; DCW lhs{1, 1, 2, 3}, rhs{5, 5}; - EXPECT_TRUE(cudf::types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::have_same_types(lhs, rhs)); DCW lhs2{}, rhs2{}; - EXPECT_TRUE(cudf::types_equal(lhs2, rhs2)); + EXPECT_TRUE(cudf::have_same_types(lhs2, rhs2)); } TEST_F(ColumnTypeCheckTest, SameStruct) @@ -90,7 +90,7 @@ TEST_F(ColumnTypeCheckTest, SameStruct) DCW lf4{5, 5, 5}, rf4{9, 9}; SCW lhs{lf1, lf2, lf3, lf4}, rhs{rf1, rf2, rf3, rf4}; - EXPECT_TRUE(cudf::types_equal(lhs, rhs)); + EXPECT_TRUE(cudf::have_same_types(lhs, rhs)); } TEST_F(ColumnTypeCheckTest, DifferentBasics) @@ -98,22 +98,22 @@ TEST_F(ColumnTypeCheckTest, DifferentBasics) cudf::test::fixed_width_column_wrapper lhs1{1, 1}; cudf::test::strings_column_wrapper rhs1{"a", "bb"}; - EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::have_same_types(lhs1, rhs1)); cudf::test::lists_column_wrapper lhs2{{"hello"}, {"world", "!"}}; cudf::test::strings_column_wrapper rhs2{"", "kk"}; - EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::have_same_types(lhs2, rhs2)); cudf::test::fixed_width_column_wrapper lhs3{1, 1}; cudf::test::dictionary_column_wrapper rhs3{2, 2}; - EXPECT_FALSE(cudf::types_equal(lhs3, rhs3)); + EXPECT_FALSE(cudf::have_same_types(lhs3, rhs3)); cudf::test::lists_column_wrapper lhs4{{8, 8, 8}, {10, 10}}; cudf::test::structs_column_wrapper rhs4{rhs2, rhs3}; - EXPECT_FALSE(cudf::types_equal(lhs4, rhs4)); + EXPECT_FALSE(cudf::have_same_types(lhs4, rhs4)); } TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) @@ -121,35 +121,35 @@ TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) cudf::test::fixed_width_column_wrapper lhs1{1, 1}; cudf::test::fixed_width_column_wrapper rhs1{2}; - EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::have_same_types(lhs1, rhs1)); cudf::test::fixed_width_column_wrapper lhs2{1, 1}; cudf::test::fixed_width_column_wrapper rhs2{2}; - EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::have_same_types(lhs2, rhs2)); cudf::test::fixed_width_column_wrapper lhs3{1, 1}; cudf::test::fixed_width_column_wrapper rhs3{2}; - EXPECT_FALSE(cudf::types_equal(lhs3, rhs3)); + EXPECT_FALSE(cudf::have_same_types(lhs3, rhs3)); cudf::test::fixed_width_column_wrapper lhs4{}; cudf::test::fixed_width_column_wrapper rhs4{42}; - EXPECT_FALSE(cudf::types_equal(lhs4, rhs4)); + EXPECT_FALSE(cudf::have_same_types(lhs4, rhs4)); // Same rep, different scale cudf::test::fixed_point_column_wrapper lhs5({10000}, numeric::scale_type{-3}); cudf::test::fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); - EXPECT_FALSE(cudf::types_equal(lhs5, rhs5)); + EXPECT_FALSE(cudf::have_same_types(lhs5, rhs5)); EXPECT_TRUE(cudf::types_equivalent(lhs5, rhs5)); // Different rep, same scale cudf::test::fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1}); cudf::test::fixed_point_column_wrapper rhs6({4200}, numeric::scale_type{-1}); - EXPECT_FALSE(cudf::types_equal(lhs6, rhs6)); + EXPECT_FALSE(cudf::have_same_types(lhs6, rhs6)); } TEST_F(ColumnTypeCheckTest, DifferentDictionary) @@ -157,20 +157,20 @@ TEST_F(ColumnTypeCheckTest, DifferentDictionary) cudf::test::dictionary_column_wrapper lhs1{1, 1, 1, 2, 2, 3}; cudf::test::dictionary_column_wrapper rhs1{0, 0, 42, 42}; - EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::have_same_types(lhs1, rhs1)); cudf::test::dictionary_column_wrapper lhs2{3.14, 3.14, 5.00}; cudf::test::dictionary_column_wrapper rhs2{0, 0, 42, 42}; - EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::have_same_types(lhs2, rhs2)); cudf::test::dictionary_column_wrapper lhs3{1, 1, 1, 2, 2, 3}; cudf::test::dictionary_column_wrapper rhs3{8, 8}; - EXPECT_FALSE(cudf::types_equal(lhs3, rhs3)); + EXPECT_FALSE(cudf::have_same_types(lhs3, rhs3)); cudf::test::dictionary_column_wrapper lhs4{1, 1, 2, 3}, rhs4{}; - EXPECT_FALSE(cudf::types_equal(lhs4, rhs4)); + EXPECT_FALSE(cudf::have_same_types(lhs4, rhs4)); } TEST_F(ColumnTypeCheckTest, DifferentLists) @@ -182,13 +182,13 @@ TEST_F(ColumnTypeCheckTest, DifferentLists) LCW_i lhs1{LCW_i{1, 1, 2, 3}, LCW_i{}, LCW_i{42, 42}}; LCW_i rhs1{LCW_i{LCW_i{8, 8, 8}, LCW_i{9, 9}}, LCW_i{LCW_i{42, 42}}}; - EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::have_same_types(lhs1, rhs1)); // Different base column type LCW_i lhs2{LCW_i{1, 1, 2, 3}, LCW_i{}, LCW_i{42, 42}}; LCW_f rhs2{LCW_f{9.0, 9.1}, LCW_f{3.14}, LCW_f{}}; - EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::have_same_types(lhs2, rhs2)); } TEST_F(ColumnTypeCheckTest, DifferentStructs) @@ -199,7 +199,7 @@ TEST_F(ColumnTypeCheckTest, DifferentStructs) cudf::test::structs_column_wrapper lhs1{lf1}; cudf::test::structs_column_wrapper rhs1{rf1}; - EXPECT_FALSE(cudf::types_equal(lhs1, rhs1)); + EXPECT_FALSE(cudf::have_same_types(lhs1, rhs1)); cudf::test::fixed_width_column_wrapper lf2{1, 1, 1}; cudf::test::fixed_width_column_wrapper rf2{2, 2}; @@ -209,7 +209,7 @@ TEST_F(ColumnTypeCheckTest, DifferentStructs) cudf::test::structs_column_wrapper lhs2{lf2, lf3}; cudf::test::structs_column_wrapper rhs2{rf2}; - EXPECT_FALSE(cudf::types_equal(lhs2, rhs2)); + EXPECT_FALSE(cudf::have_same_types(lhs2, rhs2)); } TYPED_TEST(ColumnTypeCheckTestTyped, AllTypesEqual) @@ -217,14 +217,14 @@ TYPED_TEST(ColumnTypeCheckTestTyped, AllTypesEqual) { // An empty table cudf::table_view tbl{}; - EXPECT_TRUE(cudf::all_types_equal(tbl.begin(), tbl.end())); + EXPECT_TRUE(cudf::all_have_same_types(tbl.begin(), tbl.end())); } { // A table with one column cudf::test::fixed_width_column_wrapper col1{1, 2, 3}; cudf::table_view tbl{{col1}}; - EXPECT_TRUE(cudf::all_types_equal(tbl.begin(), tbl.end())); + EXPECT_TRUE(cudf::all_have_same_types(tbl.begin(), tbl.end())); } { @@ -233,7 +233,7 @@ TYPED_TEST(ColumnTypeCheckTestTyped, AllTypesEqual) cudf::test::fixed_width_column_wrapper col2{4, 5, 6}; cudf::test::fixed_width_column_wrapper col3{7, 8, 9}; cudf::table_view tbl{{col1, col2, col3}}; - EXPECT_TRUE(cudf::all_types_equal(tbl.begin(), tbl.end())); + EXPECT_TRUE(cudf::all_have_same_types(tbl.begin(), tbl.end())); } } @@ -243,5 +243,5 @@ TEST_F(ColumnTypeCheckTest, AllTypesNotEqual) cudf::test::fixed_width_column_wrapper col1{1, 2, 3}; cudf::test::fixed_width_column_wrapper col2{3.14, 1.57, 2.71}; cudf::table_view tbl{{col1, col2}}; - EXPECT_FALSE(cudf::all_types_equal(tbl.begin(), tbl.end())); + EXPECT_FALSE(cudf::all_have_same_types(tbl.begin(), tbl.end())); } From fce091f93adce93e4a8bf99f2b294b99f0079c30 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 16:02:11 -0700 Subject: [PATCH 18/30] Add back deprecated methods. --- cpp/include/cudf/utilities/type_checks.hpp | 44 ++++++++++++++----- cpp/src/utilities/type_checks.cpp | 7 ++- cpp/tests/utilities/column_utilities.cu | 10 +++-- .../utilities_tests/type_check_tests.cpp | 2 +- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 9e0b079bb44..f78a60b5e4e 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -22,6 +22,39 @@ namespace cudf { +/** + * @brief Compares the type of two `column_view`s + * + * @deprecated Since 24.06. Use cudf::have_same_types instead. + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is compared. + * - For dictionary types, the type of the keys are compared if both are + * non-empty columns. + * - For lists types, the type of child columns are compared recursively. + * - For struct types, the type of each field are compared in order. + * - For all other types, the `id` of `data_type` is compared. + * + * @param lhs The first `column_view` to compare + * @param rhs The second `column_view` to compare + * @return true if column types match + */ +[[deprecated]] bool column_types_equal(column_view const& lhs, column_view const& rhs); + +/** + * @brief Compare the type IDs of two `column_view`s + * + * @deprecated Since 24.06. + * + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is ignored. + * + * @param lhs The first `column_view` to compare + * @param rhs The second `column_view` to compare + * @return true if column types match + */ +[[deprecated]] bool column_types_equivalent(column_view const& lhs, column_view const& rhs); + /** * @brief Compares the type of two `column_view`s * @@ -88,17 +121,6 @@ bool have_same_types(scalar const& lhs, column_view const& rhs); */ bool have_same_types(scalar const& lhs, scalar const& rhs); -/** - * @brief Compare the type IDs of two `column_view`s - * This function returns true if the type of `lhs` equals that of `rhs`. - * - For fixed point types, the scale is ignored. - * - * @param lhs The first `column_view` to compare - * @param rhs The second `column_view` to compare - * @return true if column types match - */ -bool types_equivalent(column_view const& lhs, column_view const& rhs); - /** * @brief Compare the types of a range of `column_view` or `scalar` objects * diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 3ca219a2dbc..d31beef4ca8 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -143,6 +143,11 @@ bool have_same_types(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } +bool column_types_equal(column_view const& lhs, column_view const& rhs) +{ + return have_same_types(lhs, rhs); +} + bool have_same_types(column_view const& lhs, scalar const& rhs) { return type_dispatcher(lhs.type(), column_scalar_equal_fn{}, lhs, rhs); @@ -158,7 +163,7 @@ bool have_same_types(scalar const& lhs, scalar const& rhs) return type_dispatcher(lhs.type(), scalars_equal_fn{}, lhs, rhs); } -bool types_equivalent(column_view const& lhs, column_view const& rhs) +bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { // Check if the columns have fixed point types. This is the only case where // type equality and equivalence differ. diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 047b096a283..24356551128 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -238,9 +239,10 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, template struct column_property_comparator { - bool types_equivalent(cudf::data_type const& lhs, cudf::data_type const& rhs) + bool types_equivalent(cudf::column_view const& lhs, cudf::column_view const& rhs) { - return is_fixed_point(lhs) ? lhs.id() == rhs.id() : lhs == rhs; + return cudf::is_fixed_point(lhs.type()) ? lhs.type().id() == rhs.type().id() + : cudf::have_same_types(lhs, rhs); } bool compare_common(cudf::column_view const& lhs, @@ -252,9 +254,9 @@ struct column_property_comparator { bool result = true; if (check_exact_equality) { - PROP_EXPECT_EQ(lhs.type(), rhs.type()); + PROP_EXPECT_EQ(cudf::have_same_types(lhs, rhs), true); } else { - PROP_EXPECT_EQ(types_equivalent(lhs.type(), rhs.type()), true); + PROP_EXPECT_EQ(types_equivalent(lhs, rhs), true); } auto const lhs_size = check_exact_equality ? lhs.size() : lhs_row_indices.size(); diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index 2089734b4a4..fecb896f95a 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -143,7 +143,7 @@ TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) cudf::test::fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); EXPECT_FALSE(cudf::have_same_types(lhs5, rhs5)); - EXPECT_TRUE(cudf::types_equivalent(lhs5, rhs5)); + EXPECT_TRUE(cudf::column_types_equivalent(lhs5, rhs5)); // Different rep, same scale cudf::test::fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1}); From cafdf6f1b535a389541d7c2cce57ef02cd960cac Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 18:04:34 -0500 Subject: [PATCH 19/30] Fix verb/subject agreement. --- cpp/include/cudf/utilities/type_checks.hpp | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index f78a60b5e4e..b9f3804c4a2 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -23,7 +23,7 @@ namespace cudf { /** - * @brief Compares the type of two `column_view`s + * @brief Compare the types of two `column_view`s * * @deprecated Since 24.06. Use cudf::have_same_types instead. * @@ -68,56 +68,56 @@ namespace cudf { * * @param lhs The first `column_view` to compare * @param rhs The second `column_view` to compare - * @return true if column types match + * @return true if types match */ bool have_same_types(column_view const& lhs, column_view const& rhs); /** - * @brief Compares the type of a `column_view` and a `scalar` + * @brief Compare the types of a `column_view` and a `scalar` * * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is compared. - * - For dictionary column types, the type of the keys are compared to the + * - For dictionary column types, the type of the keys is compared to the * scalar type. - * - For lists types, the type of child columns are compared recursively. - * - For struct types, the type of each field are compared in order. + * - For lists types, the types of child columns are compared recursively. + * - For struct types, the types of each field are compared in order. * - For all other types, the `id` of `data_type` is compared. * * @param lhs The `column_view` to compare * @param rhs The `scalar` to compare - * @return true if column/scalar types match + * @return true if types match */ bool have_same_types(column_view const& lhs, scalar const& rhs); /** - * @brief Compares the type of a `scalar` and a `column_view` + * @brief Compare the types of a `scalar` and a `column_view` * * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is compared. - * - For dictionary column types, the type of the keys are compared to the + * - For dictionary column types, the type of the keys is compared to the * scalar type. - * - For lists types, the type of child columns are compared recursively. - * - For struct types, the type of each field are compared in order. + * - For lists types, the types of child columns are compared recursively. + * - For struct types, the types of each field are compared in order. * - For all other types, the `id` of `data_type` is compared. * * @param lhs The `scalar` to compare * @param rhs The `column_view` to compare - * @return true if column/scalar types match + * @return true if types match */ bool have_same_types(scalar const& lhs, column_view const& rhs); /** - * @brief Compares the type of two `scalar`s + * @brief Compare the types of two `scalar`s * * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is compared. - * - For lists types, the type of child columns are compared recursively. - * - For struct types, the type of each field are compared in order. + * - For lists types, the types of child columns are compared recursively. + * - For struct types, the types of each field are compared in order. * - For all other types, the `id` of `data_type` is compared. * * @param lhs The first `scalar` to compare * @param rhs The second `scalar` to compare - * @return true if scalar types match + * @return true if types match */ bool have_same_types(scalar const& lhs, scalar const& rhs); From cda4e0d78f3fd12be05132fbd181d423f29142f3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 18:22:31 -0500 Subject: [PATCH 20/30] Update docs for all_have_same_types. Co-authored-by: Lawrence Mitchell --- cpp/include/cudf/utilities/type_checks.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index b9f3804c4a2..46b5362147b 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -124,8 +124,7 @@ bool have_same_types(scalar const& lhs, scalar const& rhs); /** * @brief Compare the types of a range of `column_view` or `scalar` objects * - * This function returns true if cudf::have_same_types is true for every pair of - * consecutive objects (`column_view` or `scalar`) in the range. + * This function returns true if all objects in the range have the same type, in the sense of cudf::have_same_types. * * @tparam ForwardIt Forward iterator * @param first The first iterator From ec740c9e1f9f8ff488fc887146df52ab324e8a88 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 16:28:09 -0700 Subject: [PATCH 21/30] Un-deprecate column_types_equivalent. --- cpp/include/cudf/utilities/type_checks.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index b9f3804c4a2..674a42a01b0 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -44,8 +44,6 @@ namespace cudf { /** * @brief Compare the type IDs of two `column_view`s * - * @deprecated Since 24.06. - * * This function returns true if the type of `lhs` equals that of `rhs`. * - For fixed point types, the scale is ignored. * @@ -53,7 +51,7 @@ namespace cudf { * @param rhs The second `column_view` to compare * @return true if column types match */ -[[deprecated]] bool column_types_equivalent(column_view const& lhs, column_view const& rhs); +bool column_types_equivalent(column_view const& lhs, column_view const& rhs); /** * @brief Compares the type of two `column_view`s From ec0eeffe2b40701692611278aeabe45e2ab6c9fe Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 16:40:32 -0700 Subject: [PATCH 22/30] Apply patch from Lawrence. --- cpp/src/dictionary/detail/concatenate.cu | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index f66286d1b30..796389b7de1 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -81,13 +83,13 @@ struct compute_children_offsets_fn { } /** - * @brief Return the first keys().type of the dictionary columns. + * @brief Return the first keys() of the dictionary columns. */ - data_type get_keys_type() + column_view get_keys() { auto const view(*std::find_if( columns_ptrs.begin(), columns_ptrs.end(), [](auto pcv) { return pcv->size() > 0; })); - return dictionary_column_view(*view).keys().type(); + return dictionary_column_view(*view).keys(); } /** @@ -213,15 +215,16 @@ std::unique_ptr concatenate(host_span columns, // concatenate the keys (and check the keys match) compute_children_offsets_fn child_offsets_fn{columns}; - auto keys_type = child_offsets_fn.get_keys_type(); + auto expected_keys = child_offsets_fn.get_keys(); std::vector keys_views(columns.size()); - std::transform(columns.begin(), columns.end(), keys_views.begin(), [keys_type](auto cv) { + std::transform(columns.begin(), columns.end(), keys_views.begin(), [expected_keys](auto cv) { auto dict_view = dictionary_column_view(cv); // empty column may not have keys so we create an empty column_view place-holder - if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0}; + if (dict_view.is_empty()) return column_view{expected_keys.type(), 0, nullptr, nullptr, 0}; auto keys = dict_view.keys(); - // TODO: Use cudf::have_same_types to ensure nested types are handled correctly. - CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match"); + CUDF_EXPECTS(cudf::have_same_types(keys, expected_keys), + "key types of all dictionary columns must match", + cudf::data_type_error); return keys; }); auto all_keys = @@ -275,7 +278,7 @@ std::unique_ptr concatenate(host_span columns, // now recompute the indices values for the new keys_column; // the keys offsets (pair.first) are for mapping to the input keys - auto indices_column = type_dispatcher(keys_type, + auto indices_column = type_dispatcher(expected_keys.type(), dispatch_compute_indices{}, all_keys->view(), // old keys all_indices->view(), // old indices From 5afc5e20970d8bd3321bdc2158d4c8f85035ac70 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 16:41:05 -0700 Subject: [PATCH 23/30] Use std::equal. --- cpp/src/groupby/groupby.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 97922874b97..44501e01dea 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -312,14 +312,14 @@ std::pair, std::unique_ptr
> groupby::shift( CUDF_FUNC_RANGE(); CUDF_EXPECTS(values.num_columns() == static_cast(fill_values.size()), "Mismatch number of fill_values and columns."); - CUDF_EXPECTS(std::all_of(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(values.num_columns()), - [&](auto i) { - return cudf::have_same_types(values.column(i), fill_values[i].get()); - }), + CUDF_EXPECTS(std::equal(values.begin(), + values.end(), + fill_values.begin(), + [](auto const& col, auto const& scalar) { + return cudf::have_same_types(col, scalar.get()); + }), "values and fill_value should have the same type.", cudf::data_type_error); - auto stream = cudf::get_default_stream(); std::vector> results; auto const& group_offsets = helper().group_offsets(stream); From 18cdd5229cdd7ea4e0193069e573e0254bf0e0b1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 16:41:18 -0700 Subject: [PATCH 24/30] Switch back to column_types_equivalent. --- cpp/src/table/row_operators.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 33267111cc6..71b437cb47d 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -380,7 +380,7 @@ void check_shape_compatibility(table_view const& lhs, table_view const& rhs) CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Cannot compare tables with different number of columns"); for (size_type i = 0; i < lhs.num_columns(); ++i) { - CUDF_EXPECTS(have_same_types(lhs.column(i), rhs.column(i)), + CUDF_EXPECTS(column_types_equivalent(lhs.column(i), rhs.column(i)), "Cannot compare tables with different column types"); } } From 7c05d952d0dcac665a294d2e30d8d7029f035d90 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 12 Apr 2024 18:06:05 -0700 Subject: [PATCH 25/30] Fix tests that had incorrect nested types. --- cpp/tests/copying/get_value_tests.cpp | 11 +++-------- cpp/tests/io/parquet_writer_test.cpp | 4 +--- cpp/tests/utilities/column_utilities.cu | 8 +------- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 2be3c26af1d..99b86c86997 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -542,11 +542,6 @@ struct ListGetStructValueTest : public cudf::test::BaseFixture { return SCW{{field1, field2, field3}, mask}; } - /** - * @brief Create a 0-length structs column - */ - SCW zero_length_struct() { return SCW{}; } - /** * @brief Concatenate structs columns, allow specifying inputs in `initializer_list` */ @@ -653,7 +648,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty) cudf::size_type index = 2; // For well-formed list column, an empty list still holds the complete structure of // a 0-length structs column - auto expected_data = this->zero_length_struct(); + auto expected_data = this->make_test_structs_column({}, {}, {}, no_nulls()); auto s = cudf::get_element(list_column->view(), index); auto typed_s = static_cast(s.get()); @@ -757,8 +752,8 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullEmpty) auto list_column_nested = this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 1}); - auto expected_data = - this->make_test_lists_column(0, {0}, this->zero_length_struct().release(), {}); + auto expected_data = this->make_test_lists_column( + 0, {0}, this->make_test_structs_column({}, {}, {}, no_nulls()).release(), {}); cudf::size_type index = 1; auto s = cudf::get_element(list_column_nested->view(), index); diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp index ffa672fb564..92294c094cf 100644 --- a/cpp/tests/io/parquet_writer_test.cpp +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -556,9 +556,7 @@ TEST_F(ParquetWriterTest, EmptyList) auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - using lcw = cudf::test::lists_column_wrapper; - auto expected = lcw{lcw{}, lcw{}, lcw{}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), L0->view()); } TEST_F(ParquetWriterTest, DeepEmptyList) diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 24356551128..7cc2777972e 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -239,12 +239,6 @@ std::unique_ptr generate_child_row_indices(lists_column_view const& c, template struct column_property_comparator { - bool types_equivalent(cudf::column_view const& lhs, cudf::column_view const& rhs) - { - return cudf::is_fixed_point(lhs.type()) ? lhs.type().id() == rhs.type().id() - : cudf::have_same_types(lhs, rhs); - } - bool compare_common(cudf::column_view const& lhs, cudf::column_view const& rhs, cudf::column_view const& lhs_row_indices, @@ -256,7 +250,7 @@ struct column_property_comparator { if (check_exact_equality) { PROP_EXPECT_EQ(cudf::have_same_types(lhs, rhs), true); } else { - PROP_EXPECT_EQ(types_equivalent(lhs, rhs), true); + PROP_EXPECT_EQ(cudf::column_types_equivalent(lhs, rhs), true); } auto const lhs_size = check_exact_equality ? lhs.size() : lhs_row_indices.size(); From 15d05e0bd5a1b41d4e9e7fcc1dca4b0f859ef8ab Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 30 Apr 2024 17:53:45 -0500 Subject: [PATCH 26/30] Fix clang-format. --- cpp/include/cudf/utilities/type_checks.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 9953037a253..f0a984ebf1f 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -122,7 +122,8 @@ bool have_same_types(scalar const& lhs, scalar const& rhs); /** * @brief Compare the types of a range of `column_view` or `scalar` objects * - * This function returns true if all objects in the range have the same type, in the sense of cudf::have_same_types. + * This function returns true if all objects in the range have the same type, in the sense of + * cudf::have_same_types. * * @tparam ForwardIt Forward iterator * @param first The first iterator From 57c1f644a213b67f30bc511e392f7f222174f294 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 1 May 2024 13:33:38 -0700 Subject: [PATCH 27/30] Move table_view overload from table_view.hpp to type_checks.hpp. --- cpp/include/cudf/table/table_view.hpp | 11 +---------- cpp/include/cudf/utilities/type_checks.hpp | 9 +++++++++ cpp/src/merge/merge.cu | 1 + cpp/src/search/contains_table.cu | 1 + cpp/src/table/table_view.cpp | 21 ++++++--------------- cpp/src/utilities/type_checks.cpp | 10 ++++++++++ 6 files changed, 28 insertions(+), 25 deletions(-) diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 4f3b23747e6..ad12b1eef4e 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -339,15 +339,6 @@ bool has_nested_nullable_columns(table_view const& input); */ std::vector get_nullable_columns(table_view const& table); -/** - * @brief Checks if two `table_view`s have columns of same types - * - * @param lhs left-side table_view operand - * @param rhs right-side table_view operand - * @return boolean comparison result - */ -bool have_same_types(table_view const& lhs, table_view const& rhs); - /** * @brief Copy column_views from a table_view into another table_view according to * a column indices map. diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index f0a984ebf1f..fd3b0581c11 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -119,6 +119,15 @@ bool have_same_types(scalar const& lhs, column_view const& rhs); */ bool have_same_types(scalar const& lhs, scalar const& rhs); +/** + * @brief Checks if two `table_view`s have columns of same types + * + * @param lhs left-side table_view operand + * @param rhs right-side table_view operand + * @return boolean comparison result + */ +bool have_same_types(table_view const& lhs, table_view const& rhs); + /** * @brief Compare the types of a range of `column_view` or `scalar` objects * diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 5a3be259ed9..630cf328579 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 13417fdab63..466f9093194 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index c100fdfea76..13832b0d9dc 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -145,30 +145,21 @@ bool has_nested_nullable_columns(table_view const& input) }); } -bool have_same_types(table_view const& lhs, table_view const& rhs) +namespace detail { + +template +bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) { return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), [](column_view const& lcol, column_view const& rcol) { - return cudf::have_same_types(lcol, rcol); + return cudf::is_relationally_comparable(lcol.type()) and + cudf::have_same_types(lcol, rcol); }); } -namespace detail { - -template -bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) -{ - return std::all_of(thrust::counting_iterator(0), - thrust::counting_iterator(lhs.num_columns()), - [lhs, rhs](auto const i) { - return cudf::have_same_types(lhs.column(i), rhs.column(i)) and - cudf::is_relationally_comparable(lhs.column(i).type()); - }); -} - // Explicit template instantiation for a table of immutable views template bool is_relationally_comparable(table_view const& lhs, table_view const& rhs); diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index d31beef4ca8..c1f7d2b1dbe 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -163,6 +163,16 @@ bool have_same_types(scalar const& lhs, scalar const& rhs) return type_dispatcher(lhs.type(), scalars_equal_fn{}, lhs, rhs); } +bool have_same_types(table_view const& lhs, table_view const& rhs) +{ + return std::equal( + lhs.begin(), + lhs.end(), + rhs.begin(), + rhs.end(), + [](column_view const& lcol, column_view const& rcol) { return have_same_types(lcol, rcol); }); +} + bool column_types_equivalent(column_view const& lhs, column_view const& rhs) { // Check if the columns have fixed point types. This is the only case where From 25c5ad9744ff3e27f2ee740f3addc0dd61fcb7fc Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 1 May 2024 15:38:29 -0500 Subject: [PATCH 28/30] Mirror scalar/column call. Co-authored-by: Lawrence Mitchell --- cpp/src/utilities/type_checks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index c1f7d2b1dbe..01f86713391 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -155,7 +155,7 @@ bool have_same_types(column_view const& lhs, scalar const& rhs) bool have_same_types(scalar const& lhs, column_view const& rhs) { - return type_dispatcher(rhs.type(), column_scalar_equal_fn{}, rhs, lhs); + return have_same_types(rhs, lhs); } bool have_same_types(scalar const& lhs, scalar const& rhs) From d764f7e47afc342b4b961fc74bcf72e94a21825e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 1 May 2024 13:50:50 -0700 Subject: [PATCH 29/30] Minor cleanup. --- cpp/src/groupby/groupby.cu | 3 ++- cpp/src/replace/clamp.cu | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index d3718cc4c49..e43dfcb4d98 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -315,7 +315,8 @@ std::pair, std::unique_ptr
> groupby::shift( "Mismatch number of fill_values and columns."); CUDF_EXPECTS(std::equal(values.begin(), values.end(), - fill_values.begin(), + fill_values.cbegin(), + fill_values.cend(), [](auto const& col, auto const& scalar) { return cudf::have_same_types(col, scalar.get()); }), diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 492f344f2cf..cb8c0a0d5df 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -200,8 +200,9 @@ struct dispatch_clamp { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_EXPECTS( - have_same_types(input, lo), "mismatching types of scalar and input", cudf::data_type_error); + CUDF_EXPECTS(cudf::have_same_types(input, lo), + "mismatching types of scalar and input", + cudf::data_type_error); auto lo_itr = make_optional_iterator(lo, nullate::YES{}); auto hi_itr = make_optional_iterator(hi, nullate::YES{}); From e0732b965389570667521f4656217b398d86794e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 1 May 2024 13:51:28 -0700 Subject: [PATCH 30/30] Improve dispatching to existing overloads. --- cpp/src/utilities/type_checks.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index c1f7d2b1dbe..dac981fb532 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -127,11 +127,7 @@ bool scalars_equal_fn::operator()(scalar const& lhs, scalar const& if (rhs.type().id() != type_id::STRUCT) { return false; } auto const tbl_lhs = static_cast(&lhs)->view(); auto const tbl_rhs = static_cast(&rhs)->view(); - return std::equal(tbl_lhs.begin(), - tbl_lhs.end(), - tbl_rhs.begin(), - tbl_rhs.end(), - [](auto const& lhs, auto const& rhs) { return have_same_types(lhs, rhs); }); + return have_same_types(tbl_lhs, tbl_rhs); } }; // namespace @@ -155,7 +151,7 @@ bool have_same_types(column_view const& lhs, scalar const& rhs) bool have_same_types(scalar const& lhs, column_view const& rhs) { - return type_dispatcher(rhs.type(), column_scalar_equal_fn{}, rhs, lhs); + return have_same_types(rhs, lhs); } bool have_same_types(scalar const& lhs, scalar const& rhs)