diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index 9d3cc9f90bc..8c037406e45 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -72,18 +73,13 @@ std::unique_ptr set_keys( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Create new dictionaries that have keys merged from the input dictionaries. + * @copydoc + * cudf::dictionary::match_dictionaries(std::vector,rmm::mr::device_memory_resource*) * - * This will concatenate the keys for each dictionary and then call `set_keys` on each. - * The result is a vector of new dictionaries with a common set of keys. - * - * @param input Dictionary columns to match keys. - * @param mr Device memory resource used to allocate the returned column's device memory. * @param stream CUDA stream used for device memory operations and kernel launches. - * @return New dictionary column. */ std::vector> match_dictionaries( - std::vector input, + cudf::host_span input, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index 99a6c705edc..2b66a4d5072 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include namespace cudf { namespace dictionary { @@ -139,6 +140,20 @@ std::unique_ptr set_keys( column_view const& keys, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Create new dictionaries that have keys merged from the input dictionaries. + * + * This will concatenate the keys for each dictionary and then call `set_keys` on each. + * The result is a vector of new dictionaries with a common set of keys. + * + * @param input Dictionary columns to match keys. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New dictionary columns. + */ +std::vector> match_dictionaries( + cudf::host_span input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace dictionary } // namespace cudf diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 9b644f38794..1dbb844a606 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -90,7 +90,8 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, CUDF_EXPECTS(replacement.size() == input.size(), "column sizes must match"); // first combine the keys so both input dictionaries have the same set - auto matched = match_dictionaries({input, replacement}, stream, mr); + auto matched = + match_dictionaries(std::vector({input, replacement}), stream, mr); // now build the new indices by doing replace-null using the updated input indices auto const input_indices = diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 8f07c9cbbed..2e0ab389a9c 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -150,9 +150,10 @@ std::unique_ptr set_keys( new_nulls.second); } -std::vector> match_dictionaries(std::vector input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::vector> match_dictionaries( + cudf::host_span input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector keys(input.size()); std::transform(input.begin(), input.end(), keys.begin(), [](auto& col) { return col.keys(); }); @@ -221,5 +222,12 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column return detail::set_keys(dictionary_column, keys, rmm::cuda_stream_default, mr); } +std::vector> match_dictionaries( + cudf::host_span input, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::match_dictionaries(input, rmm::cuda_stream_default, mr); +} + } // namespace dictionary } // namespace cudf diff --git a/cpp/tests/dictionary/set_keys_test.cpp b/cpp/tests/dictionary/set_keys_test.cpp index ebeb94e0ba9..9e15bc63740 100644 --- a/cpp/tests/dictionary/set_keys_test.cpp +++ b/cpp/tests/dictionary/set_keys_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -84,3 +84,26 @@ TEST_F(DictionarySetKeysTest, Errors) cudf::test::fixed_width_column_wrapper null_keys{{1, 2, 3}, {1, 0, 1}}; EXPECT_THROW(cudf::dictionary::set_keys(dictionary->view(), null_keys), cudf::logic_error); } + +TEST_F(DictionarySetKeysTest, MatchDictionaries) +{ + cudf::test::dictionary_column_wrapper col1{5, 0, 4, 1, 2, 2, 2, 5, 0}; + cudf::test::dictionary_column_wrapper col2{1, 0, 3, 1, 4, 5, 6, 5, 0}; + + auto input = std::vector( + {cudf::dictionary_column_view(col1), cudf::dictionary_column_view(col2)}); + + auto results = cudf::dictionary::match_dictionaries(input); + auto keys1 = cudf::dictionary_column_view(results[0]->view()).keys(); + auto keys2 = cudf::dictionary_column_view(results[1]->view()).keys(); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys1, keys2); + + auto result1 = cudf::dictionary::decode(cudf::dictionary_column_view(results[0]->view())); + auto result2 = cudf::dictionary::decode(cudf::dictionary_column_view(results[1]->view())); + + auto expected1 = cudf::dictionary::decode(cudf::dictionary_column_view(col1)); + auto expected2 = cudf::dictionary::decode(cudf::dictionary_column_view(col2)); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result1->view(), expected1->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result2->view(), expected2->view()); +}