From 9bb9af608233ab51490f0e077b7b8371d462c83e Mon Sep 17 00:00:00 2001 From: Danial Javady <122740063+ZelboK@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:45:08 -0500 Subject: [PATCH 1/2] Expose streams in public filling APIs for label_bins (#14401) Contributes to #925. Introduces cuda_stream parameter for downstream users to provide for `labeling_bins` Authors: - Danial Javady (https://github.com/ZelboK) - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14401 --- cpp/include/cudf/labeling/label_bins.hpp | 4 ++- cpp/src/labeling/label_bins.cu | 12 +++----- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/labeling_bins_test.cpp | 37 ++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 cpp/tests/streams/labeling_bins_test.cpp diff --git a/cpp/include/cudf/labeling/label_bins.hpp b/cpp/include/cudf/labeling/label_bins.hpp index 2776f50a939..d8ea262dfe1 100644 --- a/cpp/include/cudf/labeling/label_bins.hpp +++ b/cpp/include/cudf/labeling/label_bins.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,6 +64,7 @@ enum class inclusive { YES, NO }; * @param left_inclusive Whether or not the left edge is inclusive. * @param right_edges Value of the right edge of each bin. * @param right_inclusive Whether or not the right edge is inclusive. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device. * @return The integer labels of the elements in `input` according to the specified bins. */ @@ -73,6 +74,7 @@ std::unique_ptr label_bins( inclusive left_inclusive, column_view const& right_edges, inclusive right_inclusive, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 1a603785a41..9fecaa1ddb2 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -236,15 +236,11 @@ std::unique_ptr label_bins(column_view const& input, inclusive left_inclusive, column_view const& right_edges, inclusive right_inclusive, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::label_bins(input, - left_edges, - left_inclusive, - right_edges, - right_inclusive, - cudf::get_default_stream(), - mr); + return detail::label_bins( + input, left_edges, left_inclusive, right_edges, right_inclusive, stream, mr); } } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b385c63e9cd..a3b982a6719 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -653,6 +653,7 @@ ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ORCIO_TEST streams/io/orc_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/labeling_bins_test.cpp b/cpp/tests/streams/labeling_bins_test.cpp new file mode 100644 index 00000000000..a1d3983aacc --- /dev/null +++ b/cpp/tests/streams/labeling_bins_test.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +class LabelingBinsStreamTest : public cudf::test::BaseFixture {}; + +TEST_F(LabelingBinsStreamTest, SimpleStringsTest) +{ + cudf::test::strings_column_wrapper left_edges{"a", "b", "c", "d", "e"}; + cudf::test::strings_column_wrapper right_edges{"b", "c", "d", "e", "f"}; + cudf::test::strings_column_wrapper input{"abc", "bcd", "cde", "def", "efg"}; + + cudf::label_bins(input, + left_edges, + cudf::inclusive::YES, + right_edges, + cudf::inclusive::NO, + cudf::test::get_default_stream()); +} From 446da756f0703556734a7409534db3bdc01c7975 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:50:27 -0500 Subject: [PATCH 2/2] Move cudf::char_utf8 definition from detail to public header (#14779) Moves the `cudf::char_utf8` definition from the `cudf/strings/detail/utf8.hpp` to `cudf/types.hpp` since it is declared in the public namespace and used in public functions. Reference: https://github.com/rapidsai/cudf/blob/9acddc08cc209e8d6b94891be6131edd63ff5b43/docs/cudf/source/conf.py#L372-L375 Authors: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14779 --- cpp/include/cudf/strings/detail/utf8.hpp | 5 +- cpp/include/cudf/types.hpp | 2 +- docs/cudf/source/conf.py | 83 +++++++++++++++--------- 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/cpp/include/cudf/strings/detail/utf8.hpp b/cpp/include/cudf/strings/detail/utf8.hpp index e04572535de..5587597cb51 100644 --- a/cpp/include/cudf/strings/detail/utf8.hpp +++ b/cpp/include/cudf/strings/detail/utf8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,9 +23,6 @@ */ namespace cudf { - -using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes - namespace strings { namespace detail { diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 86750ea4ca8..baf07fa3db6 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -62,7 +62,6 @@ class mutable_column_view; class string_view; class list_view; class struct_view; - class scalar; // clang-format off @@ -95,6 +94,7 @@ using size_type = int32_t; ///< Row index type for columns and tables using bitmask_type = uint32_t; ///< Bitmask type stored as 32-bit unsigned integer using valid_type = uint8_t; ///< Valid type in host memory using thread_index_type = int64_t; ///< Thread index type in kernels +using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes /** * @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast` diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index a0bb555365e..34ffd7f0258 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -33,16 +33,16 @@ class PseudoLexer(RegexLexer): """Trivial lexer for pseudocode.""" - name = 'pseudocode' - aliases = ['pseudo'] + name = "pseudocode" + aliases = ["pseudo"] tokens = { - 'root': [ - (r'.*\n', PText), + "root": [ + (r".*\n", PText), ] } -lexers['pseudo'] = PseudoLexer() +lexers["pseudo"] = PseudoLexer() # -- Custom Extensions ---------------------------------------------------- sys.path.append(os.path.abspath("./_ext")) @@ -69,6 +69,7 @@ class PseudoLexer(RegexLexer): "myst_nb", ] + # Preprocess doxygen xml for compatibility with latest Breathe def clean_definitions(root): # Breathe can't handle SFINAE properly: @@ -105,10 +106,14 @@ def clean_definitions(root): pass break - # All of these in type declarations cause Breathe to choke. # For friend, see https://github.com/breathe-doc/breathe/issues/916 - strings_to_remove = ("__forceinline__", "CUDF_HOST_DEVICE", "decltype(auto)", "friend") + strings_to_remove = ( + "__forceinline__", + "CUDF_HOST_DEVICE", + "decltype(auto)", + "friend", + ) for node in root.iter(): for string in strings_to_remove: if node.text is not None: @@ -116,6 +121,7 @@ def clean_definitions(root): if node.tail is not None: node.tail = node.tail.replace(string, "") + def clean_all_xml_files(path): for fn in glob.glob(os.path.join(path, "*.xml")): tree = ET.parse(fn) @@ -130,7 +136,7 @@ def clean_all_xml_files(path): breathe_default_project = "libcudf" -nb_execution_excludepatterns = ['performance-comparisons.ipynb'] +nb_execution_excludepatterns = ["performance-comparisons.ipynb"] nb_execution_mode = "force" nb_execution_timeout = 300 @@ -163,9 +169,9 @@ def clean_all_xml_files(path): # built documents. # # The short X.Y version. -version = '24.02' +version = "24.02" # The full version, including alpha/beta/rc tags. -release = '24.02.00' +release = "24.02.00" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -177,7 +183,10 @@ def clean_all_xml_files(path): # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['venv', "**/includes/**",] +exclude_patterns = [ + "venv", + "**/includes/**", +] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" @@ -286,7 +295,10 @@ def clean_all_xml_files(path): "pyarrow": ("https://arrow.apache.org/docs/", None), "python": ("https://docs.python.org/3", None), "rmm": ("https://docs.rapids.ai/api/rmm/nightly/", None), - "typing_extensions": ("https://typing-extensions.readthedocs.io/en/stable/", None), + "typing_extensions": ( + "https://typing-extensions.readthedocs.io/en/stable/", + None, + ), } # Config numpydoc @@ -333,12 +345,15 @@ def _generate_namespaces(namespaces): all_namespaces.append(f"{base_namespace}::{other_namespace}::") return all_namespaces -_all_namespaces = _generate_namespaces({ - # Note that io::datasource is actually a nested class - "cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"}, - "numeric": {}, - "nvtext": {}, -}) + +_all_namespaces = _generate_namespaces( + { + # Note that io::datasource is actually a nested class + "cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"}, + "numeric": {}, + "nvtext": {}, + } +) _names_to_skip = { # External names @@ -369,10 +384,6 @@ def _generate_namespaces(namespaces): # Unsupported by Breathe # https://github.com/breathe-doc/breathe/issues/355 "deprecated", - # TODO: This type is currently defined in a detail header but it's in - # the public namespace. However, it's used in the detail header, so it - # needs to be put into a public header that can be shared. - "char_utf8", # TODO: This is currently in a src file but perhaps should be public "orc::column_statistics", # Sphinx doesn't know how to distinguish between the ORC and Parquet @@ -396,21 +407,24 @@ def _cached_intersphinx_lookup(env, node, contnode): key = (node, contnode) if key in _intersphinx_cache: return _intersphinx_cache[key] - if (ref := intersphinx.resolve_reference_detect_inventory(env, node, contnode)) is not None: + if ( + ref := intersphinx.resolve_reference_detect_inventory( + env, node, contnode + ) + ) is not None: _intersphinx_cache[key] = ref return ref def on_missing_reference(app, env, node, contnode): # These variables are defined outside the function to speed up the build. - global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, \ - _domain_objects, _prefixed_domain_objects, _intersphinx_cache + global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, _domain_objects, _prefixed_domain_objects, _intersphinx_cache # Precompute and cache domains for faster lookups if _domain_objects is None: _domain_objects = {} _prefixed_domain_objects = {} - for (name, _, _, docname, _, _) in env.domains["cpp"].get_objects(): + for name, _, _, docname, _, _ in env.domains["cpp"].get_objects(): _domain_objects[name] = docname for prefix in _all_namespaces: _prefixed_domain_objects[f"{prefix}{name}"] = name @@ -473,7 +487,9 @@ def on_missing_reference(app, env, node, contnode): # to fail. if reftarget != node["reftarget"]: node["reftarget"] = reftarget - if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None: + if ( + ref := _cached_intersphinx_lookup(env, node, contnode) + ) is not None: return ref # If the template wasn't the (only) issue, we check the various @@ -481,11 +497,15 @@ def on_missing_reference(app, env, node, contnode): for prefix in _intersphinx_extra_prefixes: if prefix not in reftarget: node["reftarget"] = f"{prefix}::{reftarget}" - if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None: + if ( + ref := _cached_intersphinx_lookup(env, node, contnode) + ) is not None: return ref else: node["reftarget"] = reftarget.replace(f"{prefix}::", "") - if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None: + if ( + ref := _cached_intersphinx_lookup(env, node, contnode) + ) is not None: return ref return None @@ -499,8 +519,11 @@ def on_missing_reference(app, env, node, contnode): ("py:class", "typing_extensions.Self"), ] + def setup(app): app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") - app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer") + app.add_js_file( + "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer" + ) app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", on_missing_reference)