Move cudf::char_utf8 definition from detail to public header (#14779)

Moves the `cudf::char_utf8` definition from `cudf/strings/detail/utf8.hpp` to `cudf/types.hpp`, since the type is declared in the public `cudf` namespace and is used in public functions. With the type now defined in a public header, the `char_utf8` entry is also removed from the docs build's list of names to skip. Reference: https://github.com/rapidsai/cudf/blob/9acddc08cc209e8d6b94891be6131edd63ff5b43/docs/cudf/source/conf.py#L372-L375 Authors: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: #14779
rapidsai · Jan 19, 2024 · 446da75 · 446da75
1 parent 9bb9af6
commit 446da75
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 35 deletions.
diff --git a/cpp/include/cudf/strings/detail/utf8.hpp b/cpp/include/cudf/strings/detail/utf8.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -23,9 +23,6 @@
  */
 
 namespace cudf {
-
-using char_utf8 = uint32_t;  ///< UTF-8 characters are 1-4 bytes
-
 namespace strings {
 namespace detail {
 

diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
@@ -62,7 +62,6 @@ class mutable_column_view;
 class string_view;
 class list_view;
 class struct_view;
-
 class scalar;
 
 // clang-format off
@@ -95,6 +94,7 @@ using size_type         = int32_t;   ///< Row index type for columns and tables
 using bitmask_type      = uint32_t;  ///< Bitmask type stored as 32-bit unsigned integer
 using valid_type        = uint8_t;   ///< Valid type in host memory
 using thread_index_type = int64_t;   ///< Thread index type in kernels
+using char_utf8         = uint32_t;  ///< UTF-8 characters are 1-4 bytes
 
 /**
  * @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast`

diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
@@ -33,16 +33,16 @@
 class PseudoLexer(RegexLexer):
     """Trivial lexer for pseudocode."""
 
-    name = 'pseudocode'
-    aliases = ['pseudo']
+    name = "pseudocode"
+    aliases = ["pseudo"]
     tokens = {
-        'root': [
-            (r'.*\n', PText),
+        "root": [
+            (r".*\n", PText),
         ]
     }
 
 
-lexers['pseudo'] = PseudoLexer()
+lexers["pseudo"] = PseudoLexer()
 
 # -- Custom Extensions ----------------------------------------------------
 sys.path.append(os.path.abspath("./_ext"))
@@ -69,6 +69,7 @@ class PseudoLexer(RegexLexer):
     "myst_nb",
 ]
 
+
 # Preprocess doxygen xml for compatibility with latest Breathe
 def clean_definitions(root):
     # Breathe can't handle SFINAE properly:
@@ -105,17 +106,22 @@ def clean_definitions(root):
                                     pass
                                 break
 
-
     # All of these in type declarations cause Breathe to choke.
     # For friend, see https://github.com/breathe-doc/breathe/issues/916
-    strings_to_remove = ("__forceinline__", "CUDF_HOST_DEVICE", "decltype(auto)", "friend")
+    strings_to_remove = (
+        "__forceinline__",
+        "CUDF_HOST_DEVICE",
+        "decltype(auto)",
+        "friend",
+    )
     for node in root.iter():
         for string in strings_to_remove:
             if node.text is not None:
                 node.text = node.text.replace(string, "")
             if node.tail is not None:
                 node.tail = node.tail.replace(string, "")
 
+
 def clean_all_xml_files(path):
     for fn in glob.glob(os.path.join(path, "*.xml")):
         tree = ET.parse(fn)
@@ -130,7 +136,7 @@ def clean_all_xml_files(path):
 breathe_default_project = "libcudf"
 
 
-nb_execution_excludepatterns = ['performance-comparisons.ipynb']
+nb_execution_excludepatterns = ["performance-comparisons.ipynb"]
 
 nb_execution_mode = "force"
 nb_execution_timeout = 300
@@ -163,9 +169,9 @@ def clean_all_xml_files(path):
 # built documents.
 #
 # The short X.Y version.
-version = '24.02'
+version = "24.02"
 # The full version, including alpha/beta/rc tags.
-release = '24.02.00'
+release = "24.02.00"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -177,7 +183,10 @@ def clean_all_xml_files(path):
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['venv', "**/includes/**",]
+exclude_patterns = [
+    "venv",
+    "**/includes/**",
+]
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = "sphinx"
@@ -286,7 +295,10 @@ def clean_all_xml_files(path):
     "pyarrow": ("https://arrow.apache.org/docs/", None),
     "python": ("https://docs.python.org/3", None),
     "rmm": ("https://docs.rapids.ai/api/rmm/nightly/", None),
-    "typing_extensions": ("https://typing-extensions.readthedocs.io/en/stable/", None),
+    "typing_extensions": (
+        "https://typing-extensions.readthedocs.io/en/stable/",
+        None,
+    ),
 }
 
 # Config numpydoc
@@ -333,12 +345,15 @@ def _generate_namespaces(namespaces):
             all_namespaces.append(f"{base_namespace}::{other_namespace}::")
     return all_namespaces
 
-_all_namespaces = _generate_namespaces({
-    # Note that io::datasource is actually a nested class
-    "cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
-    "numeric": {},
-    "nvtext": {},
-})
+
+_all_namespaces = _generate_namespaces(
+    {
+        # Note that io::datasource is actually a nested class
+        "cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
+        "numeric": {},
+        "nvtext": {},
+    }
+)
 
 _names_to_skip = {
     # External names
@@ -369,10 +384,6 @@ def _generate_namespaces(namespaces):
     # Unsupported by Breathe
     # https://github.com/breathe-doc/breathe/issues/355
     "deprecated",
-    # TODO: This type is currently defined in a detail header but it's in
-    # the public namespace. However, it's used in the detail header, so it
-    # needs to be put into a public header that can be shared.
-    "char_utf8",
     # TODO: This is currently in a src file but perhaps should be public
     "orc::column_statistics",
     # Sphinx doesn't know how to distinguish between the ORC and Parquet
@@ -396,21 +407,24 @@ def _cached_intersphinx_lookup(env, node, contnode):
     key = (node, contnode)
     if key in _intersphinx_cache:
         return _intersphinx_cache[key]
-    if (ref := intersphinx.resolve_reference_detect_inventory(env, node, contnode)) is not None:
+    if (
+        ref := intersphinx.resolve_reference_detect_inventory(
+            env, node, contnode
+        )
+    ) is not None:
         _intersphinx_cache[key] = ref
     return ref
 
 
 def on_missing_reference(app, env, node, contnode):
     # These variables are defined outside the function to speed up the build.
-    global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, \
-        _domain_objects, _prefixed_domain_objects, _intersphinx_cache
+    global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, _domain_objects, _prefixed_domain_objects, _intersphinx_cache
 
     # Precompute and cache domains for faster lookups
     if _domain_objects is None:
         _domain_objects = {}
         _prefixed_domain_objects = {}
-        for (name, _, _, docname, _, _) in env.domains["cpp"].get_objects():
+        for name, _, _, docname, _, _ in env.domains["cpp"].get_objects():
             _domain_objects[name] = docname
             for prefix in _all_namespaces:
                 _prefixed_domain_objects[f"{prefix}{name}"] = name
@@ -473,19 +487,25 @@ def on_missing_reference(app, env, node, contnode):
         # to fail.
         if reftarget != node["reftarget"]:
             node["reftarget"] = reftarget
-            if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
+            if (
+                ref := _cached_intersphinx_lookup(env, node, contnode)
+            ) is not None:
                 return ref
 
         # If the template wasn't the (only) issue, we check the various
         # namespace prefixes that may need to be added or removed.
         for prefix in _intersphinx_extra_prefixes:
             if prefix not in reftarget:
                 node["reftarget"] = f"{prefix}::{reftarget}"
-                if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
+                if (
+                    ref := _cached_intersphinx_lookup(env, node, contnode)
+                ) is not None:
                     return ref
             else:
                 node["reftarget"] = reftarget.replace(f"{prefix}::", "")
-                if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
+                if (
+                    ref := _cached_intersphinx_lookup(env, node, contnode)
+                ) is not None:
                     return ref
 
     return None
@@ -499,8 +519,11 @@ def on_missing_reference(app, env, node, contnode):
     ("py:class", "typing_extensions.Self"),
 ]
 
+
 def setup(app):
     app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
-    app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
+    app.add_js_file(
+        "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
+    )
     app.connect("doctree-read", resolve_aliases)
     app.connect("missing-reference", on_missing_reference)