Skip to content

Commit

Permalink
Move cudf::char_utf8 definition from detail to public header (#14779)
Browse files Browse the repository at this point in the history
Moves the `cudf::char_utf8` definition from `cudf/strings/detail/utf8.hpp` to `cudf/types.hpp`, since the type is declared in the public `cudf` namespace and is used by public functions.

Reference: https://github.com/rapidsai/cudf/blob/9acddc08cc209e8d6b94891be6131edd63ff5b43/docs/cudf/source/conf.py#L372-L375

Authors:
  - David Wendt (https://github.com/davidwendt)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #14779
  • Loading branch information
davidwendt authored Jan 19, 2024
1 parent 9bb9af6 commit 446da75
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 35 deletions.
5 changes: 1 addition & 4 deletions cpp/include/cudf/strings/detail/utf8.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,9 +23,6 @@
*/

namespace cudf {

using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes

namespace strings {
namespace detail {

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class mutable_column_view;
class string_view;
class list_view;
class struct_view;

class scalar;

// clang-format off
Expand Down Expand Up @@ -95,6 +94,7 @@ using size_type = int32_t; ///< Row index type for columns and tables
using bitmask_type = uint32_t; ///< Bitmask type stored as 32-bit unsigned integer
using valid_type = uint8_t; ///< Valid type in host memory
using thread_index_type = int64_t; ///< Thread index type in kernels
using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes

/**
* @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast`
Expand Down
83 changes: 53 additions & 30 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@
class PseudoLexer(RegexLexer):
"""Trivial lexer for pseudocode."""

name = 'pseudocode'
aliases = ['pseudo']
name = "pseudocode"
aliases = ["pseudo"]
tokens = {
'root': [
(r'.*\n', PText),
"root": [
(r".*\n", PText),
]
}


lexers['pseudo'] = PseudoLexer()
lexers["pseudo"] = PseudoLexer()

# -- Custom Extensions ----------------------------------------------------
sys.path.append(os.path.abspath("./_ext"))
Expand All @@ -69,6 +69,7 @@ class PseudoLexer(RegexLexer):
"myst_nb",
]


# Preprocess doxygen xml for compatibility with latest Breathe
def clean_definitions(root):
# Breathe can't handle SFINAE properly:
Expand Down Expand Up @@ -105,17 +106,22 @@ def clean_definitions(root):
pass
break


# All of these in type declarations cause Breathe to choke.
# For friend, see https://github.com/breathe-doc/breathe/issues/916
strings_to_remove = ("__forceinline__", "CUDF_HOST_DEVICE", "decltype(auto)", "friend")
strings_to_remove = (
"__forceinline__",
"CUDF_HOST_DEVICE",
"decltype(auto)",
"friend",
)
for node in root.iter():
for string in strings_to_remove:
if node.text is not None:
node.text = node.text.replace(string, "")
if node.tail is not None:
node.tail = node.tail.replace(string, "")


def clean_all_xml_files(path):
for fn in glob.glob(os.path.join(path, "*.xml")):
tree = ET.parse(fn)
Expand All @@ -130,7 +136,7 @@ def clean_all_xml_files(path):
breathe_default_project = "libcudf"


nb_execution_excludepatterns = ['performance-comparisons.ipynb']
nb_execution_excludepatterns = ["performance-comparisons.ipynb"]

nb_execution_mode = "force"
nb_execution_timeout = 300
Expand Down Expand Up @@ -163,9 +169,9 @@ def clean_all_xml_files(path):
# built documents.
#
# The short X.Y version.
version = '24.02'
version = "24.02"
# The full version, including alpha/beta/rc tags.
release = '24.02.00'
release = "24.02.00"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand All @@ -177,7 +183,10 @@ def clean_all_xml_files(path):
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ['venv', "**/includes/**",]
exclude_patterns = [
"venv",
"**/includes/**",
]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
Expand Down Expand Up @@ -286,7 +295,10 @@ def clean_all_xml_files(path):
"pyarrow": ("https://arrow.apache.org/docs/", None),
"python": ("https://docs.python.org/3", None),
"rmm": ("https://docs.rapids.ai/api/rmm/nightly/", None),
"typing_extensions": ("https://typing-extensions.readthedocs.io/en/stable/", None),
"typing_extensions": (
"https://typing-extensions.readthedocs.io/en/stable/",
None,
),
}

# Config numpydoc
Expand Down Expand Up @@ -333,12 +345,15 @@ def _generate_namespaces(namespaces):
all_namespaces.append(f"{base_namespace}::{other_namespace}::")
return all_namespaces

_all_namespaces = _generate_namespaces({
# Note that io::datasource is actually a nested class
"cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
"numeric": {},
"nvtext": {},
})

_all_namespaces = _generate_namespaces(
{
# Note that io::datasource is actually a nested class
"cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
"numeric": {},
"nvtext": {},
}
)

_names_to_skip = {
# External names
Expand Down Expand Up @@ -369,10 +384,6 @@ def _generate_namespaces(namespaces):
# Unsupported by Breathe
# https://github.com/breathe-doc/breathe/issues/355
"deprecated",
# TODO: This type is currently defined in a detail header but it's in
# the public namespace. However, it's used in the detail header, so it
# needs to be put into a public header that can be shared.
"char_utf8",
# TODO: This is currently in a src file but perhaps should be public
"orc::column_statistics",
# Sphinx doesn't know how to distinguish between the ORC and Parquet
Expand All @@ -396,21 +407,24 @@ def _cached_intersphinx_lookup(env, node, contnode):
key = (node, contnode)
if key in _intersphinx_cache:
return _intersphinx_cache[key]
if (ref := intersphinx.resolve_reference_detect_inventory(env, node, contnode)) is not None:
if (
ref := intersphinx.resolve_reference_detect_inventory(
env, node, contnode
)
) is not None:
_intersphinx_cache[key] = ref
return ref


def on_missing_reference(app, env, node, contnode):
# These variables are defined outside the function to speed up the build.
global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, \
_domain_objects, _prefixed_domain_objects, _intersphinx_cache
global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, _domain_objects, _prefixed_domain_objects, _intersphinx_cache

# Precompute and cache domains for faster lookups
if _domain_objects is None:
_domain_objects = {}
_prefixed_domain_objects = {}
for (name, _, _, docname, _, _) in env.domains["cpp"].get_objects():
for name, _, _, docname, _, _ in env.domains["cpp"].get_objects():
_domain_objects[name] = docname
for prefix in _all_namespaces:
_prefixed_domain_objects[f"{prefix}{name}"] = name
Expand Down Expand Up @@ -473,19 +487,25 @@ def on_missing_reference(app, env, node, contnode):
# to fail.
if reftarget != node["reftarget"]:
node["reftarget"] = reftarget
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref

# If the template wasn't the (only) issue, we check the various
# namespace prefixes that may need to be added or removed.
for prefix in _intersphinx_extra_prefixes:
if prefix not in reftarget:
node["reftarget"] = f"{prefix}::{reftarget}"
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref
else:
node["reftarget"] = reftarget.replace(f"{prefix}::", "")
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref

return None
Expand All @@ -499,8 +519,11 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "typing_extensions.Self"),
]


def setup(app):
app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
app.add_js_file(
"https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
)
app.connect("doctree-read", resolve_aliases)
app.connect("missing-reference", on_missing_reference)

0 comments on commit 446da75

Please sign in to comment.