Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branch 24.04 merge branch 24.02 #14806

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cpp/include/cudf/labeling/label_bins.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -64,6 +64,7 @@ enum class inclusive { YES, NO };
* @param left_inclusive Whether or not the left edge is inclusive.
* @param right_edges Value of the right edge of each bin.
* @param right_inclusive Whether or not the right edge is inclusive.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return The integer labels of the elements in `input` according to the specified bins.
*/
Expand All @@ -73,6 +74,7 @@ std::unique_ptr<column> label_bins(
inclusive left_inclusive,
column_view const& right_edges,
inclusive right_inclusive,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
5 changes: 1 addition & 4 deletions cpp/include/cudf/strings/detail/utf8.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,9 +23,6 @@
*/

namespace cudf {

using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes

namespace strings {
namespace detail {

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class mutable_column_view;
class string_view;
class list_view;
class struct_view;

class scalar;

// clang-format off
Expand Down Expand Up @@ -95,6 +94,7 @@ using size_type = int32_t; ///< Row index type for columns and tables
using bitmask_type = uint32_t; ///< Bitmask type stored as 32-bit unsigned integer
using valid_type = uint8_t; ///< Valid type in host memory
using thread_index_type = int64_t; ///< Thread index type in kernels
using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes

/**
* @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast`
Expand Down
12 changes: 4 additions & 8 deletions cpp/src/labeling/label_bins.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -236,15 +236,11 @@ std::unique_ptr<column> label_bins(column_view const& input,
inclusive left_inclusive,
column_view const& right_edges,
inclusive right_inclusive,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::label_bins(input,
left_edges,
left_inclusive,
right_edges,
right_inclusive,
cudf::get_default_stream(),
mr);
return detail::label_bins(
input, left_edges, left_inclusive, right_edges, right_inclusive, stream, mr);
}
} // namespace cudf
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,7 @@ ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_ORCIO_TEST streams/io/orc_test.cpp STREAM_MODE testing)
Expand Down
37 changes: 37 additions & 0 deletions cpp/tests/streams/labeling_bins_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/labeling/label_bins.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>

// Test fixture for the label_bins stream test; adds nothing on top of
// cudf::test::BaseFixture.
class LabelingBinsStreamTest : public cudf::test::BaseFixture {};

// Calls the public cudf::label_bins API with an explicitly supplied stream
// (cudf::test::get_default_stream()). The returned column is discarded: this
// test makes no assertions about the computed labels.
// NOTE(review): presumably stream misuse is detected by the STREAM_MODE testing
// build configuration rather than by this test body — confirm.
TEST_F(LabelingBinsStreamTest, SimpleStringsTest)
{
// Five bins given as parallel left/right edge columns: [a,b), [b,c), ... [e,f).
cudf::test::strings_column_wrapper left_edges{"a", "b", "c", "d", "e"};
cudf::test::strings_column_wrapper right_edges{"b", "c", "d", "e", "f"};
cudf::test::strings_column_wrapper input{"abc", "bcd", "cde", "def", "efg"};

// Left edges inclusive, right edges exclusive; the trailing `mr` argument is
// omitted, so only the stream parameter is passed explicitly.
cudf::label_bins(input,
left_edges,
cudf::inclusive::YES,
right_edges,
cudf::inclusive::NO,
cudf::test::get_default_stream());
}
83 changes: 53 additions & 30 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@
class PseudoLexer(RegexLexer):
"""Trivial lexer for pseudocode."""

name = 'pseudocode'
aliases = ['pseudo']
name = "pseudocode"
aliases = ["pseudo"]
tokens = {
'root': [
(r'.*\n', PText),
"root": [
(r".*\n", PText),
]
}


lexers['pseudo'] = PseudoLexer()
lexers["pseudo"] = PseudoLexer()

# -- Custom Extensions ----------------------------------------------------
sys.path.append(os.path.abspath("./_ext"))
Expand All @@ -69,6 +69,7 @@ class PseudoLexer(RegexLexer):
"myst_nb",
]


# Preprocess doxygen xml for compatibility with latest Breathe
def clean_definitions(root):
# Breathe can't handle SFINAE properly:
Expand Down Expand Up @@ -105,17 +106,22 @@ def clean_definitions(root):
pass
break


# All of these in type declarations cause Breathe to choke.
# For friend, see https://github.com/breathe-doc/breathe/issues/916
strings_to_remove = ("__forceinline__", "CUDF_HOST_DEVICE", "decltype(auto)", "friend")
strings_to_remove = (
"__forceinline__",
"CUDF_HOST_DEVICE",
"decltype(auto)",
"friend",
)
for node in root.iter():
for string in strings_to_remove:
if node.text is not None:
node.text = node.text.replace(string, "")
if node.tail is not None:
node.tail = node.tail.replace(string, "")


def clean_all_xml_files(path):
for fn in glob.glob(os.path.join(path, "*.xml")):
tree = ET.parse(fn)
Expand All @@ -130,7 +136,7 @@ def clean_all_xml_files(path):
breathe_default_project = "libcudf"


nb_execution_excludepatterns = ['performance-comparisons.ipynb']
nb_execution_excludepatterns = ["performance-comparisons.ipynb"]

nb_execution_mode = "force"
nb_execution_timeout = 300
Expand Down Expand Up @@ -163,9 +169,9 @@ def clean_all_xml_files(path):
# built documents.
#
# The short X.Y version.
version = '24.04'
version = "24.04"
# The full version, including alpha/beta/rc tags.
release = '24.04.00'
release = "24.04.00"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand All @@ -177,7 +183,10 @@ def clean_all_xml_files(path):
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ['venv', "**/includes/**",]
exclude_patterns = [
"venv",
"**/includes/**",
]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
Expand Down Expand Up @@ -286,7 +295,10 @@ def clean_all_xml_files(path):
"pyarrow": ("https://arrow.apache.org/docs/", None),
"python": ("https://docs.python.org/3", None),
"rmm": ("https://docs.rapids.ai/api/rmm/nightly/", None),
"typing_extensions": ("https://typing-extensions.readthedocs.io/en/stable/", None),
"typing_extensions": (
"https://typing-extensions.readthedocs.io/en/stable/",
None,
),
}

# Config numpydoc
Expand Down Expand Up @@ -333,12 +345,15 @@ def _generate_namespaces(namespaces):
all_namespaces.append(f"{base_namespace}::{other_namespace}::")
return all_namespaces

_all_namespaces = _generate_namespaces({
# Note that io::datasource is actually a nested class
"cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
"numeric": {},
"nvtext": {},
})

_all_namespaces = _generate_namespaces(
{
# Note that io::datasource is actually a nested class
"cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
"numeric": {},
"nvtext": {},
}
)

_names_to_skip = {
# External names
Expand Down Expand Up @@ -369,10 +384,6 @@ def _generate_namespaces(namespaces):
# Unsupported by Breathe
# https://github.com/breathe-doc/breathe/issues/355
"deprecated",
# TODO: This type is currently defined in a detail header but it's in
# the public namespace. However, it's used in the detail header, so it
# needs to be put into a public header that can be shared.
"char_utf8",
# TODO: This is currently in a src file but perhaps should be public
"orc::column_statistics",
# Sphinx doesn't know how to distinguish between the ORC and Parquet
Expand All @@ -396,21 +407,24 @@ def _cached_intersphinx_lookup(env, node, contnode):
key = (node, contnode)
if key in _intersphinx_cache:
return _intersphinx_cache[key]
if (ref := intersphinx.resolve_reference_detect_inventory(env, node, contnode)) is not None:
if (
ref := intersphinx.resolve_reference_detect_inventory(
env, node, contnode
)
) is not None:
_intersphinx_cache[key] = ref
return ref


def on_missing_reference(app, env, node, contnode):
# These variables are defined outside the function to speed up the build.
global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, \
_domain_objects, _prefixed_domain_objects, _intersphinx_cache
global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, _domain_objects, _prefixed_domain_objects, _intersphinx_cache

# Precompute and cache domains for faster lookups
if _domain_objects is None:
_domain_objects = {}
_prefixed_domain_objects = {}
for (name, _, _, docname, _, _) in env.domains["cpp"].get_objects():
for name, _, _, docname, _, _ in env.domains["cpp"].get_objects():
_domain_objects[name] = docname
for prefix in _all_namespaces:
_prefixed_domain_objects[f"{prefix}{name}"] = name
Expand Down Expand Up @@ -473,19 +487,25 @@ def on_missing_reference(app, env, node, contnode):
# to fail.
if reftarget != node["reftarget"]:
node["reftarget"] = reftarget
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref

# If the template wasn't the (only) issue, we check the various
# namespace prefixes that may need to be added or removed.
for prefix in _intersphinx_extra_prefixes:
if prefix not in reftarget:
node["reftarget"] = f"{prefix}::{reftarget}"
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref
else:
node["reftarget"] = reftarget.replace(f"{prefix}::", "")
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref

return None
Expand All @@ -499,8 +519,11 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "typing_extensions.Self"),
]


def setup(app):
app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
app.add_js_file(
"https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
)
app.connect("doctree-read", resolve_aliases)
app.connect("missing-reference", on_missing_reference)