Merge branch 'branch-25.02' into cudf/_lib/text
mroeschke authored Dec 6, 2024
2 parents 63cdf0f + 84690b5 commit eecfebb
Showing 286 changed files with 4,781 additions and 5,318 deletions.
1 change: 1 addition & 0 deletions .github/copy-pr-bot.yaml
@@ -2,3 +2,4 @@
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/

enabled: true
auto_sync_draft: false
5 changes: 5 additions & 0 deletions .github/workflows/pr.yaml
@@ -61,6 +61,7 @@ jobs:
files_yaml: |
test_cpp:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
@@ -71,6 +72,7 @@
- '!python/**'
test_cudf_pandas:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
@@ -79,6 +81,7 @@
- '!notebooks/**'
test_java:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
@@ -88,12 +91,14 @@
- '!python/**'
test_notebooks:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!java/**'
test_python:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
26 changes: 26 additions & 0 deletions .github/workflows/trigger-breaking-change-alert.yaml
@@ -0,0 +1,26 @@
name: Trigger Breaking Change Notifications

on:
pull_request_target:
types:
- closed
- reopened
- labeled
- unlabeled

jobs:
trigger-notifier:
if: contains(github.event.pull_request.labels.*.name, 'breaking')
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
sender_login: ${{ github.event.sender.login }}
sender_avatar: ${{ github.event.sender.avatar_url }}
repo: ${{ github.repository }}
pr_number: ${{ github.event.pull_request.number }}
pr_title: "${{ github.event.pull_request.title }}"
pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}"
pr_base_ref: ${{ github.event.pull_request.base.ref }}
pr_author: ${{ github.event.pull_request.user.login }}
event_action: ${{ github.event.action }}
pr_merged: ${{ github.event.pull_request.merged }}
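
The job above only runs for pull requests that carry the `breaking` label (see the `if:` condition), reacting to the closed, reopened, labeled, and unlabeled events. As a hedged illustration of how a maintainer would trip it — not part of this commit, PR number is a placeholder and the label is assumed to be named exactly `breaking`:

    # Hypothetical example using the GitHub CLI (gh); applying the label the
    # workflow's `if:` condition checks for fires the `labeled` event.
    gh pr edit 1234 --repo rapidsai/cudf --add-label breaking
    # Removing it later fires the workflow again via the `unlabeled` event.
    gh pr edit 1234 --repo rapidsai/cudf --remove-label breaking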
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: |
@@ -17,11 +17,11 @@ repos:
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
rev: v0.16.6
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.10.0'
rev: 'v1.13.0'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
@@ -33,7 +33,7 @@ repos:
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.8.5
rev: 1.9.1
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
@@ -52,7 +52,7 @@ repos:
^cpp/include/cudf_test/cxxopts.hpp
)
- repo: https://github.com/sirosen/texthooks
rev: 0.6.6
rev: 0.6.7
hooks:
- id: fix-smartquotes
exclude: |
@@ -133,7 +133,7 @@ repos:
pass_filenames: false
verbose: true
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies: [tomli]
@@ -144,7 +144,7 @@
^CHANGELOG.md$
)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
rev: v0.8.0
hooks:
- id: ruff
args: ["--fix"]
2 changes: 1 addition & 1 deletion ci/cpp_linters.sh
@@ -27,7 +27,7 @@ source rapids-configure-sccache
# Run the build via CMake, which will run clang-tidy when CUDF_STATIC_LINTERS is enabled.

iwyu_flag=""
if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then
if [[ "${RAPIDS_BUILD_TYPE:-}" == "nightly" ]]; then
iwyu_flag="-DCUDF_IWYU=ON"
fi
cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON ${iwyu_flag} -DBUILD_TESTS=OFF -GNinja
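
The only change in this hunk is `${RAPIDS_BUILD_TYPE}` becoming `${RAPIDS_BUILD_TYPE:-}`, which expands to an empty string when the variable is unset. A minimal sketch of why that matters, assuming the script runs with bash's nounset option enabled (as RAPIDS CI scripts typically do via `set -euo pipefail`):

    #!/bin/bash
    # Sketch only; not part of the commit.
    set -u                      # abort on any reference to an unset variable
    unset RAPIDS_BUILD_TYPE

    # Without the default, this would abort: "RAPIDS_BUILD_TYPE: unbound variable"
    #   [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]

    # With ":-", the unset variable expands to "" and the test simply fails:
    if [[ "${RAPIDS_BUILD_TYPE:-}" == "nightly" ]]; then
      echo "enabling IWYU for the nightly build"
    fi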
5 changes: 1 addition & 4 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
- cuda-python>=11.7.1,<12.0a0
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
@@ -80,7 +80,6 @@ dependencies:
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
- rich
@@ -97,8 +96,6 @@
- sphinxcontrib-websupport
- streamz
- sysroot_linux-64==2.17
- tokenizers==0.15.2
- transformers==4.39.3
- typing_extensions>=4.0.0
- zlib>=1.2.13
name: all_cuda-118_arch-x86_64
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -21,7 +21,7 @@ dependencies:
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-python>=12.0,<13.0a0,<=12.6.0
- cuda-python>=12.0,<13.0a0
- cuda-sanitizer-api
- cuda-version=12.5
- cupy>=12.0.0
@@ -78,7 +78,7 @@ dependencies:
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- pytorch>=2.4.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
- rich
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
@@ -91,7 +91,7 @@ requirements:
- cudatoolkit
- ptxcompiler >=0.7.0
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0a0,<=11.8.3
- cuda-python >=11.7.1,<12.0a0
{% else %}
- cuda-cudart
- libcufile # [linux64]
@@ -100,7 +100,7 @@ requirements:
# TODO: Add nvjitlink here
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0,<=12.6.0
- cuda-python >=12.0,<13.0a0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
4 changes: 2 additions & 2 deletions conda/recipes/pylibcudf/meta.yaml
@@ -83,9 +83,9 @@ requirements:
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
- cuda-python >=11.7.1,<12.0a0,<=11.8.3
- cuda-python >=11.7.1,<12.0a0
{% else %}
- cuda-python >=12.0,<13.0a0,<=12.6.0
- cuda-python >=12.0,<13.0a0
{% endif %}
- nvtx >=0.2.1
- packaging
29 changes: 22 additions & 7 deletions cpp/CMakeLists.txt
@@ -93,6 +93,7 @@ option(
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
option(CUDF_CLANG_TIDY "Enable clang-tidy during compilation" OFF)
option(CUDF_IWYU "Enable IWYU during compilation" OFF)
option(CUDF_CLANG_TIDY_AUTOFIX "Enable clang-tidy autofixes" OFF)

option(
CUDF_KVIKIO_REMOTE_IO
@@ -205,9 +206,16 @@ function(enable_static_checkers target)
if(_LINT_CLANG_TIDY)
# clang will complain about unused link libraries on the compile line unless we specify
# -Qunused-arguments.
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
if(CUDF_CLANG_TIDY_AUTOFIX)
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY
"${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments;--fix"
)
else()
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
endif()
endif()
if(_LINT_IWYU)
# A few extra warnings pop up when building with IWYU. I'm not sure why, but they are not
@@ -926,9 +934,16 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool> spdlog::spdlog_header_only
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:CUDA::cuFile${_cufile_suffix}> nanoarrow
PUBLIC CCCL::CCCL rmm::rmm rmm::rmm_logger $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
spdlog::spdlog_header_only
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp>
cuco::cuco
ZLIB::ZLIB
nvcomp::nvcomp
kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:CUDA::cuFile${_cufile_suffix}>
nanoarrow
rmm::rmm_logger_impl
)

# Add Conda library, and include paths if specified
@@ -1007,7 +1022,7 @@ if(CUDF_BUILD_TESTUTIL)
)

target_link_libraries(
cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
$<TARGET_NAME_IF_EXISTS:conda_env>
)

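
For reference, the new CUDF_CLANG_TIDY_AUTOFIX option introduced earlier in this file simply appends `--fix` to the CXX_CLANG_TIDY property, so clang-tidy rewrites offending sources as the build compiles them. A hedged sketch of a local configure step that exercises it, modeled on the ci/cpp_linters.sh invocation shown above (the generator, build type, and BUILD_TESTS setting are carried over from that script, not mandated by this change):

    # Sketch of a local lint-with-autofix build; everything except the two
    # CUDF_CLANG_TIDY* options mirrors ci/cpp_linters.sh and is an assumption here.
    cmake -S cpp -B cpp/build -GNinja \
          -DCMAKE_BUILD_TYPE=Release \
          -DBUILD_TESTS=OFF \
          -DCUDF_CLANG_TIDY=ON \
          -DCUDF_CLANG_TIDY_AUTOFIX=ON
    cmake --build cpp/build   # clang-tidy runs per translation unit and applies --fix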
3 changes: 1 addition & 2 deletions cpp/benchmarks/CMakeLists.txt
@@ -360,8 +360,6 @@ ConfigureNVBench(

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(STRINGS_BENCH string/factory.cu)

ConfigureNVBench(
STRINGS_NVBENCH
string/case.cpp
@@ -377,6 +375,7 @@ ConfigureNVBench(
string/copy_range.cpp
string/count.cpp
string/extract.cpp
string/factory.cpp
string/filter.cpp
string/find.cpp
string/find_multiple.cpp
60 changes: 60 additions & 0 deletions cpp/benchmarks/string/factory.cpp
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/device_uvector.hpp>

#include <nvbench/nvbench.cuh>

#include <limits>

static void bench_factory(nvbench::state& state)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));

data_profile const profile = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
auto const sv = cudf::strings_column_view(column->view());

auto stream = cudf::get_default_stream();
auto mr = cudf::get_current_device_resource_ref();
auto d_strings = cudf::strings::detail::create_string_vector_from_column(sv, stream, mr);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
auto chars_size = sv.chars_size(stream);
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
state.add_global_memory_writes<nvbench::int8_t>(chars_size);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::make_strings_column(d_strings, cudf::string_view{nullptr, 0});
});
}

NVBENCH_BENCH(bench_factory)
.set_name("factory")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("num_rows", {32768, 262144, 2097152});