Skip to content

Commit

Permalink
Merge branch 'branch-24.04' into fix-cuco-conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel authored Jan 25, 2024
2 parents 53c33d7 + 59199da commit 757d225
Show file tree
Hide file tree
Showing 102 changed files with 3,388 additions and 1,920 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
conda-cpp-checks:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04
with:
build_type: pull-request
enable_check_symbols: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:
jobs:
conda-cpp-checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand Down
308 changes: 154 additions & 154 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,159 +1,159 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
exclude: |
(?x)^(
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- id: end-of-file-fixer
exclude: |
(?x)^(
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
types_or: [python, cython, pyi]
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
files: python/.*
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config", "pyproject.toml"]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.15.0
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.3.0'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
args: ["--config-file=pyproject.toml",
"python/cudf/cudf",
"python/custreamz/custreamz",
"python/cudf_kafka/cudf_kafka",
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/PyCQA/pydocstyle
rev: 6.1.1
hooks:
- id: pydocstyle
# https://github.com/PyCQA/pydocstyle/issues/603
additional_dependencies: [toml]
args: ["--config=pyproject.toml"]
exclude: |
(?x)^(
^python/cudf/cudf/pandas/scripts/.*|
^python/cudf/cudf_pandas_tests/.*
)
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.6.3
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- id: nbqa-black
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
- id: clang-format
types_or: [c, c++, cuda]
args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/sirosen/texthooks
rev: 0.4.0
hooks:
- id: fix-smartquotes
exclude: |
(?x)^(
^cpp/include/cudf_test/cxxopts.hpp|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*|
^python/cudf/cudf/tests/text/test_text_methods.py
)
- repo: local
hooks:
- id: no-deprecationwarning
name: no-deprecationwarning
description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)'
entry: '(category=|\s)DeprecationWarning[,)]'
language: pygrep
types_or: [python, cython]
- id: no-programmatic-xfail
name: no-programmatic-xfail
description: 'Enforce that pytest.xfail is not introduced (see dev docs for details)'
entry: 'pytest\.xfail'
language: pygrep
types: [python]
- id: cmake-format
name: cmake-format
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: cmake-lint
name: cmake-lint
entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: copyright-check
name: copyright-check
entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year
language: python
pass_filenames: false
additional_dependencies: [gitpython]
- id: doxygen-check
name: doxygen-check
entry: ./ci/checks/doxygen.sh
files: ^cpp/include/
types_or: [file]
language: system
pass_filenames: false
verbose: true
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
hooks:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: |
(?x)^(
.*test.*|
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.8.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.278
hooks:
- id: ruff
files: python/.*$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
exclude: |
(?x)^(
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- id: end-of-file-fixer
exclude: |
(?x)^(
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
types_or: [python, cython, pyi]
- repo: https://github.com/psf/black
rev: 23.12.1
hooks:
- id: black
files: python/.*
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config", "pyproject.toml"]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.0
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.3.0'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
args: ["--config-file=pyproject.toml",
"python/cudf/cudf",
"python/custreamz/custreamz",
"python/cudf_kafka/cudf_kafka",
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/PyCQA/pydocstyle
rev: 6.3.0
hooks:
- id: pydocstyle
# https://github.com/PyCQA/pydocstyle/issues/603
additional_dependencies: [tomli]
args: ["--config=pyproject.toml"]
exclude: |
(?x)^(
^python/cudf/cudf/pandas/scripts/.*|
^python/cudf/cudf_pandas_tests/.*
)
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.7.1
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- id: nbqa-black
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
- id: clang-format
types_or: [c, c++, cuda]
args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/sirosen/texthooks
rev: 0.6.3
hooks:
- id: fix-smartquotes
exclude: |
(?x)^(
^cpp/include/cudf_test/cxxopts.hpp|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*|
^python/cudf/cudf/tests/text/test_text_methods.py
)
- repo: local
hooks:
- id: no-deprecationwarning
name: no-deprecationwarning
description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)'
entry: '(category=|\s)DeprecationWarning[,)]'
language: pygrep
types_or: [python, cython]
- id: no-programmatic-xfail
name: no-programmatic-xfail
description: 'Enforce that pytest.xfail is not introduced (see dev docs for details)'
entry: 'pytest\.xfail'
language: pygrep
types: [python]
- id: cmake-format
name: cmake-format
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: cmake-lint
name: cmake-lint
entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: copyright-check
name: copyright-check
entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year
language: python
pass_filenames: false
additional_dependencies: [gitpython]
- id: doxygen-check
name: doxygen-check
entry: ./ci/checks/doxygen.sh
files: ^cpp/include/
types_or: [file]
language: system
pass_filenames: false
verbose: true
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
hooks:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: |
(?x)^(
.*test.*|
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.8.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.13
hooks:
- id: ruff
files: python/.*$


default_language_version:
Expand Down
8 changes: 4 additions & 4 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHOR
sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile

# sphinx docs update
sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cudf/source/conf.py
sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cudf/source/conf.py
sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/dask_cudf/source/conf.py
sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/dask_cudf/source/conf.py
sed_runner 's/version = .*/version = "'${NEXT_SHORT_TAG}'"/g' docs/cudf/source/conf.py
sed_runner 's/release = .*/release = "'${NEXT_FULL_TAG}'"/g' docs/cudf/source/conf.py
sed_runner 's/version = .*/version = "'${NEXT_SHORT_TAG}'"/g' docs/dask_cudf/source/conf.py
sed_runner 's/release = .*/release = "'${NEXT_FULL_TAG}'"/g' docs/dask_cudf/source/conf.py

DEPENDENCIES=(
cudf
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ add_library(
src/io/functions.cpp
src/io/json/byte_range_info.cu
src/io/json/json_column.cu
src/io/json/json_quote_normalization.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
src/io/json/read_json.cu
Expand Down
3 changes: 1 addition & 2 deletions cpp/examples/strings/custom_optimized.cu
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
*d_names, *d_visibilities, offsets.data(), chars.data());

// create column from offsets and chars vectors (no copy is performed)
auto result =
cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars), {}, 0);
auto result = cudf::make_strings_column(names.size(), std::move(offsets), chars.release(), {}, 0);

// wait for all of the above to finish
stream.synchronize();
Expand Down
14 changes: 13 additions & 1 deletion cpp/include/cudf/io/detail/json.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,4 +51,16 @@ void write_json(data_sink* sink,
json_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Normalize single quotes to double quotes using FST
*
* @param inbuf Input device buffer
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
rmm::device_uvector<char> normalize_single_quotes(rmm::device_uvector<char>&& inbuf,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace cudf::io::json::detail
Loading

0 comments on commit 757d225

Please sign in to comment.