Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/binops
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Dec 2, 2024
2 parents f24aa26 + 852338e commit a9b9631
Show file tree
Hide file tree
Showing 109 changed files with 440 additions and 389 deletions.
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: |
Expand All @@ -17,11 +17,11 @@ repos:
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
rev: v0.16.6
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.10.0'
rev: 'v1.13.0'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
Expand All @@ -33,7 +33,7 @@ repos:
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.8.5
rev: 1.9.1
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
Expand All @@ -52,7 +52,7 @@ repos:
^cpp/include/cudf_test/cxxopts.hpp
)
- repo: https://github.com/sirosen/texthooks
rev: 0.6.6
rev: 0.6.7
hooks:
- id: fix-smartquotes
exclude: |
Expand Down Expand Up @@ -133,7 +133,7 @@ repos:
pass_filenames: false
verbose: true
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies: [tomli]
Expand All @@ -144,7 +144,7 @@ repos:
^CHANGELOG.md$
)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
rev: v0.8.0
hooks:
- id: ruff
args: ["--fix"]
Expand Down
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ dependencies:
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
- rich
Expand All @@ -97,8 +96,6 @@ dependencies:
- sphinxcontrib-websupport
- streamz
- sysroot_linux-64==2.17
- tokenizers==0.15.2
- transformers==4.39.3
- typing_extensions>=4.0.0
- zlib>=1.2.13
name: all_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ dependencies:
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- pytorch>=2.4.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
- rich
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/lists/set_operations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ std::unique_ptr<column> have_overlap(lists_column_view const& lhs,
// - Generate labels for lhs and rhs child elements.
// - Check existence for rows of the table {rhs_labels, rhs_child} in the table
// {lhs_labels, lhs_child}.
// - `reduce_by_key` with keys are rhs_labels and `logical_or` reduction on the existence reults
// - `reduce_by_key` with keys are rhs_labels and `logical_or` reduction on the existence results
// computed in the previous step.

auto const lhs_child = lhs.get_sliced_child(stream);
Expand Down
7 changes: 2 additions & 5 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -885,12 +885,9 @@ dependencies:
- output_types: conda
matrices:
- matrix:
arch: x86_64
cuda: "12.*"
packages:
# Currently, CUDA + aarch64 builds of pytorch do not exist on conda-forge.
- pytorch>=2.1.0
# We only install these on x86_64 to avoid pulling pytorch as a
# dependency of transformers.
- pytorch>=2.4.0
- *tokenizers
- *transformers
- matrix:
Expand Down
25 changes: 22 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ exclude = [
skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,./cpp/tests,./python/cudf/cudf/tests,./java/src/test,./cpp/include/cudf_test/cxxopts.hpp"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
ignore-words-list = "inout,unparseable,falsy,couldn,Couldn"
ignore-words-list = "inout,unparseable,falsy,couldn,Couldn,thirdparty"
builtin = "clear"
quiet-level = 3

[tool.ruff]
line-length = 79
target-version = "py310"

[tool.ruff.lint]
typing-modules = ["cudf._typing"]
Expand Down Expand Up @@ -94,17 +95,35 @@ select = [
"UP035",
# usage of legacy `np.random` function calls
"NPY002",
# Ruff-specific rules
"RUF",
]
ignore = [
# whitespace before :
"E203",
# line-too-long (due to Copyright header)
"E501",
# type-comparison, disabled because we compare types to numpy dtypes
"E721",
# String contains ambiguous character
"RUF001",
# Parenthesize `a and b` expressions when chaining `and` and `or`
# together, to make the precedence clear
"RUF021",
# Mutable class attributes should be annotated with
# `typing.ClassVar`
"RUF012",
]
fixable = ["ALL"]
exclude = [
# TODO: Remove this in a follow-up where we fix __all__.
"__init__.py",
# TODO: https://github.com/rapidsai/cudf/issues/17461
"**/*.ipynb",
]

[tool.ruff.format]
exclude = [
# TODO: https://github.com/rapidsai/cudf/issues/17461
"**/*.ipynb",
]

[tool.ruff.lint.per-file-ignores]
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/benchmarks/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def pytest_collection_modifyitems(session, config, items):
items[:] = list(filter(is_pandas_compatible, items))

else:
import cupy # noqa: W0611, F401
import cupy # noqa: F401

import cudf # noqa: W0611, F401
import cudf # noqa: F401

def pytest_collection_modifyitems(session, config, items):
pass
Expand Down
16 changes: 7 additions & 9 deletions python/cudf/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,16 @@
# into the main repo.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common"))

# Turn off isort until we upgrade to 5.8.0
# https://github.com/pycqa/isort/issues/1594
from config import ( # noqa: W0611, E402, F401
from config import (
NUM_COLS,
NUM_ROWS,
collect_ignore,
cudf, # noqa: W0611, E402, F401
pytest_collection_modifyitems,
pytest_sessionfinish,
pytest_sessionstart,
collect_ignore, # noqa: F401
cudf,
pytest_collection_modifyitems, # noqa: F401
pytest_sessionfinish, # noqa: F401
pytest_sessionstart, # noqa: F401
)
from utils import ( # noqa: E402
from utils import (
OrderedSet,
collapse_fixtures,
column_generators,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@


__all__ = [
"NA",
"BaseIndex",
"CategoricalDtype",
"CategoricalIndex",
Expand All @@ -114,7 +115,6 @@
"IntervalIndex",
"ListDtype",
"MultiIndex",
"NA",
"NaT",
"RangeIndex",
"Scalar",
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def start(self):
else:
self._data_handler.set_rand_params(self.params)
kwargs = self._data_handler._current_params["test_kwargs"]
logging.info(f"Parameters passed: {str(kwargs)}")
logging.info(f"Parameters passed: {kwargs!s}")
self._target(file_name, **kwargs)
except KeyboardInterrupt:
logging.info(
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def memory_usage(self, deep=False):
"""
raise NotImplementedError

def tolist(self): # noqa: D102
def tolist(self):
raise TypeError(
"cuDF does not support conversion to host memory "
"via the `tolist()` method. Consider using "
Expand All @@ -148,7 +148,7 @@ def name(self):
raise NotImplementedError

@property # type: ignore
def ndim(self) -> int: # noqa: D401
def ndim(self) -> int:
"""Number of dimensions of the underlying data, by definition 1."""
return 1

Expand Down Expand Up @@ -265,7 +265,7 @@ def get_loc(self, key):
slice(1, 3, None)
>>> multi_index.get_loc(('b', 'e'))
1
""" # noqa: E501
"""

def max(self):
"""The maximum value of the index."""
Expand Down Expand Up @@ -1473,7 +1473,7 @@ def _intersection(self, other, sort=None):
._data
)

if sort is {None, True} and len(other):
if sort in {None, True} and len(other):
return intersection_result.sort_values()
return intersection_result

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/buffer/spill_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_rmm_memory_resource_stack(
"""

if hasattr(mr, "upstream_mr"):
return [mr] + get_rmm_memory_resource_stack(mr.upstream_mr)
return [mr, *get_rmm_memory_resource_stack(mr.upstream_mr)]
return [mr]


Expand Down Expand Up @@ -275,7 +275,7 @@ def _out_of_memory_handle(self, nbytes: int, *, retry_once=True) -> bool:
print(
f"[WARNING] RMM allocation of {format_bytes(nbytes)} bytes "
"failed, spill-on-demand couldn't find any device memory to "
f"spill:\n{repr(self)}\ntraceback:\n{get_traceback()}\n"
f"spill:\n{self!r}\ntraceback:\n{get_traceback()}\n"
f"{self.statistics}"
)
return False # Since we didn't find anything to spill, we give up
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/buffer/spillable_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def __str__(self) -> str:
f"<{self.__class__.__name__} size={format_bytes(self._size)} "
f"spillable={self.spillable} exposed={self.exposed} "
f"num-spill-locks={len(self._spill_locks)} "
f"ptr={ptr_info} owner={repr(self._owner)}>"
f"ptr={ptr_info} owner={self._owner!r}>"
)


Expand Down
48 changes: 35 additions & 13 deletions python/cudf/cudf/core/column/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

"""
isort: skip_file
"""

from cudf.core.column.categorical import CategoricalColumn
from cudf.core.column.column import (
ColumnBase,
Expand All @@ -15,17 +11,43 @@
deserialize_columns,
serialize_columns,
)
from cudf.core.column.datetime import DatetimeColumn # noqa: F401
from cudf.core.column.datetime import DatetimeTZColumn # noqa: F401
from cudf.core.column.lists import ListColumn # noqa: F401
from cudf.core.column.numerical import NumericalColumn # noqa: F401
from cudf.core.column.string import StringColumn # noqa: F401
from cudf.core.column.struct import StructColumn # noqa: F401
from cudf.core.column.timedelta import TimeDeltaColumn # noqa: F401
from cudf.core.column.interval import IntervalColumn # noqa: F401
from cudf.core.column.decimal import ( # noqa: F401
from cudf.core.column.datetime import (
DatetimeColumn,
DatetimeTZColumn,
)
from cudf.core.column.decimal import (
Decimal32Column,
Decimal64Column,
Decimal128Column,
DecimalBaseColumn,
)
from cudf.core.column.interval import IntervalColumn
from cudf.core.column.lists import ListColumn
from cudf.core.column.numerical import NumericalColumn
from cudf.core.column.string import StringColumn
from cudf.core.column.struct import StructColumn
from cudf.core.column.timedelta import TimeDeltaColumn

__all__ = [
"CategoricalColumn",
"ColumnBase",
"DatetimeColumn",
"DatetimeTZColumn",
"Decimal32Column",
"Decimal64Column",
"Decimal128Column",
"DecimalBaseColumn",
"IntervalColumn",
"ListColumn",
"NumericalColumn",
"StringColumn",
"StructColumn",
"TimeDeltaColumn",
"as_column",
"build_column",
"column_empty",
"column_empty_like",
"concat_columns",
"deserialize_columns",
"serialize_columns",
]
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ def find_and_replace(
if len(replacement_col) == replacement_col.null_count:
replacement_col = replacement_col.astype(self.categories.dtype)

if type(to_replace_col) != type(replacement_col):
if type(to_replace_col) is not type(replacement_col):
raise TypeError(
f"to_replace and value should be of same types,"
f"got to_replace dtype: {to_replace_col.dtype} and "
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import pylibcudf as plc

import cudf
import cudf.core.column.column as column
import cudf.core.column.string as string
from cudf import _lib as libcudf
from cudf.core._compat import PANDAS_GE_220
from cudf.core._internals import binaryop, unary
Expand All @@ -28,7 +30,7 @@
get_tz_data,
)
from cudf.core.buffer import Buffer, acquire_spill_lock
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.core.column.column import ColumnBase, as_column
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
from cudf.utils.dtypes import _get_base_dtype
from cudf.utils.utils import (
Expand Down
Loading

0 comments on commit a9b9631

Please sign in to comment.