Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize usage of collections.abc. #10679

Merged
merged 3 commits into from
Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import random
from collections import abc as abc
from collections import abc

import numpy as np

Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.map cimport map
Expand All @@ -19,9 +19,9 @@ import cudf

from cudf._lib.cpp.types cimport size_type

import collections.abc as abc
import errno
import os
from collections import abc
from enum import IntEnum
from io import BytesIO, StringIO

Expand Down Expand Up @@ -238,7 +238,7 @@ cdef csv_reader_options make_csv_reader_options(
"`parse_dates`: dictionaries are unsupported")
if not isinstance(parse_dates, abc.Iterable):
raise NotImplementedError(
"`parse_dates`: non-lists are unsupported")
"`parse_dates`: an iterable is required")
for col in parse_dates:
if isinstance(col, str):
c_parse_dates_names.push_back(str(col).encode())
Expand Down Expand Up @@ -279,7 +279,7 @@ cdef csv_reader_options make_csv_reader_options(
)
csv_reader_options_c.set_dtypes(c_dtypes_list)
csv_reader_options_c.set_parse_hex(c_hex_col_indexes)
elif isinstance(dtype, abc.Iterable):
elif isinstance(dtype, abc.Collection):
Copy link
Contributor Author

@bdice bdice Apr 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We must use Collection because an Iterable is not guaranteed to support len(dtype) below. Similarly in json.pyx.

c_dtypes_list.reserve(len(dtype))
for index, col_dtype in enumerate(dtype):
if col_dtype in CSV_HEX_TYPE_MAP:
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/json.pyx
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

# cython: boundscheck = False


import collections.abc as abc
import io
import os
from collections import abc

import cudf

Expand Down Expand Up @@ -82,15 +82,15 @@ cpdef read_json(object filepaths_or_buffers,
for k, v in dtype.items():
c_dtypes_map[str(k).encode()] = \
_get_cudf_data_type_from_dtype(v)
elif not isinstance(dtype, abc.Iterable):
raise TypeError("`dtype` must be 'list like' or 'dict'")
else:
elif isinstance(dtype, abc.Collection):
is_list_like_dtypes = True
c_dtypes_list.reserve(len(dtype))
for col_dtype in dtype:
c_dtypes_list.push_back(
_get_cudf_data_type_from_dtype(
col_dtype))
else:
raise TypeError("`dtype` must be 'list like' or 'dict'")

cdef json_reader_options opts = move(
json_reader_options.builder(make_source_info(filepaths_or_buffers))
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from __future__ import annotations

from collections.abc import Sequence
from collections import abc
from functools import wraps
from inspect import isclass
from typing import List, Union
Expand Down Expand Up @@ -174,7 +174,7 @@ def is_list_like(obj):
bool
Return True if given object is list-like.
"""
return isinstance(obj, (Sequence, np.ndarray)) and not isinstance(
return isinstance(obj, (abc.Sequence, np.ndarray)) and not isinstance(
obj, (str, bytes)
)

Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/comm/gpuarrow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
from collections import OrderedDict
from collections.abc import Sequence
from collections import OrderedDict, abc

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -32,7 +31,7 @@ def __init__(self, source, schema=None):
self._open(source, schema)


class GpuArrowReader(Sequence):
class GpuArrowReader(abc.Sequence):
def __init__(self, schema, dev_ary):
self._table = CudaRecordBatchStreamReader(dev_ary, schema).read_all()

Expand Down
6 changes: 4 additions & 2 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

import pickle
from collections.abc import MutableSequence
from collections import abc
from functools import cached_property
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -1379,7 +1379,9 @@ def view(self, dtype: Dtype) -> ColumnBase:
)

@staticmethod
def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn:
def _concat(
objs: abc.MutableSequence[CategoricalColumn],
) -> CategoricalColumn:
# TODO: This function currently assumes it is being called from
# column.concat_columns, at least to the extent that all the
# preprocessing in that function has already been done. That should be
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/core/column_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

import itertools
from collections.abc import MutableMapping
from collections import abc
from functools import cached_property, reduce
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -78,7 +78,7 @@ def _to_flat_dict(d):
return {k: v for k, v in _to_flat_dict_inner(d)}


class ColumnAccessor(MutableMapping):
class ColumnAccessor(abc.MutableMapping):
"""
Parameters
----------
Expand All @@ -99,7 +99,7 @@ class ColumnAccessor(MutableMapping):

def __init__(
self,
data: Union[MutableMapping, ColumnAccessor] = None,
data: Union[abc.MutableMapping, ColumnAccessor] = None,
multiindex: bool = False,
level_names=None,
):
Expand Down Expand Up @@ -213,7 +213,7 @@ def columns(self) -> Tuple[ColumnBase, ...]:
return tuple(self.values())

@cached_property
def _grouped_data(self) -> MutableMapping:
def _grouped_data(self) -> abc.MutableMapping:
"""
If self.multiindex is True,
return the underlying mapping as a nested mapping.
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/cut.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

from collections.abc import Sequence
from collections import abc

import cupy
import numpy as np
Expand Down Expand Up @@ -140,7 +140,7 @@ def cut(
)

# bins can either be an int, sequence of scalars or an intervalIndex
if isinstance(bins, Sequence):
if isinstance(bins, abc.Sequence):
if len(set(bins)) is not len(bins):
if duplicates == "raise":
raise ValueError(
Expand All @@ -158,7 +158,7 @@ def cut(

# create bins if given an int or single scalar
if not isinstance(bins, pd.IntervalIndex):
if not isinstance(bins, (Sequence)):
if not isinstance(bins, (abc.Sequence)):
if isinstance(
x, (pd.Series, cudf.Series, np.ndarray, cupy.ndarray)
):
Expand Down
25 changes: 13 additions & 12 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import pickle
import sys
import warnings
from collections import defaultdict
from collections.abc import Iterable, Mapping, Sequence
from collections import abc, defaultdict
from typing import (
Any,
Callable,
Expand Down Expand Up @@ -1857,7 +1856,7 @@ def _make_operands_and_index_for_binop(
Optional[BaseIndex],
]:
# Check built-in types first for speed.
if isinstance(other, (list, dict, Sequence, Mapping)):
if isinstance(other, (list, dict, abc.Sequence, abc.Mapping)):
warnings.warn(
"Binary operations between host objects such as "
f"{type(other)} and cudf.DataFrame are deprecated and will be "
Expand All @@ -1878,7 +1877,7 @@ def _make_operands_and_index_for_binop(

if _is_scalar_or_zero_d_array(other):
rhs = {name: other for name in self._data}
elif isinstance(other, (list, Sequence)):
elif isinstance(other, (list, abc.Sequence)):
rhs = {name: o for (name, o) in zip(self._data, other)}
elif isinstance(other, Series):
rhs = dict(zip(other.index.values_host, other.values_host))
Expand Down Expand Up @@ -1907,7 +1906,7 @@ def _make_operands_and_index_for_binop(
# the fill value.
left_default = fill_value

if not isinstance(rhs, (dict, Mapping)):
if not isinstance(rhs, (dict, abc.Mapping)):
return NotImplemented, None

operands = {
Expand Down Expand Up @@ -2961,7 +2960,9 @@ def agg(self, aggs, axis=None):
if axis == 0 or axis is not None:
raise NotImplementedError("axis not implemented yet")

if isinstance(aggs, Iterable) and not isinstance(aggs, (str, dict)):
if isinstance(aggs, abc.Iterable) and not isinstance(
aggs, (str, dict)
):
result = DataFrame()
# TODO : Allow simultaneous pass for multi-aggregation as
# a future optimization
Expand Down Expand Up @@ -2997,13 +2998,13 @@ def agg(self, aggs, axis=None):
f"'Series' object"
)
result[key] = getattr(col, value)()
elif all([isinstance(val, Iterable) for val in aggs.values()]):
elif all([isinstance(val, abc.Iterable) for val in aggs.values()]):
idxs = set()
for val in aggs.values():
if isinstance(val, Iterable):
idxs.update(val)
elif isinstance(val, str):
if isinstance(val, str):
idxs.add(val)
elif isinstance(val, abc.Iterable):
idxs.update(val)
idxs = sorted(list(idxs))
for agg in idxs:
if agg is callable:
Expand All @@ -3017,7 +3018,7 @@ def agg(self, aggs, axis=None):
len(idxs), dtype=col.dtype, masked=True
)
ans = cudf.Series(data=col_empty, index=idxs)
if isinstance(aggs.get(key), Iterable):
if isinstance(aggs.get(key), abc.Iterable):
# TODO : Allow simultaneous pass for multi-aggregation
# as a future optimization
for agg in aggs.get(key):
Expand Down Expand Up @@ -6157,7 +6158,7 @@ def _sample_axis_1(
def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: Iterable[str],
column_names: abc.Iterable[str],
index_names: Optional[List[str]] = None,
) -> DataFrame:
result = super()._from_columns_like_self(
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/df_protocol.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

import collections
import enum
from collections import abc
from typing import (
Any,
Dict,
Expand Down Expand Up @@ -569,13 +569,13 @@ def get_columns(self) -> Iterable[_CuDFColumn]:
]

def select_columns(self, indices: Sequence[int]) -> "_CuDFDataFrame":
if not isinstance(indices, collections.abc.Sequence):
if not isinstance(indices, abc.Sequence):
raise ValueError("`indices` is not a sequence")

return _CuDFDataFrame(self._df.iloc[:, indices])

def select_columns_by_name(self, names: Sequence[str]) -> "_CuDFDataFrame":
if not isinstance(names, collections.Sequence):
if not isinstance(names, abc.Sequence):
raise ValueError("`names` is not a sequence")

return _CuDFDataFrame(
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import collections
import itertools
import pickle
import warnings
from collections import abc
from functools import cached_property
from typing import Any, Iterable, List, Tuple, Union

Expand Down Expand Up @@ -1638,7 +1638,7 @@ def _handle_by_or_level(self, by=None, level=None):
self._handle_series(by)
elif isinstance(by, cudf.BaseIndex):
self._handle_index(by)
elif isinstance(by, collections.abc.Mapping):
elif isinstance(by, abc.Mapping):
self._handle_mapping(by)
elif isinstance(by, Grouper):
self._handle_grouper(by)
Expand Down Expand Up @@ -1757,7 +1757,7 @@ def _is_multi_agg(aggs):
Returns True if more than one aggregation is performed
on any of the columns as specified in `aggs`.
"""
if isinstance(aggs, collections.abc.Mapping):
if isinstance(aggs, abc.Mapping):
return any(is_list_like(agg) for agg in aggs.values())
if is_list_like(aggs):
return True
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/join/_join_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from __future__ import annotations

import collections
import warnings
from collections import abc
from typing import TYPE_CHECKING, Any, Tuple, cast

import numpy as np
Expand Down Expand Up @@ -166,7 +166,7 @@ def _match_categorical_dtypes_both(


def _coerce_to_tuple(obj):
if isinstance(obj, collections.abc.Iterable) and not isinstance(obj, str):
if isinstance(obj, abc.Iterable) and not isinstance(obj, str):
return tuple(obj)
else:
return (obj,)
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import itertools
import numbers
import pickle
from collections.abc import Sequence
from collections import abc
from functools import cached_property
from numbers import Integral
from typing import Any, List, MutableMapping, Tuple, Union
Expand Down Expand Up @@ -95,7 +95,7 @@ def __init__(
if len(levels) == 0:
raise ValueError("Must pass non-zero number of levels/codes")
if not isinstance(codes, cudf.DataFrame) and not isinstance(
codes[0], (Sequence, np.ndarray)
codes[0], (abc.Sequence, np.ndarray)
):
raise TypeError("Codes is not a Sequence of sequences")

Expand Down Expand Up @@ -912,7 +912,7 @@ def deserialize(cls, header, frames):
def __getitem__(self, index):
flatten = isinstance(index, int)

if isinstance(index, (Integral, Sequence)):
if isinstance(index, (Integral, abc.Sequence)):
index = np.array(index)
elif isinstance(index, slice):
start, stop, step = index.indices(len(self))
Expand Down
Loading