rapidsai · rapids-bot · Apr 18, 2022 · Apr 18, 2022 · Apr 18, 2022 · Apr 18, 2022
@@ -2,7 +2,7 @@
 
 import logging
 import random
-from collections import abc as abc
+from collections import abc
 
 import numpy as np
 

@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.map cimport map
@@ -19,9 +19,9 @@ import cudf
 
 from cudf._lib.cpp.types cimport size_type
 
-import collections.abc as abc
 import errno
 import os
+from collections import abc
 from enum import IntEnum
 from io import BytesIO, StringIO
 
@@ -238,7 +238,7 @@ cdef csv_reader_options make_csv_reader_options(
                 "`parse_dates`: dictionaries are unsupported")
         if not isinstance(parse_dates, abc.Iterable):
             raise NotImplementedError(
-                "`parse_dates`: non-lists are unsupported")
+                "`parse_dates`: an iterable is required")
         for col in parse_dates:
             if isinstance(col, str):
                 c_parse_dates_names.push_back(str(col).encode())
@@ -279,7 +279,7 @@ cdef csv_reader_options make_csv_reader_options(
             )
             csv_reader_options_c.set_dtypes(c_dtypes_list)
             csv_reader_options_c.set_parse_hex(c_hex_col_indexes)
-        elif isinstance(dtype, abc.Iterable):
+        elif isinstance(dtype, abc.Collection):
             c_dtypes_list.reserve(len(dtype))
             for index, col_dtype in enumerate(dtype):
                 if col_dtype in CSV_HEX_TYPE_MAP:

@@ -1,11 +1,11 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 # cython: boundscheck = False
 
 
-import collections.abc as abc
 import io
 import os
+from collections import abc
 
 import cudf
 
@@ -82,15 +82,15 @@ cpdef read_json(object filepaths_or_buffers,
             for k, v in dtype.items():
                 c_dtypes_map[str(k).encode()] = \
                     _get_cudf_data_type_from_dtype(v)
-        elif not isinstance(dtype, abc.Iterable):
-            raise TypeError("`dtype` must be 'list like' or 'dict'")
-        else:
+        elif isinstance(dtype, abc.Collection):
             is_list_like_dtypes = True
             c_dtypes_list.reserve(len(dtype))
             for col_dtype in dtype:
                 c_dtypes_list.push_back(
                     _get_cudf_data_type_from_dtype(
                         col_dtype))
+        else:
+            raise TypeError("`dtype` must be 'list like' or 'dict'")
 
     cdef json_reader_options opts = move(
         json_reader_options.builder(make_source_info(filepaths_or_buffers))

@@ -4,7 +4,7 @@
 
 from __future__ import annotations
 
-from collections.abc import Sequence
+from collections import abc
 from functools import wraps
 from inspect import isclass
 from typing import List, Union
@@ -174,7 +174,7 @@ def is_list_like(obj):
     bool
         Return True if given object is list-like.
     """
-    return isinstance(obj, (Sequence, np.ndarray)) and not isinstance(
+    return isinstance(obj, (abc.Sequence, np.ndarray)) and not isinstance(
         obj, (str, bytes)
     )
 

@@ -1,6 +1,5 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
-from collections import OrderedDict
-from collections.abc import Sequence
+from collections import OrderedDict, abc
 
 import numpy as np
 import pandas as pd
@@ -32,7 +31,7 @@ def __init__(self, source, schema=None):
         self._open(source, schema)
 
 
-class GpuArrowReader(Sequence):
+class GpuArrowReader(abc.Sequence):
     def __init__(self, schema, dev_ary):
         self._table = CudaRecordBatchStreamReader(dev_ary, schema).read_all()
 

@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import pickle
-from collections.abc import MutableSequence
+from collections import abc
 from functools import cached_property
 from typing import (
     TYPE_CHECKING,
@@ -1379,7 +1379,9 @@ def view(self, dtype: Dtype) -> ColumnBase:
         )
 
     @staticmethod
-    def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn:
+    def _concat(
+        objs: abc.MutableSequence[CategoricalColumn],
+    ) -> CategoricalColumn:
         # TODO: This function currently assumes it is being called from
         # column.concat_columns, at least to the extent that all the
         # preprocessing in that function has already been done. That should be

@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import itertools
-from collections.abc import MutableMapping
+from collections import abc
 from functools import cached_property, reduce
 from typing import (
     TYPE_CHECKING,
@@ -78,7 +78,7 @@ def _to_flat_dict(d):
     return {k: v for k, v in _to_flat_dict_inner(d)}
 
 
-class ColumnAccessor(MutableMapping):
+class ColumnAccessor(abc.MutableMapping):
     """
     Parameters
     ----------
@@ -99,7 +99,7 @@ class ColumnAccessor(MutableMapping):
 
     def __init__(
         self,
-        data: Union[MutableMapping, ColumnAccessor] = None,
+        data: Union[abc.MutableMapping, ColumnAccessor] = None,
         multiindex: bool = False,
         level_names=None,
     ):
@@ -213,7 +213,7 @@ def columns(self) -> Tuple[ColumnBase, ...]:
         return tuple(self.values())
 
     @cached_property
-    def _grouped_data(self) -> MutableMapping:
+    def _grouped_data(self) -> abc.MutableMapping:
         """
         If self.multiindex is True,
         return the underlying mapping as a nested mapping.

@@ -1,6 +1,6 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-from collections.abc import Sequence
+from collections import abc
 
 import cupy
 import numpy as np
@@ -140,7 +140,7 @@ def cut(
                 )
 
     # bins can either be an int, sequence of scalars or an intervalIndex
-    if isinstance(bins, Sequence):
+    if isinstance(bins, abc.Sequence):
         if len(set(bins)) is not len(bins):
             if duplicates == "raise":
                 raise ValueError(
@@ -158,7 +158,7 @@ def cut(
 
     # create bins if given an int or single scalar
     if not isinstance(bins, pd.IntervalIndex):
-        if not isinstance(bins, (Sequence)):
+        if not isinstance(bins, (abc.Sequence)):
             if isinstance(
                 x, (pd.Series, cudf.Series, np.ndarray, cupy.ndarray)
             ):

@@ -9,8 +9,7 @@
 import pickle
 import sys
 import warnings
-from collections import defaultdict
-from collections.abc import Iterable, Mapping, Sequence
+from collections import abc, defaultdict
 from typing import (
     Any,
     Callable,
@@ -1857,7 +1856,7 @@ def _make_operands_and_index_for_binop(
         Optional[BaseIndex],
     ]:
         # Check built-in types first for speed.
-        if isinstance(other, (list, dict, Sequence, Mapping)):
+        if isinstance(other, (list, dict, abc.Sequence, abc.Mapping)):
             warnings.warn(
                 "Binary operations between host objects such as "
                 f"{type(other)} and cudf.DataFrame are deprecated and will be "
@@ -1878,7 +1877,7 @@ def _make_operands_and_index_for_binop(
 
         if _is_scalar_or_zero_d_array(other):
             rhs = {name: other for name in self._data}
-        elif isinstance(other, (list, Sequence)):
+        elif isinstance(other, (list, abc.Sequence)):
             rhs = {name: o for (name, o) in zip(self._data, other)}
         elif isinstance(other, Series):
             rhs = dict(zip(other.index.values_host, other.values_host))
@@ -1907,7 +1906,7 @@ def _make_operands_and_index_for_binop(
             # the fill value.
             left_default = fill_value
 
-        if not isinstance(rhs, (dict, Mapping)):
+        if not isinstance(rhs, (dict, abc.Mapping)):
             return NotImplemented, None
 
         operands = {
@@ -2961,7 +2960,9 @@ def agg(self, aggs, axis=None):
         if axis == 0 or axis is not None:
             raise NotImplementedError("axis not implemented yet")
 
-        if isinstance(aggs, Iterable) and not isinstance(aggs, (str, dict)):
+        if isinstance(aggs, abc.Iterable) and not isinstance(
+            aggs, (str, dict)
+        ):
             result = DataFrame()
             # TODO : Allow simultaneous pass for multi-aggregation as
             # a future optimization
@@ -2997,13 +2998,13 @@ def agg(self, aggs, axis=None):
                             f"'Series' object"
                         )
                     result[key] = getattr(col, value)()
-            elif all([isinstance(val, Iterable) for val in aggs.values()]):
+            elif all([isinstance(val, abc.Iterable) for val in aggs.values()]):
                 idxs = set()
                 for val in aggs.values():
-                    if isinstance(val, Iterable):
-                        idxs.update(val)
-                    elif isinstance(val, str):
+                    if isinstance(val, str):
                         idxs.add(val)
+                    elif isinstance(val, abc.Iterable):
+                        idxs.update(val)
                 idxs = sorted(list(idxs))
                 for agg in idxs:
                     if agg is callable:
@@ -3017,7 +3018,7 @@ def agg(self, aggs, axis=None):
                         len(idxs), dtype=col.dtype, masked=True
                     )
                     ans = cudf.Series(data=col_empty, index=idxs)
-                    if isinstance(aggs.get(key), Iterable):
+                    if isinstance(aggs.get(key), abc.Iterable):
                         # TODO : Allow simultaneous pass for multi-aggregation
                         # as a future optimization
                         for agg in aggs.get(key):
@@ -6157,7 +6158,7 @@ def _sample_axis_1(
     def _from_columns_like_self(
         self,
         columns: List[ColumnBase],
-        column_names: Iterable[str],
+        column_names: abc.Iterable[str],
         index_names: Optional[List[str]] = None,
     ) -> DataFrame:
         result = super()._from_columns_like_self(

@@ -1,7 +1,7 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-import collections
 import enum
+from collections import abc
 from typing import (
     Any,
     Dict,
@@ -569,13 +569,13 @@ def get_columns(self) -> Iterable[_CuDFColumn]:
         ]
 
     def select_columns(self, indices: Sequence[int]) -> "_CuDFDataFrame":
-        if not isinstance(indices, collections.abc.Sequence):
+        if not isinstance(indices, abc.Sequence):
             raise ValueError("`indices` is not a sequence")
 
         return _CuDFDataFrame(self._df.iloc[:, indices])
 
     def select_columns_by_name(self, names: Sequence[str]) -> "_CuDFDataFrame":
-        if not isinstance(names, collections.Sequence):
+        if not isinstance(names, abc.Sequence):
             raise ValueError("`names` is not a sequence")
 
         return _CuDFDataFrame(

@@ -1,9 +1,9 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import collections
 import itertools
 import pickle
 import warnings
+from collections import abc
 from functools import cached_property
 from typing import Any, Iterable, List, Tuple, Union
 
@@ -1638,7 +1638,7 @@ def _handle_by_or_level(self, by=None, level=None):
                     self._handle_series(by)
                 elif isinstance(by, cudf.BaseIndex):
                     self._handle_index(by)
-                elif isinstance(by, collections.abc.Mapping):
+                elif isinstance(by, abc.Mapping):
                     self._handle_mapping(by)
                 elif isinstance(by, Grouper):
                     self._handle_grouper(by)
@@ -1757,7 +1757,7 @@ def _is_multi_agg(aggs):
     Returns True if more than one aggregation is performed
     on any of the columns as specified in `aggs`.
     """
-    if isinstance(aggs, collections.abc.Mapping):
+    if isinstance(aggs, abc.Mapping):
         return any(is_list_like(agg) for agg in aggs.values())
     if is_list_like(aggs):
         return True

@@ -2,8 +2,8 @@
 
 from __future__ import annotations
 
-import collections
 import warnings
+from collections import abc
 from typing import TYPE_CHECKING, Any, Tuple, cast
 
 import numpy as np
@@ -166,7 +166,7 @@ def _match_categorical_dtypes_both(
 
 
 def _coerce_to_tuple(obj):
-    if isinstance(obj, collections.abc.Iterable) and not isinstance(obj, str):
+    if isinstance(obj, abc.Iterable) and not isinstance(obj, str):
         return tuple(obj)
     else:
         return (obj,)
@@ -5,7 +5,7 @@
 import itertools
 import numbers
 import pickle
-from collections.abc import Sequence
+from collections import abc
 from functools import cached_property
 from numbers import Integral
 from typing import Any, List, MutableMapping, Tuple, Union
@@ -95,7 +95,7 @@ def __init__(
         if len(levels) == 0:
             raise ValueError("Must pass non-zero number of levels/codes")
         if not isinstance(codes, cudf.DataFrame) and not isinstance(
-            codes[0], (Sequence, np.ndarray)
+            codes[0], (abc.Sequence, np.ndarray)
         ):
             raise TypeError("Codes is not a Sequence of sequences")
 
@@ -912,7 +912,7 @@ def deserialize(cls, header, frames):
     def __getitem__(self, index):
         flatten = isinstance(index, int)
 
-        if isinstance(index, (Integral, Sequence)):
+        if isinstance(index, (Integral, abc.Sequence)):
             index = np.array(index)
         elif isinstance(index, slice):
             start, stop, step = index.indices(len(self))