Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pre-commit to run black 22.3.0 #10523

Merged
merged 5 commits into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ repos:
args: ["--settings-path=python/dask_cudf/setup.cfg"]
files: python/dask_cudf/.*
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 22.3.0
hooks:
- id: black
files: python/.*
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_fuzz_testing/avro.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import copy
import io
Expand Down Expand Up @@ -73,7 +73,7 @@ def generate_input(self):
self, dtypes_list
)
self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import logging
import random
Expand Down Expand Up @@ -50,7 +50,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(cudf.utils.dtypes.ALL_TYPES)
dtypes_meta, num_rows, num_cols = _generate_rand_meta(
Expand Down Expand Up @@ -155,7 +155,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(cudf.utils.dtypes.ALL_TYPES)
dtypes_meta, num_rows, num_cols = _generate_rand_meta(
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/_fuzz_testing/fuzzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import datetime
import json
Expand Down Expand Up @@ -60,10 +60,12 @@ def write_crash(self, error):
error_file_name = datetime.datetime.now().__str__()
if self._crash_dir:
crash_path = os.path.join(
self._crash_dir, error_file_name + "_crash.json",
self._crash_dir,
error_file_name + "_crash.json",
)
crash_log_path = os.path.join(
self._crash_dir, error_file_name + "_crash.log",
self._crash_dir,
error_file_name + "_crash.log",
)
else:
crash_path = error_file_name + "_crash.json"
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import logging
import random
Expand Down Expand Up @@ -65,7 +65,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(
cudf.utils.dtypes.ALL_TYPES
Expand Down Expand Up @@ -140,7 +140,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(
cudf.utils.dtypes.ALL_TYPES
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/orc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import copy
import io
Expand Down Expand Up @@ -69,7 +69,7 @@ def generate_input(self):
)

self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down Expand Up @@ -188,7 +188,7 @@ def generate_input(self):
self, dtypes_list
)
self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/parquet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import logging
import random
Expand Down Expand Up @@ -64,7 +64,7 @@ def generate_input(self):
self, dtypes_list
)
self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down Expand Up @@ -139,7 +139,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(
cudf.utils.dtypes.ALL_TYPES
Expand Down
10 changes: 7 additions & 3 deletions python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import sys

Expand Down Expand Up @@ -91,10 +91,14 @@ def parquet_writer_test_rowgroup_index_compression(
gdf = cudf.from_pandas(pdf)

pdf.to_parquet(
pd_file_name, compression=compression, row_group_size=row_group_size,
pd_file_name,
compression=compression,
row_group_size=row_group_size,
)
gdf.to_parquet(
gd_file_name, compression=compression, row_group_size=row_group_size,
gd_file_name,
compression=compression,
row_group_size=row_group_size,
)

actual = cudf.read_parquet(gd_file_name)
Expand Down
100 changes: 26 additions & 74 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -32,94 +32,46 @@ class Column:
offset: int = None,
null_count: int = None,
children: Tuple[ColumnBase, ...] = (),
) -> None:
...

) -> None: ...
@property
def base_size(self) -> int:
...

def base_size(self) -> int: ...
@property
def dtype(self) -> DtypeObj:
...

def dtype(self) -> DtypeObj: ...
@property
def size(self) -> int:
...

def size(self) -> int: ...
@property
def base_data(self) -> Optional[Buffer]:
...

def base_data(self) -> Optional[Buffer]: ...
@property
def base_data_ptr(self) -> int:
...

def base_data_ptr(self) -> int: ...
@property
def data(self) -> Optional[Buffer]:
...

def data(self) -> Optional[Buffer]: ...
@property
def data_ptr(self) -> int:
...

def set_base_data(self, value: Buffer) -> None:
...

def data_ptr(self) -> int: ...
def set_base_data(self, value: Buffer) -> None: ...
@property
def nullable(self) -> bool:
...

def has_nulls(self, include_nan: bool=False) -> bool:
...

def nullable(self) -> bool: ...
def has_nulls(self, include_nan: bool = False) -> bool: ...
@property
def base_mask(self) -> Optional[Buffer]:
...

def base_mask(self) -> Optional[Buffer]: ...
@property
def base_mask_ptr(self) -> int:
...

def base_mask_ptr(self) -> int: ...
@property
def mask(self) -> Optional[Buffer]:
...

def mask(self) -> Optional[Buffer]: ...
@property
def mask_ptr(self) -> int:
...

def set_base_mask(self, value: Optional[Buffer]) -> None:
...

def set_mask(self: T, value: Optional[Buffer]) -> T:
...

def mask_ptr(self) -> int: ...
def set_base_mask(self, value: Optional[Buffer]) -> None: ...
def set_mask(self: T, value: Optional[Buffer]) -> T: ...
@property
def null_count(self) -> int:
...

def null_count(self) -> int: ...
@property
def offset(self) -> int:
...

def offset(self) -> int: ...
@property
def base_children(self) -> Tuple[ColumnBase, ...]:
...

def base_children(self) -> Tuple[ColumnBase, ...]: ...
@property
def children(self) -> Tuple[ColumnBase, ...]:
...

def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None:
...

def _mimic_inplace(self, other_col: ColumnBase, inplace=False) -> Optional[ColumnBase]:
...

def children(self) -> Tuple[ColumnBase, ...]: ...
def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None: ...
def _mimic_inplace(
self, other_col: ColumnBase, inplace=False
) -> Optional[ColumnBase]: ...
@staticmethod
def from_scalar(
val: ScalarLike,
size: int
) -> ColumnBase: # TODO: This should be Scalar, not ScalarLike
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vyasr FYI this reformat drops comments in .pyi files. I noticed this in a previous attempt to update black but I wasn't sure how to address it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the note. I'll see what we can do about that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the issue is specifically the inline comments on lines that get compressed. I guess we'll just have to remember to put comments on separate lines from now on in pyi files.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With TODOs, wonder if we should just file these as issues. That would make them easier to track, cross-reference, triage, resolve, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a fair question. I try to use my best judgment. TODOs have a lower energy barrier and issues feel excessive to track one-line changes, but there's definitely a visibility cost. Especially given the current state of cudf internals (heavily in flux) I'm inclined to be a bit lax about this for now. Maybe we get stricter in the future once we have a clearer design and code isn't constantly being deleted and rewritten, but at the moment it's not uncommon for TODOs like this to become moot when the relevant code paths are removed wholesale.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...at the moment it's not uncommon for TODOs like this to become moot when the relevant code paths are removed wholesale

Honestly this is my biggest worry with TODOs and have seen this happen to other code bases. Comments left that no longer pertain to the code they are near with no one that has a clue what they mean. It is clearer to see what issues intended and when they are resolved.

...
def from_scalar(val: ScalarLike, size: int) -> ColumnBase: ...
3 changes: 2 additions & 1 deletion python/cudf/cudf/comm/serialize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import cudf # noqa: F401
from cudf.core.abc import Serializable

Expand Down Expand Up @@ -26,7 +28,6 @@ def dask_deserialize_cudf_object(header, frames):
with log_errors():
return Serializable.host_deserialize(header, frames)


except ImportError:
# distributed is probably not installed on the system
pass
10 changes: 8 additions & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,7 +1389,9 @@ def _constructor_expanddim(self):
return cudf.MultiIndex

def drop_duplicates(
self, keep="first", nulls_are_equal=True,
self,
keep="first",
nulls_are_equal=True,
):
"""
Drop duplicate rows in index.
Expand Down Expand Up @@ -1435,7 +1437,11 @@ def dropna(self, how="any"):
]

return self._from_columns_like_self(
drop_nulls(data_columns, how=how, keys=range(len(data_columns)),),
drop_nulls(
data_columns,
how=how,
keys=range(len(data_columns)),
),
self._column_names,
)

Expand Down
27 changes: 16 additions & 11 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ def _check_and_cast_columns_with_other(


def _normalize_columns_and_scalars_type(
frame: Frame, other: Any, inplace: bool = False,
) -> Tuple[
Union[Frame, ColumnLike], Any,
]:
frame: Frame,
other: Any,
inplace: bool = False,
) -> Tuple[Union[Frame, ColumnLike], Any]:
"""
Try to normalize the other's dtypes as per frame.

Expand Down Expand Up @@ -176,7 +176,10 @@ def _normalize_columns_and_scalars_type(


def where(
frame: Frame, cond: Any, other: Any = None, inplace: bool = False,
frame: Frame,
cond: Any,
other: Any = None,
inplace: bool = False,
) -> Optional[Union[Frame]]:
"""
Replace values where the condition is False.
Expand Down Expand Up @@ -266,9 +269,10 @@ def where(
# as `cond` has no column names.
cond._set_column_names_like(frame)

(source_df, others,) = _normalize_columns_and_scalars_type(
frame, other
)
(
source_df,
others,
) = _normalize_columns_and_scalars_type(frame, other)
if isinstance(others, Frame):
others = others._data.columns

Expand Down Expand Up @@ -340,9 +344,10 @@ def where(
"""Array conditional must be same shape as self"""
)

(input_col, other,) = _normalize_columns_and_scalars_type(
frame, other, inplace
)
(
input_col,
other,
) = _normalize_columns_and_scalars_type(frame, other, inplace)

if isinstance(input_col, cudf.core.column.CategoricalColumn):
if cudf.api.types.is_scalar(other):
Expand Down
Loading