Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pre-commit to run black 22.3.0 #10523

Merged
merged 5 commits into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ repos:
args: ["--settings-path=python/dask_cudf/setup.cfg"]
files: python/dask_cudf/.*
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 22.3.0
hooks:
- id: black
files: python/.*
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_fuzz_testing/avro.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import copy
import io
Expand Down Expand Up @@ -73,7 +73,7 @@ def generate_input(self):
self, dtypes_list
)
self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import logging
import random
Expand Down Expand Up @@ -50,7 +50,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(cudf.utils.dtypes.ALL_TYPES)
dtypes_meta, num_rows, num_cols = _generate_rand_meta(
Expand Down Expand Up @@ -155,7 +155,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(cudf.utils.dtypes.ALL_TYPES)
dtypes_meta, num_rows, num_cols = _generate_rand_meta(
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/_fuzz_testing/fuzzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import datetime
import json
Expand Down Expand Up @@ -60,10 +60,12 @@ def write_crash(self, error):
error_file_name = datetime.datetime.now().__str__()
if self._crash_dir:
crash_path = os.path.join(
self._crash_dir, error_file_name + "_crash.json",
self._crash_dir,
error_file_name + "_crash.json",
)
crash_log_path = os.path.join(
self._crash_dir, error_file_name + "_crash.log",
self._crash_dir,
error_file_name + "_crash.log",
)
else:
crash_path = error_file_name + "_crash.json"
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import logging
import random
Expand Down Expand Up @@ -65,7 +65,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(
cudf.utils.dtypes.ALL_TYPES
Expand Down Expand Up @@ -140,7 +140,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(
cudf.utils.dtypes.ALL_TYPES
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/orc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import copy
import io
Expand Down Expand Up @@ -69,7 +69,7 @@ def generate_input(self):
)

self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down Expand Up @@ -188,7 +188,7 @@ def generate_input(self):
self, dtypes_list
)
self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_fuzz_testing/parquet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import logging
import random
Expand Down Expand Up @@ -64,7 +64,7 @@ def generate_input(self):
self, dtypes_list
)
self._current_params["dtypes_meta"] = dtypes_meta
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
self._current_params["seed"] = seed
self._current_params["num_rows"] = num_rows
self._current_params["num_cols"] = num_cols
Expand Down Expand Up @@ -139,7 +139,7 @@ def generate_input(self):
seed,
) = self.get_next_regression_params()
else:
seed = random.randint(0, 2 ** 32 - 1)
seed = random.randint(0, 2**32 - 1)
random.seed(seed)
dtypes_list = list(
cudf.utils.dtypes.ALL_TYPES
Expand Down
10 changes: 7 additions & 3 deletions python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import sys

Expand Down Expand Up @@ -91,10 +91,14 @@ def parquet_writer_test_rowgroup_index_compression(
gdf = cudf.from_pandas(pdf)

pdf.to_parquet(
pd_file_name, compression=compression, row_group_size=row_group_size,
pd_file_name,
compression=compression,
row_group_size=row_group_size,
)
gdf.to_parquet(
gd_file_name, compression=compression, row_group_size=row_group_size,
gd_file_name,
compression=compression,
row_group_size=row_group_size,
)

actual = cudf.read_parquet(gd_file_name)
Expand Down
100 changes: 26 additions & 74 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -32,94 +32,46 @@ class Column:
offset: int = None,
null_count: int = None,
children: Tuple[ColumnBase, ...] = (),
) -> None:
...

) -> None: ...
@property
def base_size(self) -> int:
...

def base_size(self) -> int: ...
@property
def dtype(self) -> DtypeObj:
...

def dtype(self) -> DtypeObj: ...
@property
def size(self) -> int:
...

def size(self) -> int: ...
@property
def base_data(self) -> Optional[Buffer]:
...

def base_data(self) -> Optional[Buffer]: ...
@property
def base_data_ptr(self) -> int:
...

def base_data_ptr(self) -> int: ...
@property
def data(self) -> Optional[Buffer]:
...

def data(self) -> Optional[Buffer]: ...
@property
def data_ptr(self) -> int:
...

def set_base_data(self, value: Buffer) -> None:
...

def data_ptr(self) -> int: ...
def set_base_data(self, value: Buffer) -> None: ...
@property
def nullable(self) -> bool:
...

def has_nulls(self, include_nan: bool=False) -> bool:
...

def nullable(self) -> bool: ...
def has_nulls(self, include_nan: bool = False) -> bool: ...
@property
def base_mask(self) -> Optional[Buffer]:
...

def base_mask(self) -> Optional[Buffer]: ...
@property
def base_mask_ptr(self) -> int:
...

def base_mask_ptr(self) -> int: ...
@property
def mask(self) -> Optional[Buffer]:
...

def mask(self) -> Optional[Buffer]: ...
@property
def mask_ptr(self) -> int:
...

def set_base_mask(self, value: Optional[Buffer]) -> None:
...

def set_mask(self: T, value: Optional[Buffer]) -> T:
...

def mask_ptr(self) -> int: ...
def set_base_mask(self, value: Optional[Buffer]) -> None: ...
def set_mask(self: T, value: Optional[Buffer]) -> T: ...
@property
def null_count(self) -> int:
...

def null_count(self) -> int: ...
@property
def offset(self) -> int:
...

def offset(self) -> int: ...
@property
def base_children(self) -> Tuple[ColumnBase, ...]:
...

def base_children(self) -> Tuple[ColumnBase, ...]: ...
@property
def children(self) -> Tuple[ColumnBase, ...]:
...

def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None:
...

def _mimic_inplace(self, other_col: ColumnBase, inplace=False) -> Optional[ColumnBase]:
...

def children(self) -> Tuple[ColumnBase, ...]: ...
def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None: ...
def _mimic_inplace(
self, other_col: ColumnBase, inplace=False
) -> Optional[ColumnBase]: ...
@staticmethod
def from_scalar(
val: ScalarLike,
size: int
) -> ColumnBase: # TODO: This should be Scalar, not ScalarLike
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vyasr FYI this reformat drops comments in .pyi files. I noticed this in a previous attempt to update black but I wasn't sure how to address it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the note. I'll see what we can do about that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the issue is specifically the inline comments on lines that get compressed. I guess we'll just have to remember to put comments on separate lines from now on in pyi files.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With TODOs, wonder if we should just file these as issues. That would make them easier to track, cross-reference, triage, resolve, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a fair question. I try to use my best judgment. TODOs have a lower energy barrier and issues feel excessive to track one-line changes, but there's definitely a visibility cost. Especially given the current state of cudf internals (heavily in flux) I'm inclined to be a bit lax about this for now. Maybe we get stricter in the future once we have a clearer design and code isn't constantly being deleted and rewritten, but at the moment it's not uncommon for TODOs like this to become moot when the relevant code paths are removed wholesale.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...at the moment it's not uncommon for TODOs like this to become moot when the relevant code paths are removed wholesale

Honestly this is my biggest worry with TODOs and have seen this happen to other code bases. Comments left that no longer pertain to the code they are near with no one that has a clue what they mean. It is clearer to see what issues intended and when they are resolved.

...
def from_scalar(val: ScalarLike, size: int) -> ColumnBase: ...
3 changes: 2 additions & 1 deletion python/cudf/cudf/comm/serialize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import cudf # noqa: F401
from cudf.core.abc import Serializable

Expand Down Expand Up @@ -26,7 +28,6 @@ def dask_deserialize_cudf_object(header, frames):
with log_errors():
return Serializable.host_deserialize(header, frames)


except ImportError:
# distributed is probably not installed on the system
pass
10 changes: 8 additions & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,7 +1389,9 @@ def _constructor_expanddim(self):
return cudf.MultiIndex

def drop_duplicates(
self, keep="first", nulls_are_equal=True,
self,
keep="first",
nulls_are_equal=True,
):
"""
Drop duplicate rows in index.
Expand Down Expand Up @@ -1435,7 +1437,11 @@ def dropna(self, how="any"):
]

return self._from_columns_like_self(
drop_nulls(data_columns, how=how, keys=range(len(data_columns)),),
drop_nulls(
data_columns,
how=how,
keys=range(len(data_columns)),
),
self._column_names,
)

Expand Down
27 changes: 16 additions & 11 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ def _check_and_cast_columns_with_other(


def _normalize_columns_and_scalars_type(
frame: Frame, other: Any, inplace: bool = False,
) -> Tuple[
Union[Frame, ColumnLike], Any,
]:
frame: Frame,
other: Any,
inplace: bool = False,
) -> Tuple[Union[Frame, ColumnLike], Any]:
"""
Try to normalize the other's dtypes as per frame.

Expand Down Expand Up @@ -176,7 +176,10 @@ def _normalize_columns_and_scalars_type(


def where(
frame: Frame, cond: Any, other: Any = None, inplace: bool = False,
frame: Frame,
cond: Any,
other: Any = None,
inplace: bool = False,
) -> Optional[Union[Frame]]:
"""
Replace values where the condition is False.
Expand Down Expand Up @@ -266,9 +269,10 @@ def where(
# as `cond` has no column names.
cond._set_column_names_like(frame)

(source_df, others,) = _normalize_columns_and_scalars_type(
frame, other
)
(
source_df,
others,
) = _normalize_columns_and_scalars_type(frame, other)
if isinstance(others, Frame):
others = others._data.columns

Expand Down Expand Up @@ -340,9 +344,10 @@ def where(
"""Array conditional must be same shape as self"""
)

(input_col, other,) = _normalize_columns_and_scalars_type(
frame, other, inplace
)
(
input_col,
other,
) = _normalize_columns_and_scalars_type(frame, other, inplace)

if isinstance(input_col, cudf.core.column.CategoricalColumn):
if cudf.api.types.is_scalar(other):
Expand Down
Loading