From e23aa5cbb8ee3eff84d4e3cdd9ae1194e57f776f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 16:41:07 -0600 Subject: [PATCH 01/10] Run pyupgrade 2.31.0, except for changes to typing. --- ci/utils/nbtestlog2junitxml.py | 10 ++-- cpp/scripts/run-clang-format.py | 9 ++-- cpp/scripts/run-clang-tidy.py | 7 ++- cpp/scripts/sort_ninja_log.py | 2 +- docs/cudf/source/conf.py | 1 - python/cudf/cudf/_fuzz_testing/fuzzer.py | 2 +- python/cudf/cudf/_fuzz_testing/io.py | 4 +- python/cudf/cudf/_fuzz_testing/main.py | 2 +- python/cudf/cudf/_version.py | 19 ++++---- python/cudf/cudf/comm/gpuarrow.py | 2 +- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/core/dataframe.py | 12 ++--- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/join/join.py | 12 ++--- python/cudf/cudf/core/multiindex.py | 6 +-- python/cudf/cudf/core/scalar.py | 2 +- python/cudf/cudf/core/series.py | 10 ++-- python/cudf/cudf/core/udf/typing.py | 4 +- python/cudf/cudf/datasets.py | 6 +-- python/cudf/cudf/tests/test_api_types.py | 22 ++++----- python/cudf/cudf/tests/test_binops.py | 1 - python/cudf/cudf/tests/test_copying.py | 2 - python/cudf/cudf/tests/test_cuda_apply.py | 5 +- python/cudf/cudf/tests/test_dataframe.py | 4 +- python/cudf/cudf/tests/test_factorize.py | 4 +- python/cudf/cudf/tests/test_gcs.py | 6 +-- python/cudf/cudf/tests/test_groupby.py | 3 +- python/cudf/cudf/tests/test_hdfs.py | 26 +++++----- python/cudf/cudf/tests/test_query.py | 1 - python/cudf/cudf/tests/test_reductions.py | 1 - python/cudf/cudf/tests/test_s3.py | 44 ++++++++--------- python/cudf/cudf/tests/test_sorting.py | 2 +- python/cudf/cudf/tests/test_text.py | 2 +- python/cudf/cudf/tests/test_transform.py | 1 - python/cudf/cudf/tests/test_udf_binops.py | 1 - python/cudf/cudf/tests/test_unaops.py | 2 - python/cudf/cudf/utils/applyutils.py | 8 ++-- python/cudf/cudf/utils/cudautils.py | 2 +- 
python/cudf/cudf/utils/dtypes.py | 6 +-- python/cudf/cudf/utils/hash_vocab_utils.py | 22 ++++----- python/cudf/cudf/utils/queryutils.py | 8 ++-- python/cudf/setup.py | 2 +- python/cudf/versioneer.py | 48 ++++++++----------- python/cudf_kafka/cudf_kafka/_version.py | 19 ++++---- python/cudf_kafka/versioneer.py | 48 ++++++++----------- python/custreamz/custreamz/_version.py | 19 ++++---- .../custreamz/tests/test_dataframes.py | 1 - python/custreamz/versioneer.py | 48 ++++++++----------- python/dask_cudf/dask_cudf/_version.py | 19 ++++---- python/dask_cudf/dask_cudf/core.py | 12 ++--- python/dask_cudf/dask_cudf/io/orc.py | 4 +- .../dask_cudf/io/tests/test_parquet.py | 4 +- python/dask_cudf/setup.py | 2 +- python/dask_cudf/versioneer.py | 48 ++++++++----------- 56 files changed, 258 insertions(+), 307 deletions(-) diff --git a/ci/utils/nbtestlog2junitxml.py b/ci/utils/nbtestlog2junitxml.py index 15b362e4b70..6a421279112 100644 --- a/ci/utils/nbtestlog2junitxml.py +++ b/ci/utils/nbtestlog2junitxml.py @@ -7,11 +7,11 @@ from enum import Enum -startingPatt = re.compile("^STARTING: ([\w\.\-]+)$") -skippingPatt = re.compile("^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$") -exitCodePatt = re.compile("^EXIT CODE: (\d+)$") -folderPatt = re.compile("^FOLDER: ([\w\.\-]+)$") -timePatt = re.compile("^real\s+([\d\.ms]+)$") +startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$") +skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$") +exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$") +folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$") +timePatt = re.compile(r"^real\s+([\d\.ms]+)$") linePatt = re.compile("^" + ("-" * 80) + "$") diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py index a7c83da22c5..3d462d65fb8 100755 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -13,7 +13,6 @@ # limitations under the License. 
# -from __future__ import print_function import argparse import os @@ -124,9 +123,9 @@ def run_clang_format(src, dst, exe, verbose, inplace): os.makedirs(dstdir) # run the clang format command itself if src == dst: - cmd = "%s -i %s" % (exe, src) + cmd = f"{exe} -i {src}" else: - cmd = "%s %s > %s" % (exe, src, dst) + cmd = f"{exe} {src} > {dst}" try: subprocess.check_call(cmd, shell=True) except subprocess.CalledProcessError: @@ -134,9 +133,9 @@ def run_clang_format(src, dst, exe, verbose, inplace): raise # run the diff to check if there are any formatting issues if inplace: - cmd = "diff -q %s %s >/dev/null" % (src, dst) + cmd = f"diff -q {src} {dst} >/dev/null" else: - cmd = "diff %s %s" % (src, dst) + cmd = f"diff {src} {dst}" try: subprocess.check_call(cmd, shell=True) diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 3a1a663e231..30e937d7f4d 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -13,7 +13,6 @@ # limitations under the License. 
# -from __future__ import print_function import re import os import subprocess @@ -67,7 +66,7 @@ def parse_args(): def get_all_commands(cdb): - with open(cdb, "r") as fp: + with open(cdb) as fp: return json.load(fp) @@ -195,10 +194,10 @@ def collect_result(result): def print_result(passed, stdout, file): status_str = "PASSED" if passed else "FAILED" - print("%s File:%s %s %s" % (SEPARATOR, file, status_str, SEPARATOR)) + print(f"{SEPARATOR} File:{file} {status_str} {SEPARATOR}") if stdout: print(stdout) - print("%s File:%s ENDS %s" % (SEPARATOR, file, SEPARATOR)) + print(f"{SEPARATOR} File:{file} ENDS {SEPARATOR}") def print_results(): diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py index bac6697da82..1891c607c5f 100755 --- a/cpp/scripts/sort_ninja_log.py +++ b/cpp/scripts/sort_ninja_log.py @@ -33,7 +33,7 @@ # build a map of the log entries entries = {} -with open(log_file, "r") as log: +with open(log_file) as log: for line in log: entry = line.split() if len(entry) > 4: diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 3d6d3ceb399..5de1d918a09 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# diff --git a/python/cudf/cudf/_fuzz_testing/fuzzer.py b/python/cudf/cudf/_fuzz_testing/fuzzer.py index 484b3fb26f4..a51a5073510 100644 --- a/python/cudf/cudf/_fuzz_testing/fuzzer.py +++ b/python/cudf/cudf/_fuzz_testing/fuzzer.py @@ -14,7 +14,7 @@ ) -class Fuzzer(object): +class Fuzzer: def __init__( self, target, diff --git a/python/cudf/cudf/_fuzz_testing/io.py b/python/cudf/cudf/_fuzz_testing/io.py index 193fb4c7f7f..dfc59a1f18d 100644 --- a/python/cudf/cudf/_fuzz_testing/io.py +++ b/python/cudf/cudf/_fuzz_testing/io.py @@ -16,7 +16,7 @@ ) -class IOFuzz(object): +class IOFuzz: def __init__( self, dirs=None, @@ -59,7 +59,7 @@ def __init__( self._current_buffer = None def _load_params(self, path): - with open(path, "r") as f: + with open(path) as f: params = json.load(f) self._inputs.append(params) diff --git a/python/cudf/cudf/_fuzz_testing/main.py b/python/cudf/cudf/_fuzz_testing/main.py index 7b28a4c4970..6b536fc3e2e 100644 --- a/python/cudf/cudf/_fuzz_testing/main.py +++ b/python/cudf/cudf/_fuzz_testing/main.py @@ -3,7 +3,7 @@ from cudf._fuzz_testing import fuzzer -class PythonFuzz(object): +class PythonFuzz: def __init__(self, func, params=None, data_handle=None, **kwargs): self.function = func self.data_handler_class = data_handle diff --git a/python/cudf/cudf/_version.py b/python/cudf/cudf/_version.py index a511ab98acf..be915b54c46 100644 --- a/python/cudf/cudf/_version.py +++ b/python/cudf/cudf/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. 
keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py index b6089b65aa5..7879261139d 100644 --- a/python/cudf/cudf/comm/gpuarrow.py +++ b/python/cudf/cudf/comm/gpuarrow.py @@ -58,7 +58,7 @@ def to_dict(self): return dc -class GpuArrowNodeReader(object): +class GpuArrowNodeReader: def __init__(self, table, index): self._table = table self._field = table.schema[index] diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index b1335c7c076..5c5ccaf94c0 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1,6 +1,6 @@ # Copyright (c) 2021, NVIDIA CORPORATION. 
-from __future__ import annotations, division, print_function +from __future__ import annotations import pickle import warnings diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 9b44b4e6831..7f62d8fe03f 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5083,7 +5083,7 @@ def to_arrow(self) -> pa.Array: """ if self.null_count == len(self): return pa.NullArray.from_buffers( - pa.null(), len(self), [pa.py_buffer((b""))] + pa.null(), len(self), [pa.py_buffer(b"")] ) else: return super().to_arrow() diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c686cd0fd39..bd08ac385c7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1,6 +1,6 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. -from __future__ import annotations, division +from __future__ import annotations import functools import inspect @@ -4347,7 +4347,7 @@ def _verbose_repr(): dtype = self.dtypes.iloc[i] col = pprint_thing(col) - line_no = _put_str(" {num}".format(num=i), space_num) + line_no = _put_str(f" {i}", space_num) count = "" if show_counts: count = counts[i] @@ -5694,9 +5694,7 @@ def select_dtypes(self, include=None, exclude=None): if issubclass(dtype.type, e_dtype): exclude_subtypes.add(dtype.type) - include_all = set( - [cudf_dtype_from_pydata_dtype(d) for d in self.dtypes] - ) + include_all = {cudf_dtype_from_pydata_dtype(d) for d in self.dtypes} if include: inclusion = include_all & include_subtypes @@ -6416,8 +6414,8 @@ def _align_indices(lhs, rhs): lhs_out = DataFrame(index=df.index) rhs_out = DataFrame(index=df.index) common = set(lhs.columns) & set(rhs.columns) - common_x = set(["{}_x".format(x) for x in common]) - common_y = set(["{}_y".format(x) for x in common]) + common_x = {f"{x}_x" for x in common} + common_y = {f"{x}_y" for x in common} for col in df.columns: if col in common_x: lhs_out[col[:-2]] = df[col] diff 
--git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index a393d8e9457..ff700144bed 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1461,7 +1461,7 @@ def apply(self, func): # TODO: should we define this as a dataclass instead? -class Grouper(object): +class Grouper: def __init__( self, key=None, level=None, freq=None, closed=None, label=None ): diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 91c7a740699..88371666ce6 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1,6 +1,6 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. -from __future__ import annotations, division, print_function +from __future__ import annotations import math import pickle diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 704274815f6..39ff4718550 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -169,13 +169,11 @@ def __init__( if on else set() if (self._using_left_index or self._using_right_index) - else set( - [ - lkey.name - for lkey, rkey in zip(self._left_keys, self._right_keys) - if lkey.name == rkey.name - ] - ) + else { + lkey.name + for lkey, rkey in zip(self._left_keys, self._right_keys) + if lkey.name == rkey.name + } ) def perform_merge(self) -> Frame: diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index fa84889adea..3796b596ad6 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -115,7 +115,7 @@ def __init__( "MultiIndex has unequal number of levels and " "codes and is inconsistent!" ) - if len(set(c.size for c in codes._data.columns)) != 1: + if len({c.size for c in codes._data.columns}) != 1: raise ValueError( "MultiIndex length of codes does not match " "and is inconsistent!" 
@@ -752,7 +752,7 @@ def _index_and_downcast(self, result, index, index_key): # Pandas returns an empty Series with a tuple as name # the one expected result column result = cudf.Series._from_data( - {}, name=tuple((col[0] for col in index._data.columns)) + {}, name=tuple(col[0] for col in index._data.columns) ) elif out_index._num_columns == 1: # If there's only one column remaining in the output index, convert @@ -1222,7 +1222,7 @@ def _poplevels(self, level): if not pd.api.types.is_list_like(level): level = (level,) - ilevels = sorted([self._level_index_from_level(lev) for lev in level]) + ilevels = sorted(self._level_index_from_level(lev) for lev in level) if not ilevels: return None diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index b0770b71ca6..134b94bf0f2 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -17,7 +17,7 @@ ) -class Scalar(object): +class Scalar: """ A GPU-backed scalar object with NumPy scalar like properties May be used in binary operations against other scalars, cuDF diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 0371c40274f..7f00162099a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -167,7 +167,7 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: if ( isinstance(arg, tuple) and len(arg) == self._frame._index.nlevels - and not any((isinstance(x, slice) for x in arg)) + and not any(isinstance(x, slice) for x in arg) ): result = result.iloc[0] return result @@ -3121,7 +3121,7 @@ def _prepare_percentiles(percentiles): return percentiles def _format_percentile_names(percentiles): - return ["{0}%".format(int(x * 100)) for x in percentiles] + return [f"{int(x * 100)}%" for x in percentiles] def _format_stats_values(stats_data): return map(lambda x: round(x, 6), stats_data) @@ -3223,7 +3223,7 @@ def _describe_timestamp(self): .to_numpy(na_value=np.nan), ) ), - "max": 
str(pd.Timestamp((self.max()))), + "max": str(pd.Timestamp(self.max())), } return Series( @@ -3702,7 +3702,7 @@ def wrapper(self, other, level=None, fill_value=None, axis=0): setattr(Series, binop, make_binop_func(binop)) -class DatetimeProperties(object): +class DatetimeProperties: """ Accessor object for datetimelike properties of the Series values. @@ -4644,7 +4644,7 @@ def strftime(self, date_format, *args, **kwargs): ) -class TimedeltaProperties(object): +class TimedeltaProperties: """ Accessor object for timedeltalike properties of the Series values. diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index da7ff4c0e32..56e8bec74dc 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -133,8 +133,8 @@ def typeof_masked(val, c): class MaskedConstructor(ConcreteTemplate): key = api.Masked units = ["ns", "ms", "us", "s"] - datetime_cases = set(types.NPDatetime(u) for u in units) - timedelta_cases = set(types.NPTimedelta(u) for u in units) + datetime_cases = {types.NPDatetime(u) for u in units} + timedelta_cases = {types.NPTimedelta(u) for u in units} cases = [ nb_signature(MaskedType(t), t, types.boolean) for t in ( diff --git a/python/cudf/cudf/datasets.py b/python/cudf/cudf/datasets.py index 2341a5c23b9..d7a2fedef59 100644 --- a/python/cudf/cudf/datasets.py +++ b/python/cudf/cudf/datasets.py @@ -57,9 +57,7 @@ def timeseries( pd.date_range(start, end, freq=freq, name="timestamp") ) state = np.random.RandomState(seed) - columns = dict( - (k, make[dt](len(index), state)) for k, dt in dtypes.items() - ) + columns = {k: make[dt](len(index), state) for k, dt in dtypes.items()} df = pd.DataFrame(columns, index=index, columns=sorted(columns)) if df.index[-1] == end: df = df.iloc[:-1] @@ -110,7 +108,7 @@ def randomdata(nrows=10, dtypes=None, seed=None): if dtypes is None: dtypes = {"id": int, "x": float, "y": float} state = np.random.RandomState(seed) - columns = dict((k, make[dt](nrows, state)) 
for k, dt in dtypes.items()) + columns = {k: make[dt](nrows, state) for k, dt in dtypes.items()} df = pd.DataFrame(columns, columns=sorted(columns)) return cudf.from_pandas(df) diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py index 4d104c122d1..f8f93eaa2e3 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -17,7 +17,7 @@ (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -128,7 +128,7 @@ def test_is_categorical_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -235,7 +235,7 @@ def test_is_numeric_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -342,7 +342,7 @@ def test_is_integer_dtype(obj, expect): (int(), True), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -450,7 +450,7 @@ def test_is_integer(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -557,7 +557,7 @@ def test_is_string_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -664,7 +664,7 @@ def test_is_datetime_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -771,7 +771,7 @@ def test_is_list_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -881,7 +881,7 @@ def test_is_struct_dtype(obj, expect): (int(), False), (float(), False), 
(complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -988,7 +988,7 @@ def test_is_decimal_dtype(obj, expect): int(), float(), complex(), - str(), + '', "", r"", object(), @@ -1070,7 +1070,7 @@ def test_pandas_agreement(obj): int(), float(), complex(), - str(), + '', "", r"", object(), diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 921f2de38c2..76add8b9c5d 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1,6 +1,5 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. -from __future__ import division import decimal import operator diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py index 21a6a9172db..0d0ba579f22 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -1,5 +1,3 @@ -from __future__ import division, print_function - import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py index a00dbbba5f0..e8bd64b5061 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/test_cuda_apply.py @@ -98,7 +98,7 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): expect_out1 = extra2 * in1 - extra1 * in2 + in3 expect_out2 = np.hstack( - np.arange((e - s)) for s, e in zip(chunks, chunks[1:] + [len(df)]) + np.arange(e - s) for s, e in zip(chunks, chunks[1:] + [len(df)]) ) outdf = df.apply_chunks( @@ -141,8 +141,7 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): expect_out1 = extra2 * in1 - extra1 * in2 + in3 expect_out2 = np.hstack( - tpb * np.arange((e - s)) - for s, e in zip(chunks, chunks[1:] + [len(df)]) + tpb * np.arange(e - s) for s, e in zip(chunks, chunks[1:] + [len(df)]) ) outdf = df.apply_chunks( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 
5844055f7f0..b0c1b6fe2b6 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -845,7 +845,7 @@ def test_dataframe_to_string_wide(monkeypatch): # Test basic df = cudf.DataFrame() for i in range(100): - df["a{}".format(i)] = list(range(3)) + df[f"a{i}"] = list(range(3)) pd.options.display.max_columns = 0 got = df.to_string() @@ -1163,7 +1163,7 @@ def test_dataframe_hash_partition(nrows, nparts, nkeys): gdf = cudf.DataFrame() keycols = [] for i in range(nkeys): - keyname = "key{}".format(i) + keyname = f"key{i}" gdf[keyname] = np.random.randint(0, 7 - i, nrows) keycols.append(keyname) gdf["val1"] = np.random.randint(0, nrows * 2, nrows) diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py index 1f16686a6a6..3081b7c4a6e 100644 --- a/python/cudf/cudf/tests/test_factorize.py +++ b/python/cudf/cudf/tests/test_factorize.py @@ -23,7 +23,7 @@ def test_factorize_series_obj(ncats, nelem): assert isinstance(uvals, cp.ndarray) assert isinstance(labels, Index) - encoder = dict((labels[idx], idx) for idx in range(len(labels))) + encoder = {labels[idx]: idx for idx in range(len(labels))} handcoded = [encoder[v] for v in arr] np.testing.assert_array_equal(uvals.get(), handcoded) @@ -42,7 +42,7 @@ def test_factorize_index_obj(ncats, nelem): assert isinstance(uvals, cp.ndarray) assert isinstance(labels, Index) - encoder = dict((labels[idx], idx) for idx in range(len(labels))) + encoder = {labels[idx]: idx for idx in range(len(labels))} handcoded = [encoder[v] for v in arr] np.testing.assert_array_equal(uvals.get(), handcoded) diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index db53529b22f..307232b1305 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -48,14 +48,14 @@ def mock_size(*args): # use_python_file_object=True, because the pyarrow # `open_input_file` command will fail (since it doesn't # use the 
monkey-patched `open` definition) - got = cudf.read_csv("gcs://{}".format(fpath), use_python_file_object=False) + got = cudf.read_csv(f"gcs://{fpath}", use_python_file_object=False) assert_eq(pdf, got) # AbstractBufferedFile -> PythonFile conversion # will work fine with the monkey-patched FS if we # pass in an fsspec file object fs = gcsfs.core.GCSFileSystem() - with fs.open("gcs://{}".format(fpath)) as f: + with fs.open(f"gcs://{fpath}") as f: got = cudf.read_csv(f) assert_eq(pdf, got) @@ -69,7 +69,7 @@ def mock_open(*args, **kwargs): return open(local_filepath, "wb") monkeypatch.setattr(gcsfs.core.GCSFileSystem, "open", mock_open) - gdf.to_orc("gcs://{}".format(gcs_fname)) + gdf.to_orc(f"gcs://{gcs_fname}") got = pa.orc.ORCFile(local_filepath).read().to_pandas() assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index f5decd62ea9..7b92356dfba 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -85,8 +85,7 @@ def make_frame( def get_nelem(): - for elem in [2, 3, 1000]: - yield elem + yield from [2, 3, 1000] @pytest.fixture diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index 24554f113bb..2d61d6693cb 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ b/python/cudf/cudf/tests/test_hdfs.py @@ -62,7 +62,7 @@ def test_read_csv(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_csv_reader.csv".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_csv_reader.csv" got = cudf.read_csv(hd_fpath) @@ -81,7 +81,7 @@ def test_write_csv(pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_csv_writer.csv".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_csv_writer.csv" gdf.to_csv(hd_fpath, index=False) @@ -107,7 +107,7 @@ def test_read_parquet(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = 
"hdfs://{}/test_parquet_reader.parquet".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_parquet_reader.parquet" got = cudf.read_parquet(hd_fpath) @@ -126,7 +126,7 @@ def test_write_parquet(pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_parquet_writer.parquet".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_parquet_writer.parquet" gdf.to_parquet(hd_fpath) @@ -153,7 +153,7 @@ def test_write_parquet_partitioned(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_parquet_partitioned.parquet".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_parquet_partitioned.parquet" # Clear data written from previous runs hdfs.rm(f"{basedir}/test_parquet_partitioned.parquet", recursive=True) gdf.to_parquet( @@ -186,7 +186,7 @@ def test_read_json(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_json_reader.json".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_json_reader.json" got = cudf.read_json(hd_fpath, engine="cudf", orient="records", lines=True) @@ -207,9 +207,9 @@ def test_read_orc(datadir, hdfs, test_url): hdfs.upload(basedir + "/file.orc", buffer) if test_url: - hd_fpath = "hdfs://{}:{}{}/file.orc".format(host, port, basedir) + hd_fpath = f"hdfs://{host}:{port}{basedir}/file.orc" else: - hd_fpath = "hdfs://{}/file.orc".format(basedir) + hd_fpath = f"hdfs://{basedir}/file.orc" got = cudf.read_orc(hd_fpath) expect = orc.ORCFile(buffer).read().to_pandas() @@ -226,7 +226,7 @@ def test_write_orc(pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_orc_writer.orc".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_orc_writer.orc" gdf.to_orc(hd_fpath) @@ -247,9 +247,9 @@ def test_read_avro(datadir, hdfs, test_url): hdfs.upload(basedir + "/file.avro", buffer) if test_url: - hd_fpath = "hdfs://{}:{}{}/file.avro".format(host, port, basedir) + hd_fpath = f"hdfs://{host}:{port}{basedir}/file.avro" else: - hd_fpath = 
"hdfs://{}/file.avro".format(basedir) + hd_fpath = f"hdfs://{basedir}/file.avro" got = cudf.read_avro(hd_fpath) with open(fname, mode="rb") as f: @@ -270,7 +270,7 @@ def test_storage_options(tmpdir, pdf, hdfs): # Write to hdfs hdfs.upload(basedir + "/file.csv", buffer) - hd_fpath = "hdfs://{}/file.csv".format(basedir) + hd_fpath = f"hdfs://{basedir}/file.csv" storage_options = {"host": host, "port": port} @@ -293,7 +293,7 @@ def test_storage_options_error(tmpdir, pdf, hdfs): # Write to hdfs hdfs.upload(basedir + "/file.csv", buffer) - hd_fpath = "hdfs://{}:{}{}/file.avro".format(host, port, basedir) + hd_fpath = f"hdfs://{host}:{port}{basedir}/file.avro" storage_options = {"host": host, "port": port} diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py index 3de38b2cf6f..09129a43f07 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/test_query.py @@ -1,6 +1,5 @@ # Copyright (c) 2018, NVIDIA CORPORATION. -from __future__ import division, print_function import datetime import inspect diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 40add502309..7106ab54686 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -1,6 +1,5 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
-from __future__ import division, print_function import re from decimal import Decimal diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index da1ffc1fc16..29060927d75 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -147,7 +147,7 @@ def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, bytes_per_thread=bytes_per_thread, use_python_file_object=False, @@ -157,7 +157,7 @@ def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): # Use Arrow PythonFile object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, bytes_per_thread=bytes_per_thread, use_python_file_object=True, @@ -174,7 +174,7 @@ def test_read_csv_arrow_nativefile(s3_base, s3so, pdf): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file("{}/{}".format(bname, fname)) as fil: + with fs.open_input_file(f"{bname}/{fname}") as fil: got = cudf.read_csv(fil) assert_eq(pdf, got) @@ -193,7 +193,7 @@ def test_read_csv_byte_range( # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, byte_range=(74, 73), bytes_per_thread=bytes_per_thread, @@ -213,15 +213,15 @@ def test_write_csv(s3_base, s3so, pdf, chunksize): gdf = cudf.from_pandas(pdf) with s3_context(s3_base=s3_base, bucket=bname) as s3fs: gdf.to_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", index=False, chunksize=chunksize, storage_options=s3so, ) - assert s3fs.exists("s3://{}/{}".format(bname, fname)) + assert 
s3fs.exists(f"s3://{bname}/{fname}") # TODO: Update to use `storage_options` from pandas v1.2.0 - got = pd.read_csv(s3fs.open("s3://{}/{}".format(bname, fname))) + got = pd.read_csv(s3fs.open(f"s3://{bname}/{fname}")) assert_eq(pdf, got) @@ -248,7 +248,7 @@ def test_read_parquet( buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got1 = cudf.read_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", open_file_options=( {"precache_options": {"method": precache}} if use_python_file_object @@ -266,9 +266,9 @@ def test_read_parquet( buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): fs = get_fs_token_paths( - "s3://{}/{}".format(bname, fname), storage_options=s3so + f"s3://{bname}/{fname}", storage_options=s3so )[0] - with fs.open("s3://{}/{}".format(bname, fname), mode="rb") as f: + with fs.open(f"s3://{bname}/{fname}", mode="rb") as f: got2 = cudf.read_parquet( f, bytes_per_thread=bytes_per_thread, @@ -297,7 +297,7 @@ def test_read_parquet_ext( buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got1 = cudf.read_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, bytes_per_thread=bytes_per_thread, footer_sample_size=3200, @@ -326,7 +326,7 @@ def test_read_parquet_arrow_nativefile(s3_base, s3so, pdf, columns): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file("{}/{}".format(bname, fname)) as fil: + with fs.open_input_file(f"{bname}/{fname}") as fil: got = cudf.read_parquet(fil, columns=columns) expect = pdf[columns] if columns else pdf @@ -343,7 +343,7 @@ def test_read_parquet_filters(s3_base, s3so, pdf_ext, precache): filters = [("String", "==", "Omega")] with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, 
filters=filters, open_file_options={"precache_options": {"method": precache}}, @@ -360,13 +360,13 @@ def test_write_parquet(s3_base, s3so, pdf, partition_cols): gdf = cudf.from_pandas(pdf) with s3_context(s3_base=s3_base, bucket=bname) as s3fs: gdf.to_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", partition_cols=partition_cols, storage_options=s3so, ) - assert s3fs.exists("s3://{}/{}".format(bname, fname)) + assert s3fs.exists(f"s3://{bname}/{fname}") - got = pd.read_parquet(s3fs.open("s3://{}/{}".format(bname, fname))) + got = pd.read_parquet(s3fs.open(f"s3://{bname}/{fname}")) assert_eq(pdf, got) @@ -383,7 +383,7 @@ def test_read_json(s3_base, s3so): with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_json( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", engine="cudf", orient="records", lines=True, @@ -407,7 +407,7 @@ def test_read_orc(s3_base, s3so, datadir, use_python_file_object, columns): with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_orc( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", columns=columns, storage_options=s3so, use_python_file_object=use_python_file_object, @@ -432,7 +432,7 @@ def test_read_orc_arrow_nativefile(s3_base, s3so, datadir, columns): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file("{}/{}".format(bname, fname)) as fil: + with fs.open_input_file(f"{bname}/{fname}") as fil: got = cudf.read_orc(fil, columns=columns) if columns: @@ -445,10 +445,10 @@ def test_write_orc(s3_base, s3so, pdf): bname = "orc" gdf = cudf.from_pandas(pdf) with s3_context(s3_base=s3_base, bucket=bname) as s3fs: - gdf.to_orc("s3://{}/{}".format(bname, fname), storage_options=s3so) - assert s3fs.exists("s3://{}/{}".format(bname, fname)) + gdf.to_orc(f"s3://{bname}/{fname}", storage_options=s3so) + assert s3fs.exists(f"s3://{bname}/{fname}") - with 
s3fs.open("s3://{}/{}".format(bname, fname)) as f: + with s3fs.open(f"s3://{bname}/{fname}") as f: got = pa.orc.ORCFile(f).read().to_pandas() assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index 00cd31e7539..10c3689fcd7 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -105,7 +105,7 @@ def test_series_argsort(nelem, dtype, asc): ) def test_series_sort_index(nelem, asc): np.random.seed(0) - sr = Series((100 * np.random.random(nelem))) + sr = Series(100 * np.random.random(nelem)) psr = sr.to_pandas() expected = psr.sort_index(ascending=asc) diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index a447a60c709..5ff66fc750f 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -763,7 +763,7 @@ def test_read_text(datadir): chess_file = str(datadir) + "/chess.pgn" delimiter = "1." - with open(chess_file, "r") as f: + with open(chess_file) as f: content = f.read().split(delimiter) # Since Python split removes the delimiter and read_text does diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py index 021c4052759..bd7ee45fbf8 100644 --- a/python/cudf/cudf/tests/test_transform.py +++ b/python/cudf/cudf/tests/test_transform.py @@ -1,6 +1,5 @@ # Copyright (c) 2018-2020, NVIDIA CORPORATION. -from __future__ import division import numpy as np import pytest diff --git a/python/cudf/cudf/tests/test_udf_binops.py b/python/cudf/cudf/tests/test_udf_binops.py index 935c3868a68..8f4d2e695d9 100644 --- a/python/cudf/cudf/tests/test_udf_binops.py +++ b/python/cudf/cudf/tests/test_udf_binops.py @@ -1,5 +1,4 @@ # Copyright (c) 2018, NVIDIA CORPORATION. 
-from __future__ import division import numpy as np import pytest diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index 22c78b5f933..dd736abd7d0 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -1,5 +1,3 @@ -from __future__ import division - import itertools import operator import re diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index fa5cde76524..7876e3652f3 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -125,7 +125,7 @@ def make_aggregate_nullmask(df, columns=None, op="and"): return out_mask -class ApplyKernelCompilerBase(object): +class ApplyKernelCompilerBase: def __init__( self, func, incols, outcols, kwargs, pessimistic_nulls, cache_key ): @@ -251,7 +251,7 @@ def row_wise_kernel({args}): srcidx.format(a=a, start=start, stop=stop, stride=stride) ) - body.append("inner({})".format(args)) + body.append(f"inner({args})") indented = ["{}{}".format(" " * 4, ln) for ln in body] # Finalize source @@ -307,7 +307,7 @@ def chunk_wise_kernel(nrows, chunks, {args}): slicedargs = {} for a in argnames: if a not in extras: - slicedargs[a] = "{}[start:stop]".format(a) + slicedargs[a] = f"{a}[start:stop]" else: slicedargs[a] = str(a) body.append( @@ -359,4 +359,4 @@ def _load_cache_or_make_chunk_wise_kernel(func, *args, **kwargs): def _mangle_user(name): """Mangle user variable name""" - return "__user_{}".format(name) + return f"__user_{name}" diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index f0533dcaa72..742c747ab69 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -218,7 +218,7 @@ def make_cache_key(udf, sig): codebytes = udf.__code__.co_code constants = udf.__code__.co_consts if udf.__closure__ is not None: - cvars = tuple([x.cell_contents for x in udf.__closure__]) + cvars = tuple(x.cell_contents for x in 
udf.__closure__) cvarbytes = dumps(cvars) else: cvarbytes = b"" diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 44bbb1b493d..4cd1738996f 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -160,8 +160,8 @@ def numeric_normalize_types(*args): def _find_common_type_decimal(dtypes): # Find the largest scale and the largest difference between # precision and scale of the columns to be concatenated - s = max([dtype.scale for dtype in dtypes]) - lhs = max([dtype.precision - dtype.scale for dtype in dtypes]) + s = max(dtype.scale for dtype in dtypes) + lhs = max(dtype.precision - dtype.scale for dtype in dtypes) # Combine to get the necessary precision and clip at the maximum # precision p = s + lhs @@ -525,7 +525,7 @@ def find_common_type(dtypes): ) for dtype in dtypes ): - if len(set(dtype._categories.dtype for dtype in dtypes)) == 1: + if len({dtype._categories.dtype for dtype in dtypes}) == 1: return cudf.CategoricalDtype( cudf.core.column.concat_columns( [dtype._categories for dtype in dtypes] diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py index 45004c5f107..58e0541d3db 100644 --- a/python/cudf/cudf/utils/hash_vocab_utils.py +++ b/python/cudf/cudf/utils/hash_vocab_utils.py @@ -80,9 +80,9 @@ def _pick_initial_a_b(data, max_constant, init_bins): if score <= max_constant and longest <= MAX_SIZE_FOR_INITIAL_BIN: print( - "Attempting to build table using {:.6f}n space".format(score) + f"Attempting to build table using {score:.6f}n space" ) - print("Longest bin was {}".format(longest)) + print(f"Longest bin was {longest}") break return bins, a, b @@ -170,7 +170,7 @@ def _pack_keys_and_values(flattened_hash_table, original_dict): def _load_vocab_dict(path): vocab = {} - with open(path, mode="r", encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: counter = 0 for line in f: vocab[line.strip()] = counter @@ -193,17 +193,17 @@ def 
_store_func( ): with open(out_name, mode="w+") as f: - f.write("{}\n".format(outer_a)) - f.write("{}\n".format(outer_b)) - f.write("{}\n".format(num_outer_bins)) + f.write(f"{outer_a}\n") + f.write(f"{outer_b}\n") + f.write(f"{num_outer_bins}\n") f.writelines( - "{} {}\n".format(coeff, offset) + f"{coeff} {offset}\n" for coeff, offset in zip(inner_table_coeffs, offsets_into_ht) ) - f.write("{}\n".format(len(hash_table))) - f.writelines("{}\n".format(kv) for kv in hash_table) + f.write(f"{len(hash_table)}\n") + f.writelines(f"{kv}\n" for kv in hash_table) f.writelines( - "{}\n".format(tok_id) + f"{tok_id}\n" for tok_id in [unk_tok_id, first_token_id, sep_token_id] ) @@ -295,6 +295,6 @@ def hash_vocab( ) assert ( val == value - ), "Incorrect value found. Got {} expected {}".format(val, value) + ), f"Incorrect value found. Got {val} expected {value}" print("All present tokens return correct value.") diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index d9153c2b1d2..64218ddf46a 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -136,7 +136,7 @@ def query_compile(expr): key "args" is a sequence of name of the arguments. 
""" - funcid = "queryexpr_{:x}".format(np.uintp(hash(expr))) + funcid = f"queryexpr_{np.uintp(hash(expr)):x}" # Load cache compiled = _cache.get(funcid) # Cache not found @@ -147,7 +147,7 @@ def query_compile(expr): # compile devicefn = cuda.jit(device=True)(fn) - kernelid = "kernel_{}".format(funcid) + kernelid = f"kernel_{funcid}" kernel = _wrap_query_expr(kernelid, devicefn, args) compiled = info.copy() @@ -173,10 +173,10 @@ def _add_idx(arg): if arg.startswith(ENVREF_PREFIX): return arg else: - return "{}[idx]".format(arg) + return f"{arg}[idx]" def _add_prefix(arg): - return "_args_{}".format(arg) + return f"_args_{arg}" glbls = {"queryfn": fn, "cuda": cuda} kernargs = map(_add_prefix, args) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index a8e14504469..7e49bcb60ce 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -64,7 +64,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_version = None with open( - os.path.join(cuda_include_dir, "cuda.h"), "r", encoding="utf-8" + os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8" ) as f: for line in f.readlines(): if re.search(r"#define CUDA_VERSION ", line) is not None: diff --git a/python/cudf/versioneer.py b/python/cudf/versioneer.py index a6537a34ede..6bb75d8ae85 100644 --- a/python/cudf/versioneer.py +++ b/python/cudf/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . 
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. 
By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file 
{versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/cudf_kafka/cudf_kafka/_version.py b/python/cudf_kafka/cudf_kafka/_version.py index 5ab5c72e457..8475afe1a6c 100644 --- a/python/cudf_kafka/cudf_kafka/_version.py +++ b/python/cudf_kafka/cudf_kafka/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - 
print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/cudf_kafka/versioneer.py b/python/cudf_kafka/versioneer.py index 2260d5c2dcf..253f0547a94 100644 --- a/python/cudf_kafka/versioneer.py +++ b/python/cudf_kafka/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. 
keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = 
get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/custreamz/custreamz/_version.py b/python/custreamz/custreamz/_version.py index a3409a06953..8131fbf0c30 100644 --- a/python/custreamz/custreamz/_version.py +++ b/python/custreamz/custreamz/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = 
p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py index 24f6e46f6c5..a7378408c24 100644 --- a/python/custreamz/custreamz/tests/test_dataframes.py +++ b/python/custreamz/custreamz/tests/test_dataframes.py @@ -4,7 +4,6 @@ Tests for Streamz Dataframes (SDFs) built on top of cuDF DataFrames. *** Borrowed from streamz.dataframe.tests | License at thirdparty/LICENSE *** """ -from __future__ import division, print_function import json import operator diff --git a/python/custreamz/versioneer.py b/python/custreamz/versioneer.py index 9c9ddae7340..d2a75c38787 100644 --- a/python/custreamz/versioneer.py +++ b/python/custreamz/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . 
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. 
By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file 
{versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py index 8ca2cf98381..85dbc55c197 100644 --- a/python/dask_cudf/dask_cudf/_version.py +++ b/python/dask_cudf/dask_cudf/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - 
print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index e191873f82b..729db6c232d 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -516,7 +516,7 @@ def _extract_meta(x): elif isinstance(x, list): return [_extract_meta(_x) for _x in x] elif isinstance(x, tuple): - return tuple([_extract_meta(_x) for _x in x]) + return tuple(_extract_meta(_x) for _x in x) elif isinstance(x, dict): return {k: _extract_meta(v) for k, v in x.items()} return x @@ -611,9 +611,7 @@ def reduction( if not isinstance(args, (tuple, list)): args = [args] - npartitions = set( - arg.npartitions for arg in args if isinstance(arg, _Frame) - ) + npartitions = {arg.npartitions for arg in args if isinstance(arg, _Frame)} if len(npartitions) > 1: raise ValueError("All arguments must have same number of partitions") npartitions = npartitions.pop() @@ -636,7 +634,7 @@ def reduction( ) # Chunk - a = "{0}-chunk-{1}".format(token or funcname(chunk), token_key) + a = f"{token or funcname(chunk)}-chunk-{token_key}" if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs: dsk = { (a, 0, i): (chunk, key) @@ -654,7 +652,7 @@ def reduction( } # Combine - b = "{0}-combine-{1}".format(token or funcname(combine), token_key) + b = f"{token or funcname(combine)}-combine-{token_key}" k = npartitions depth 
= 0 while k > split_every: @@ -670,7 +668,7 @@ def reduction( depth += 1 # Aggregate - b = "{0}-agg-{1}".format(token or funcname(aggregate), token_key) + b = f"{token or funcname(aggregate)}-agg-{token_key}" conc = (list, [(a, depth, i) for i in range(k)]) if aggregate_kwargs: dsk[(b, 0)] = (apply, aggregate, [conc], aggregate_kwargs) diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index 00fc197da9b..76c1978b83c 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -79,7 +79,9 @@ def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): ex = set(columns) - set(schema) if ex: raise ValueError( - "Requested columns (%s) not in schema (%s)" % (ex, set(schema)) + "Requested columns ({}) not in schema ({})".format( + ex, set(schema) + ) ) else: columns = list(schema) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index 706b0e272ea..3e59b9c3fcc 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -40,12 +40,12 @@ def test_roundtrip_from_dask(tmpdir, stats): tmpdir = str(tmpdir) ddf.to_parquet(tmpdir, engine="pyarrow") files = sorted( - [ + ( os.path.join(tmpdir, f) for f in os.listdir(tmpdir) # TODO: Allow "_metadata" in list after dask#6047 if not f.endswith("_metadata") - ], + ), key=natural_sort_key, ) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 39491a45e7e..44534632117 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -34,7 +34,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_version = None with open( - os.path.join(cuda_include_dir, "cuda.h"), "r", encoding="utf-8" + os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8" ) as f: for line in f.readlines(): if re.search(r"#define CUDA_VERSION ", line) is not None: diff --git 
a/python/dask_cudf/versioneer.py b/python/dask_cudf/versioneer.py index a560f2e8797..0a66806cb6d 100644 --- a/python/dask_cudf/versioneer.py +++ b/python/dask_cudf/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as 
f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") From 54b16b94cc7b5e300960a2aedec1f477e505b992 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 
Jan 2022 16:42:56 -0600 Subject: [PATCH 02/10] Run pyupgrade 2.31.0, only changes to typing. --- python/cudf/cudf/api/types.py | 2 +- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/buffer.py | 4 +- python/cudf/cudf/core/column/categorical.py | 50 +++++------ python/cudf/cudf/core/column/column.py | 86 +++++++++---------- python/cudf/cudf/core/column/datetime.py | 19 ++-- python/cudf/cudf/core/column/methods.py | 2 +- python/cudf/cudf/core/column/numerical.py | 26 +++--- .../cudf/cudf/core/column/numerical_base.py | 4 +- python/cudf/cudf/core/column/string.py | 46 +++++----- python/cudf/cudf/core/column/struct.py | 2 +- python/cudf/cudf/core/column/timedelta.py | 28 +++--- python/cudf/cudf/core/column_accessor.py | 16 ++-- python/cudf/cudf/core/dataframe.py | 6 +- python/cudf/cudf/core/frame.py | 58 ++++++------- python/cudf/cudf/core/index.py | 22 +++-- python/cudf/cudf/core/indexed_frame.py | 6 +- python/cudf/cudf/core/join/_join_helpers.py | 4 +- python/cudf/cudf/core/multiindex.py | 14 +-- python/cudf/cudf/core/series.py | 8 +- python/cudf/cudf/core/single_column_frame.py | 14 +-- python/cudf/cudf/core/subword_tokenizer.py | 2 +- python/cudf/cudf/testing/testing.py | 2 +- 23 files changed, 203 insertions(+), 220 deletions(-) diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 6d5387591cb..050a71d83f0 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -200,7 +200,7 @@ def wrapped_func(obj): def _union_categoricals( - to_union: List[Union[cudf.Series, cudf.CategoricalIndex]], + to_union: list[cudf.Series | cudf.CategoricalIndex], sort_categories: bool = False, ignore_order: bool = False, ): diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 5c5ccaf94c0..a70681a06d3 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -39,7 +39,7 @@ class BaseIndex(Serializable): """Base class for all cudf Index 
types.""" dtype: DtypeObj - _accessors: Set[Any] = set() + _accessors: set[Any] = set() _data: ColumnAccessor def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index 0658927975f..c5b4198581f 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -38,7 +38,7 @@ class Buffer(Serializable): _owner: Any def __init__( - self, data: Any = None, size: Optional[int] = None, owner: Any = None + self, data: Any = None, size: int | None = None, owner: Any = None ): if isinstance(data, Buffer): @@ -117,7 +117,7 @@ def _init_from_array_like(self, data, owner): f"Cannot construct Buffer from {data.__class__.__name__}" ) - def serialize(self) -> Tuple[dict, list]: + def serialize(self) -> tuple[dict, list]: header = {} # type: Dict[Any, Any] header["type-serialized"] = pickle.dumps(type(self)) header["constructor-kwargs"] = {} diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index de06e62cbb1..bafcc19ab1d 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -110,14 +110,14 @@ def __init__(self, parent: SeriesOrSingleColumnIndex): super().__init__(parent=parent) @property - def categories(self) -> "cudf.core.index.BaseIndex": + def categories(self) -> cudf.core.index.BaseIndex: """ The categories of this categorical. """ return cudf.core.index.as_index(self._column.categories) @property - def codes(self) -> "cudf.Series": + def codes(self) -> cudf.Series: """ Return Series of codes as well as the index. """ @@ -129,13 +129,13 @@ def codes(self) -> "cudf.Series": return cudf.Series(self._column.codes, index=index) @property - def ordered(self) -> Optional[bool]: + def ordered(self) -> bool | None: """ Whether the categories have an ordered relationship. 
""" return self._column.ordered - def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: + def as_ordered(self, inplace: bool = False) -> SeriesOrIndex | None: """ Set the Categorical to be ordered. @@ -192,7 +192,7 @@ def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: self._column.as_ordered(), inplace=inplace ) - def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: + def as_unordered(self, inplace: bool = False) -> SeriesOrIndex | None: """ Set the Categorical to be unordered. @@ -262,7 +262,7 @@ def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: def add_categories( self, new_categories: Any, inplace: bool = False - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Add new categories. @@ -347,7 +347,7 @@ def add_categories( def remove_categories( self, removals: Any, inplace: bool = False, - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Remove the specified categories. @@ -441,7 +441,7 @@ def set_categories( ordered: bool = False, rename: bool = False, inplace: bool = False, - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Set the categories to the specified new_categories. @@ -535,7 +535,7 @@ def reorder_categories( new_categories: Any, ordered: bool = False, inplace: bool = False, - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Reorder categories as specified in new_categories. @@ -624,8 +624,8 @@ class CategoricalColumn(column.ColumnBase): """ dtype: cudf.core.dtypes.CategoricalDtype - _codes: Optional[NumericalColumn] - _children: Tuple[NumericalColumn] + _codes: NumericalColumn | None + _children: tuple[NumericalColumn] def __init__( self, @@ -634,7 +634,7 @@ def __init__( size: int = None, offset: int = 0, null_count: int = None, - children: Tuple["column.ColumnBase", ...] = (), + children: tuple[column.ColumnBase, ...] 
= (), ): if size is None: @@ -671,8 +671,8 @@ def __contains__(self, item: ScalarLike) -> bool: return False return self._encode(item) in self.as_numerical - def serialize(self) -> Tuple[dict, list]: - header: Dict[Any, Any] = {} + def serialize(self) -> tuple[dict, list]: + header: dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"], dtype_frames = self.dtype.serialize() @@ -729,23 +729,23 @@ def set_base_data(self, value): def _process_values_for_isin( self, values: Sequence - ) -> Tuple[ColumnBase, ColumnBase]: + ) -> tuple[ColumnBase, ColumnBase]: lhs = self # We need to convert values to same type as self, # hence passing dtype=self.dtype rhs = cudf.core.column.as_column(values, dtype=self.dtype) return lhs, rhs - def set_base_mask(self, value: Optional[Buffer]): + def set_base_mask(self, value: Buffer | None): super().set_base_mask(value) self._codes = None - def set_base_children(self, value: Tuple[ColumnBase, ...]): + def set_base_children(self, value: tuple[ColumnBase, ...]): super().set_base_children(value) self._codes = None @property - def children(self) -> Tuple[NumericalColumn]: + def children(self) -> tuple[NumericalColumn]: if self._children is None: codes_column = self.base_children[0] @@ -788,7 +788,7 @@ def codes(self) -> NumericalColumn: return cast(cudf.core.column.NumericalColumn, self._codes) @property - def ordered(self) -> Optional[bool]: + def ordered(self) -> bool | None: return self.dtype.ordered @ordered.setter @@ -842,7 +842,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> "column.ColumnBase": + ) -> column.ColumnBase: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -858,7 +858,7 @@ def _fill( def slice( self, start: int, stop: int, stride: int = None - ) -> "column.ColumnBase": + ) -> column.ColumnBase: codes = self.codes.slice(start, stop, stride) return cudf.core.column.build_categorical_column( categories=self.categories, @@ -909,7 
+909,7 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: def sort_by_values( self, ascending: bool = True, na_position="last" - ) -> Tuple[CategoricalColumn, NumericalColumn]: + ) -> tuple[CategoricalColumn, NumericalColumn]: codes, inds = self.as_numerical.sort_by_values(ascending, na_position) col = column.build_categorical_column( categories=self.dtype.categories._values, @@ -991,7 +991,7 @@ def values(self): """ raise NotImplementedError("cudf.Categorical is not yet implemented") - def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase": + def clip(self, lo: ScalarLike, hi: ScalarLike) -> column.ColumnBase: return ( self.astype(self.categories.dtype).clip(lo, hi).astype(self.dtype) ) @@ -1329,7 +1329,7 @@ def memory_usage(self) -> int: def _mimic_inplace( self, other_col: ColumnBase, inplace: bool = False - ) -> Optional[ColumnBase]: + ) -> ColumnBase | None: out = super()._mimic_inplace(other_col, inplace=inplace) if inplace and isinstance(other_col, CategoricalColumn): self._codes = other_col._codes @@ -1572,7 +1572,7 @@ def as_unordered(self): def _create_empty_categorical_column( - categorical_column: CategoricalColumn, dtype: "CategoricalDtype" + categorical_column: CategoricalColumn, dtype: CategoricalDtype ) -> CategoricalColumn: return column.build_categorical_column( categories=column.as_column(dtype.categories), diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7999fa9039b..ed19264b228 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -83,7 +83,7 @@ class ColumnBase(Column, Serializable): - def as_frame(self) -> "cudf.core.frame.Frame": + def as_frame(self) -> cudf.core.frame.Frame: """ Converts a Column to Frame """ @@ -92,14 +92,14 @@ def as_frame(self) -> "cudf.core.frame.Frame": ) @property - def data_array_view(self) -> "cuda.devicearray.DeviceNDArray": + def data_array_view(self) -> 
cuda.devicearray.DeviceNDArray: """ View the data as a device array object """ return cuda.as_cuda_array(self.data).view(self.dtype) @property - def mask_array_view(self) -> "cuda.devicearray.DeviceNDArray": + def mask_array_view(self) -> cuda.devicearray.DeviceNDArray: """ View the mask as a device array """ @@ -115,7 +115,7 @@ def __repr__(self): f"dtype: {self.dtype}" ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. @@ -134,7 +134,7 @@ def __iter__(self): cudf.utils.utils.raise_iteration_error(obj=self) @property - def values_host(self) -> "np.ndarray": + def values_host(self) -> np.ndarray: """ Return a numpy representation of the Column. """ @@ -147,7 +147,7 @@ def values_host(self) -> "np.ndarray": return self.data_array_view.copy_to_host() @property - def values(self) -> "cupy.ndarray": + def values(self) -> cupy.ndarray: """ Return a CuPy representation of the Column. """ @@ -319,7 +319,7 @@ def _default_na_value(self) -> Any: # TODO: This method is deprecated and can be removed when the associated # Frame methods are removed. - def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray": + def to_gpu_array(self, fillna=None) -> cuda.devicearray.DeviceNDArray: """Get a dense numba device array for the data. 
Parameters @@ -365,7 +365,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> Optional[ColumnBase]: + ) -> ColumnBase | None: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -517,7 +517,7 @@ def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase: ) return self.take(gather_map) - def __getitem__(self, arg) -> Union[ScalarLike, ColumnBase]: + def __getitem__(self, arg) -> ScalarLike | ColumnBase: if _is_scalar_or_zero_d_array(arg): return self.element_indexing(int(arg)) elif isinstance(arg, slice): @@ -677,7 +677,7 @@ def append(self, other: ColumnBase) -> ColumnBase: def quantile( self, - q: Union[float, Sequence[float]], + q: float | Sequence[float], interpolation: builtins.str, exact: bool, ) -> ColumnBase: @@ -740,7 +740,7 @@ def isin(self, values: Sequence) -> ColumnBase: def _process_values_for_isin( self, values: Sequence - ) -> Tuple[ColumnBase, ColumnBase]: + ) -> tuple[ColumnBase, ColumnBase]: """ Helper function for `isin` which pre-process `values` based on `self`. """ @@ -752,7 +752,7 @@ def _process_values_for_isin( rhs = rhs.astype(lhs.dtype) return lhs, rhs - def _isin_earlystop(self, rhs: ColumnBase) -> Union[ColumnBase, None]: + def _isin_earlystop(self, rhs: ColumnBase) -> ColumnBase | None: """ Helper function for `isin` which determines possibility of early-stopping or not. 
@@ -847,7 +847,7 @@ def sort_by_values( self: ColumnBase, ascending: bool = True, na_position: builtins.str = "last", - ) -> Tuple[ColumnBase, "cudf.core.column.NumericalColumn"]: + ) -> tuple[ColumnBase, cudf.core.column.NumericalColumn]: col_inds = self.as_frame()._get_sorted_inds( ascending=ascending, na_position=na_position ) @@ -960,47 +960,47 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase: def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: raise NotImplementedError def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: raise NotImplementedError def as_interval_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.IntervalColumn": + ) -> cudf.core.column.IntervalColumn: raise NotImplementedError def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: raise NotImplementedError def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: raise NotImplementedError def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> Union["cudf.core.column.decimal.DecimalBaseColumn"]: + ) -> cudf.core.column.decimal.DecimalBaseColumn: raise NotImplementedError def as_decimal128_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal128Column": + ) -> cudf.core.column.Decimal128Column: raise NotImplementedError def as_decimal64_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal64Column": + ) -> cudf.core.column.Decimal64Column: raise NotImplementedError def as_decimal32_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal32Column": + ) -> cudf.core.column.Decimal32Column: raise NotImplementedError def apply_boolean_mask(self, mask) -> ColumnBase: @@ -1110,8 +1110,8 @@ def 
unique(self) -> ColumnBase: return drop_duplicates([self], keep="first")[0] - def serialize(self) -> Tuple[dict, list]: - header: Dict[Any, Any] = {} + def serialize(self) -> tuple[dict, list]: + header: dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"] = self.dtype.str @@ -1155,7 +1155,7 @@ def binary_operator( def normalize_binop_value( self, other: ScalarLike - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: raise NotImplementedError def _minmax(self, skipna: bool = None): @@ -1217,7 +1217,7 @@ def nans_to_nulls(self: T) -> T: def _process_for_reduction( self, skipna: bool = None, min_count: int = 0 - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: skipna = True if skipna is None else skipna if skipna: @@ -1347,14 +1347,14 @@ def column_empty( def build_column( - data: Union[Buffer, None], + data: Buffer | None, dtype: Dtype, *, size: int = None, mask: Buffer = None, offset: int = 0, null_count: int = None, - children: Tuple[ColumnBase, ...] = (), + children: tuple[ColumnBase, ...] 
= (), ) -> ColumnBase: """ Build a Column of the appropriate type from the given parameters @@ -1516,7 +1516,7 @@ def build_categorical_column( offset: int = 0, null_count: int = None, ordered: bool = None, -) -> "cudf.core.column.CategoricalColumn": +) -> cudf.core.column.CategoricalColumn: """ Build a CategoricalColumn @@ -1606,7 +1606,7 @@ def build_list_column( size: int = None, offset: int = 0, null_count: int = None, -) -> "cudf.core.column.ListColumn": +) -> cudf.core.column.ListColumn: """ Build a ListColumn @@ -1638,13 +1638,13 @@ def build_list_column( def build_struct_column( names: Sequence[str], - children: Tuple[ColumnBase, ...], - dtype: Optional[Dtype] = None, + children: tuple[ColumnBase, ...], + dtype: Dtype | None = None, mask: Buffer = None, size: int = None, offset: int = 0, null_count: int = None, -) -> "cudf.core.column.StructColumn": +) -> cudf.core.column.StructColumn: """ Build a StructColumn @@ -2177,8 +2177,8 @@ def as_column( def _construct_array( - arbitrary: Any, dtype: Optional[Dtype] -) -> Union[np.ndarray, cupy.ndarray]: + arbitrary: Any, dtype: Dtype | None +) -> np.ndarray | cupy.ndarray: """ Construct a CuPy or NumPy array from `arbitrary` """ @@ -2212,7 +2212,7 @@ def _data_from_cuda_array_interface_desc(obj) -> Buffer: return data -def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]: +def _mask_from_cuda_array_interface_desc(obj) -> Buffer | None: desc = obj.__cuda_array_interface__ mask = desc.get("mask", None) @@ -2235,7 +2235,7 @@ def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]: return mask -def serialize_columns(columns) -> Tuple[List[dict], List]: +def serialize_columns(columns) -> tuple[list[dict], list]: """ Return the headers and frames resulting from serializing a list of Column @@ -2250,7 +2250,7 @@ def serialize_columns(columns) -> Tuple[List[dict], List]: frames : list list of frames """ - headers: List[Dict[Any, Any]] = [] + headers: list[dict[Any, Any]] = [] frames = [] if 
len(columns) > 0: @@ -2262,7 +2262,7 @@ def serialize_columns(columns) -> Tuple[List[dict], List]: return headers, frames -def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: +def deserialize_columns(headers: list[dict], frames: list) -> list[ColumnBase]: """ Construct a list of Columns from a list of headers and frames. @@ -2281,9 +2281,9 @@ def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: def arange( - start: Union[int, float], - stop: Union[int, float] = None, - step: Union[int, float] = 1, + start: int | float, + stop: int | float = None, + step: int | float = 1, dtype=None, ) -> ColumnBase: """ @@ -2372,7 +2372,7 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) -def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: +def concat_columns(objs: MutableSequence[ColumnBase]) -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = cudf.dtype(None) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b763790986a..75b50f7e12e 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -193,7 +193,7 @@ def day_of_year(self) -> ColumnBase: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> "cudf.Series": + ) -> cudf.Series: # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -264,7 +264,7 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self) -> "cudf.core.column.NumericalColumn": + def as_numerical(self) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", column.build_column( @@ -311,21 +311,21 @@ def as_datetime_column(self, dtype: Dtype, **kwargs) -> 
DatetimeColumn: def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: raise TypeError( f"cannot astype a datetimelike from {self.dtype} to {dtype}" ) def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) ) def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%Y-%m-%d %H:%M:%S" @@ -370,7 +370,7 @@ def median(self, skipna: bool = None) -> pd.Timestamp: ) def quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + self, q: float | Sequence[float], interpolation: str, exact: bool ) -> ColumnBase: result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact @@ -380,14 +380,11 @@ def quantile( return result.astype(self.dtype) def binary_operator( - self, - op: str, - rhs: Union[ColumnBase, "cudf.Scalar"], - reflect: bool = False, + self, op: str, rhs: ColumnBase | cudf.Scalar, reflect: bool = False, ) -> ColumnBase: if isinstance(rhs, cudf.DateOffset): return rhs._datetime_binop(self, op, reflect=reflect) - lhs: Union[ScalarLike, ColumnBase] = self + lhs: ScalarLike | ColumnBase = self if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"): out_dtype = cudf.dtype(np.bool_) # type: Dtype elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype): diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 9bea94cfecb..6f0ffae3343 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -51,7 +51,7 @@ def _return_or_inplace( inplace: bool = False, expand: bool = False, retain_index: bool = True, - ) -> 
Optional[ParentType]: + ) -> ParentType | None: ... def _return_or_inplace( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a7481ce62a3..730e223f111 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -61,7 +61,7 @@ class NumericalColumn(NumericalBaseColumn): mask : Buffer, optional """ - _nan_count: Optional[int] + _nan_count: int | None def __init__( self, @@ -142,7 +142,7 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: return output - def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: + def unary_operator(self, unaryop: str | Callable) -> ColumnBase: if callable(unaryop): return libcudf.transform.transform(self, unaryop) @@ -179,7 +179,7 @@ def binary_operator( msg = "{!r} operator not supported between {} and {}" raise TypeError(msg.format(binop, type(self), type(rhs))) if isinstance(rhs, cudf.core.column.Decimal128Column): - lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column( + lhs: ScalarLike | ColumnBase = self.as_decimal_column( Decimal128Dtype(Decimal128Dtype.MAX_PRECISION, 0) ) return lhs.binary_operator(binop, rhs) @@ -226,7 +226,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: def normalize_binop_value( self, other: ScalarLike - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: if other is None: return other if isinstance(other, cudf.Scalar): @@ -259,7 +259,7 @@ def normalize_binop_value( else: raise TypeError(f"cannot broadcast {type(other)}") - def int2ip(self) -> "cudf.core.column.StringColumn": + def int2ip(self) -> cudf.core.column.StringColumn: if self.dtype != cudf.dtype("int64"): raise TypeError("Only int64 type can be converted to ip") @@ -267,7 +267,7 @@ def int2ip(self) -> "cudf.core.column.StringColumn": def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: if len(self) > 
0: return string._numeric_to_str_typecast_functions[ cudf.dtype(self.dtype) @@ -279,7 +279,7 @@ def as_string_column( def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: return cast( "cudf.core.column.DatetimeColumn", build_column( @@ -293,7 +293,7 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: return cast( "cudf.core.column.TimeDeltaColumn", build_column( @@ -307,7 +307,7 @@ def as_timedelta_column( def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DecimalBaseColumn": + ) -> cudf.core.column.DecimalBaseColumn: return libcudf.unary.cast(self, dtype) def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn: @@ -327,7 +327,7 @@ def nan_count(self) -> int: def _process_values_for_isin( self, values: Sequence - ) -> Tuple[ColumnBase, ColumnBase]: + ) -> tuple[ColumnBase, ColumnBase]: lhs = cast("cudf.core.column.ColumnBase", self) rhs = as_column(values, nan_as_null=False) @@ -346,7 +346,7 @@ def _can_return_nan(self, skipna: bool = None) -> bool: def _process_for_reduction( self, skipna: bool = None, min_count: int = 0 - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: skipna = True if skipna is None else skipna if self._can_return_nan(skipna=skipna): @@ -642,7 +642,7 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> "pd.Series": + ) -> pd.Series: if nullable and self.dtype in np_dtypes_to_pandas_dtypes: pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype] arrow_array = self.to_arrow() @@ -670,7 +670,7 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype: def _normalize_find_and_replace_input( - input_column_dtype: DtypeObj, col_to_normalize: Union[ColumnBase, list] + 
input_column_dtype: DtypeObj, col_to_normalize: ColumnBase | list ) -> ColumnBase: normalized_column = column.as_column( col_to_normalize, diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 1f84cb88e37..e5918578fbf 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -129,7 +129,7 @@ def skew(self, skipna: bool = None) -> ScalarLike: return skew def quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + self, q: float | Sequence[float], interpolation: str, exact: bool ) -> NumericalBaseColumn: if isinstance(q, Number) or cudf.api.types.is_list_like(q): np_array_q = np.asarray(q) @@ -158,7 +158,7 @@ def median(self, skipna: bool = None) -> NumericalBaseColumn: return self.quantile(0.5, interpolation="linear", exact=True) def _numeric_quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + self, q: float | Sequence[float], interpolation: str, exact: bool ) -> NumericalBaseColumn: quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q # get sorted indices and exclude nulls diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 7f62d8fe03f..7a00c7401d2 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -257,7 +257,7 @@ def cat(self, sep: str = None, na_rep: str = None) -> str: @overload def cat( self, others, sep: str = None, na_rep: str = None - ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: + ) -> SeriesOrIndex | cudf.core.column.string.StringColumn: ... 
def cat(self, others=None, sep=None, na_rep=None): @@ -630,7 +630,7 @@ def extract( def contains( self, - pat: Union[str, Sequence], + pat: str | Sequence, case: bool = True, flags: int = 0, na=np.nan, @@ -771,7 +771,7 @@ def contains( ) return self._return_or_inplace(result_col) - def repeat(self, repeats: Union[int, Sequence],) -> SeriesOrIndex: + def repeat(self, repeats: int | Sequence,) -> SeriesOrIndex: """ Duplicate each string in the Series or Index. Equivalent to `str.repeat() @@ -826,8 +826,8 @@ def repeat(self, repeats: Union[int, Sequence],) -> SeriesOrIndex: def replace( self, - pat: Union[str, Sequence], - repl: Union[str, Sequence], + pat: str | Sequence, + repl: str | Sequence, n: int = -1, case=None, flags: int = 0, @@ -1988,7 +1988,7 @@ def filter_alphanum( ) def slice_from( - self, starts: "cudf.Series", stops: "cudf.Series" + self, starts: cudf.Series, stops: cudf.Series ) -> SeriesOrIndex: """ Return substring of each string using positions for each string. @@ -3608,7 +3608,7 @@ def endswith(self, pat: str) -> SeriesOrIndex: return self._return_or_inplace(result_col) - def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex: + def startswith(self, pat: str | Sequence) -> SeriesOrIndex: """ Test if the start of each string element matches a pattern. 
@@ -4285,7 +4285,7 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: ) def detokenize( - self, indices: "cudf.Series", separator: str = " " + self, indices: cudf.Series, separator: str = " " ) -> SeriesOrIndex: """ Combines tokens into strings by concatenating them in the order @@ -4958,9 +4958,9 @@ class StringColumn(column.ColumnBase): respectively """ - _start_offset: Optional[int] - _end_offset: Optional[int] - _cached_sizeof: Optional[int] + _start_offset: int | None + _end_offset: int | None + _cached_sizeof: int | None def __init__( self, @@ -4968,7 +4968,7 @@ def __init__( size: int = None, # TODO: make non-optional offset: int = 0, null_count: int = None, - children: Tuple["column.ColumnBase", ...] = (), + children: tuple[column.ColumnBase, ...] = (), ): dtype = cudf.dtype("object") @@ -5123,7 +5123,7 @@ def __contains__(self, item: ScalarLike) -> bool: def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: out_dtype = cudf.dtype(dtype) string_col = self if out_dtype.kind in {"i", "u"}: @@ -5165,7 +5165,7 @@ def _as_datetime_or_timedelta_column(self, dtype, format): def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: out_dtype = cudf.dtype(dtype) # infer on host from the first not na element @@ -5189,14 +5189,14 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: out_dtype = cudf.dtype(dtype) format = "%D days %H:%M:%S" return self._as_datetime_or_timedelta_column(out_dtype, format) def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DecimalBaseColumn": + ) -> cudf.core.column.DecimalBaseColumn: return libstrings.to_decimal(self, dtype) def as_string_column( @@ -5240,7 +5240,7 @@ def to_array(self, fillna: bool = None) -> np.ndarray: def 
to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> "pd.Series": + ) -> pd.Series: if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) pd_series = pd.Series(pandas_array, copy=False) @@ -5251,8 +5251,8 @@ def to_pandas( pd_series.index = index return pd_series - def serialize(self) -> Tuple[dict, list]: - header: Dict[Any, Any] = {"null_count": self.null_count} + def serialize(self) -> tuple[dict, list]: + header: dict[Any, Any] = {"null_count": self.null_count} header["type-serialized"] = pickle.dumps(type(self)) header["size"] = self.size @@ -5366,7 +5366,7 @@ def fillna( else: return super().fillna(method=method) - def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: + def _find_first_and_last(self, value: ScalarLike) -> tuple[int, int]: found_indices = libcudf.search.contains( self, column.as_column([value], dtype=self.dtype) ) @@ -5383,7 +5383,7 @@ def find_first_value( def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: return self._find_first_and_last(value)[1] - def normalize_binop_value(self, other) -> "column.ColumnBase": + def normalize_binop_value(self, other) -> column.ColumnBase: # fastpath: gpu scalar if isinstance(other, cudf.Scalar) and other.dtype == "object": return column.as_column(other, length=len(self)) @@ -5407,7 +5407,7 @@ def _default_na_value(self) -> ScalarLike: def binary_operator( self, op: builtins.str, rhs, reflect: bool = False - ) -> "column.ColumnBase": + ) -> column.ColumnBase: lhs = self if reflect: lhs, rhs = rhs, lhs @@ -5431,7 +5431,7 @@ def binary_operator( ) @copy_docstring(column.ColumnBase.view) - def view(self, dtype) -> "cudf.core.column.ColumnBase": + def view(self, dtype) -> cudf.core.column.ColumnBase: if self.null_count > 0: raise ValueError( "Can not produce a view of a string column with nulls" diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index f0d02a706e2..c344ab3739d 
100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -81,7 +81,7 @@ def to_arrow(self): pa_type, len(self), buffers, children=children ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 4b7a3bcc197..da485a144bc 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -133,7 +133,7 @@ def to_pandas( def _binary_op_floordiv( self, rhs: BinaryOperand - ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + ) -> tuple[column.ColumnBase, BinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -203,7 +203,7 @@ def _binary_op_lt_gt_le_ge(self, rhs: BinaryOperand) -> DtypeObj: def _binary_op_truediv( self, rhs: BinaryOperand - ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + ) -> tuple[column.ColumnBase, BinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -229,7 +229,7 @@ def _binary_op_truediv( def binary_operator( self, op: str, rhs: BinaryOperand, reflect: bool = False - ) -> "column.ColumnBase": + ) -> column.ColumnBase: lhs, rhs = self, rhs if op in ("eq", "ne"): @@ -292,7 +292,7 @@ def normalize_binop_value(self, other) -> BinaryOperand: raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self) -> "cudf.core.column.NumericalColumn": + def as_numerical(self) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", column.build_column( @@ -334,21 +334,21 
@@ def fillna( def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) ) def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: raise TypeError( f"cannot astype a timedelta from {self.dtype} to {dtype}" ) def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%D days %H:%M:%S" @@ -384,8 +384,8 @@ def isin(self, values: Sequence) -> ColumnBase: return cudf.core.tools.datetimes._isin_datetimelike(self, values) def quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool - ) -> "column.ColumnBase": + self, q: float | Sequence[float], interpolation: str, exact: bool + ) -> column.ColumnBase: result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact ) @@ -411,7 +411,7 @@ def std( unit=self.time_unit, ) - def components(self, index=None) -> "cudf.DataFrame": + def components(self, index=None) -> cudf.DataFrame: """ Return a Dataframe of the components of the Timedeltas. @@ -505,7 +505,7 @@ def components(self, index=None) -> "cudf.DataFrame": ) @property - def days(self) -> "cudf.core.column.NumericalColumn": + def days(self) -> cudf.core.column.NumericalColumn: """ Number of days for each element. @@ -518,7 +518,7 @@ def days(self) -> "cudf.core.column.NumericalColumn": ) @property - def seconds(self) -> "cudf.core.column.NumericalColumn": + def seconds(self) -> cudf.core.column.NumericalColumn: """ Number of seconds (>= 0 and less than 1 day). 
@@ -541,7 +541,7 @@ def seconds(self) -> "cudf.core.column.NumericalColumn": ) @property - def microseconds(self) -> "cudf.core.column.NumericalColumn": + def microseconds(self) -> cudf.core.column.NumericalColumn: """ Number of microseconds (>= 0 and less than 1 second). @@ -561,7 +561,7 @@ def microseconds(self) -> "cudf.core.column.NumericalColumn": ) @property - def nanoseconds(self) -> "cudf.core.column.NumericalColumn": + def nanoseconds(self) -> cudf.core.column.NumericalColumn: """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index c2ea9d756f7..d836dc5b2db 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -94,13 +94,13 @@ class ColumnAccessor(MutableMapping): may be passe. """ - _data: "Dict[Any, ColumnBase]" + _data: dict[Any, ColumnBase] multiindex: bool - _level_names: Tuple[Any, ...] + _level_names: tuple[Any, ...] 
def __init__( self, - data: Union[MutableMapping, ColumnAccessor] = None, + data: MutableMapping | ColumnAccessor = None, multiindex: bool = False, level_names=None, ): @@ -137,7 +137,7 @@ def __init__( @classmethod def _create_unsafe( cls, - data: Dict[Any, ColumnBase], + data: dict[Any, ColumnBase], multiindex: bool = False, level_names=None, ) -> ColumnAccessor: @@ -177,7 +177,7 @@ def __repr__(self) -> str: return f"{type_info}\n{column_info}" @property - def level_names(self) -> Tuple[Any, ...]: + def level_names(self) -> tuple[Any, ...]: if self._level_names is None or len(self._level_names) == 0: return tuple((None,) * max(1, self.nlevels)) else: @@ -206,11 +206,11 @@ def nrows(self) -> int: return len(next(iter(self.values()))) @cached_property - def names(self) -> Tuple[Any, ...]: + def names(self) -> tuple[Any, ...]: return tuple(self.keys()) @cached_property - def columns(self) -> Tuple[ColumnBase, ...]: + def columns(self) -> tuple[ColumnBase, ...]: return tuple(self.values()) @cached_property @@ -463,7 +463,7 @@ def _pad_key(self, key: Any, pad_value="") -> Any: return key + (pad_value,) * (self.nlevels - len(key)) def rename_levels( - self, mapper: Union[Mapping[Any, Any], Callable], level: Optional[int] + self, mapper: Mapping[Any, Any] | Callable, level: int | None ) -> ColumnAccessor: """ Rename the specified levels of the given ColumnAccessor diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bd08ac385c7..336b659d115 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -503,7 +503,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): """ _PROTECTED_KEYS = frozenset(("_data", "_index")) - _accessors: Set[Any] = set() + _accessors: set[Any] = set() _loc_indexer_type = _DataFrameLocIndexer _iloc_indexer_type = _DataFrameIlocIndexer @@ -822,7 +822,7 @@ def _init_from_dict_like( def _from_data( cls, data: MutableMapping, - index: Optional[BaseIndex] = None, 
+ index: BaseIndex | None = None, columns: Any = None, ) -> DataFrame: out = super()._from_data(data, index) @@ -6433,7 +6433,7 @@ def _setitem_with_dataframe( input_df: DataFrame, replace_df: DataFrame, input_cols: Any = None, - mask: Optional[cudf.core.column.ColumnBase] = None, + mask: cudf.core.column.ColumnBase | None = None, ignore_index: bool = False, ): """ diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 69dc5389e7a..9d86aa30b16 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -65,11 +65,11 @@ class Frame: A Frame representing the (optional) index columns. """ - _data: "ColumnAccessor" + _data: ColumnAccessor # TODO: Once all dependence on Frame having an index is removed, this # attribute should be moved to IndexedFrame. - _index: Optional[cudf.core.index.BaseIndex] - _names: Optional[List] + _index: cudf.core.index.BaseIndex | None + _names: list | None def __init__(self, data=None, index=None): if data is None: @@ -90,11 +90,11 @@ def _num_rows(self) -> int: return len(self._data.columns[0]) @property - def _column_names(self) -> List[Any]: # TODO: List[str]? + def _column_names(self) -> list[Any]: # TODO: List[str]? return self._data.names @property - def _index_names(self) -> List[Any]: # TODO: List[str]? + def _index_names(self) -> list[Any]: # TODO: List[str]? # TODO: Temporarily suppressing mypy warnings to avoid introducing bugs # by returning an empty list where one is not expected. return ( @@ -104,7 +104,7 @@ def _index_names(self) -> List[Any]: # TODO: List[str]? ) @property - def _columns(self) -> List[Any]: # TODO: List[Column]? + def _columns(self) -> list[Any]: # TODO: List[Column]? 
return self._data.columns def serialize(self): @@ -126,7 +126,7 @@ def deserialize(cls, header, frames): def _from_data( cls, data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, + index: cudf.core.index.BaseIndex | None = None, ): obj = cls.__new__(cls) Frame.__init__(obj, data, index) @@ -135,9 +135,9 @@ def _from_data( @classmethod def _from_columns( cls, - columns: List[ColumnBase], - column_names: List[str], - index_names: Optional[List[str]] = None, + columns: list[ColumnBase], + column_names: list[str], + index_names: list[str] | None = None, ): """Construct a `Frame` object from a list of columns. @@ -165,9 +165,9 @@ def _from_columns( def _from_columns_like_self( self, - columns: List[ColumnBase], - column_names: List[str], - index_names: Optional[List[str]] = None, + columns: list[ColumnBase], + column_names: list[str], + index_names: list[str] | None = None, ): """Construct a `Frame` from a list of columns with metadata from self. @@ -181,7 +181,7 @@ def _from_columns_like_self( def _mimic_inplace( self: T, result: Frame, inplace: bool = False - ) -> Optional[Frame]: + ) -> Frame | None: if inplace: for col in self._data: if col in result._data: @@ -616,9 +616,9 @@ def _to_array( self, get_column_values: Callable, make_empty_matrix: Callable, - dtype: Union[Dtype, None] = None, + dtype: Dtype | None = None, na_value=None, - ) -> Union[cupy.ndarray, np.ndarray]: + ) -> cupy.ndarray | np.ndarray: # Internal function to implement to_cupy and to_numpy, which are nearly # identical except for the attribute they access to generate values. @@ -650,10 +650,7 @@ def get_column_values_na(col): return matrix def to_cupy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = False, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = False, na_value=None, ) -> cupy.ndarray: """Convert the Frame to a CuPy array. 
@@ -684,10 +681,7 @@ def to_cupy( ) def to_numpy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = True, na_value=None, ) -> np.ndarray: """Convert the Frame to a NumPy array. @@ -3440,7 +3434,7 @@ def _binaryop( @classmethod def _colwise_binop( cls, - operands: Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]], fn: str, ): """Implement binary ops between two frame-like objects. @@ -6438,8 +6432,8 @@ def ge(self, other, axis="columns", level=None, fill_value=None): def _get_replacement_values_for_columns( - to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any] -) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]: + to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any] +) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]: """ Returns a per column mapping for the values to be replaced, new values to be replaced with and if all the values are empty. @@ -6464,9 +6458,9 @@ def _get_replacement_values_for_columns( A dict mapping of all columns and the corresponding values to be replaced with. """ - to_replace_columns: Dict[Any, Any] = {} - values_columns: Dict[Any, Any] = {} - all_na_columns: Dict[Any, Any] = {} + to_replace_columns: dict[Any, Any] = {} + values_columns: dict[Any, Any] = {} + all_na_columns: dict[Any, Any] = {} if is_scalar(to_replace) and is_scalar(value): to_replace_columns = {col: [to_replace] for col in columns_dtype_map} @@ -6603,8 +6597,8 @@ def _is_series(obj): def _drop_rows_by_labels( obj: DataFrameOrSeries, - labels: Union[ColumnLike, abc.Iterable, str], - level: Union[int, str], + labels: ColumnLike | abc.Iterable | str, + level: int | str, errors: str, ) -> DataFrameOrSeries: """Remove rows specified by `labels`. 
If `errors=True`, an error is raised diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88371666ce6..983525e4ea2 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -60,10 +60,8 @@ def _lexsorted_equal_range( - idx: Union[GenericIndex, cudf.MultiIndex], - key_as_table: Frame, - is_sorted: bool, -) -> Tuple[int, int, Optional[ColumnBase]]: + idx: GenericIndex | cudf.MultiIndex, key_as_table: Frame, is_sorted: bool, +) -> tuple[int, int, ColumnBase | None]: """Get equal range for key in lexicographically sorted index. If index is not sorted when called, a sort will take place and `sort_inds` is returned. Otherwise `None` is returned in that position. @@ -94,8 +92,8 @@ def _index_from_data(data: MutableMapping, name: Any = None): if isinstance(values, NumericalColumn): try: - index_class_type: Type[ - Union[GenericIndex, cudf.MultiIndex] + index_class_type: type[ + GenericIndex | cudf.MultiIndex ] = _dtype_to_index[values.dtype.type] except KeyError: index_class_type = GenericIndex @@ -115,7 +113,7 @@ def _index_from_data(data: MutableMapping, name: Any = None): def _index_from_columns( - columns: List[cudf.core.column.ColumnBase], name: Any = None + columns: list[cudf.core.column.ColumnBase], name: Any = None ): """Construct an index from ``columns``, with levels named 0, 1, 2...""" return _index_from_data(dict(zip(range(len(columns)), columns)), name=name) @@ -1213,7 +1211,7 @@ class NumericIndex(GenericIndex): """ # Subclasses must define the dtype they are associated with. - _dtype: Union[None, Type[np.number]] = None + _dtype: None | type[np.number] = None def __init__(self, data=None, dtype=None, copy=False, name=None): @@ -2246,7 +2244,7 @@ def is_categorical(self): def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right", -) -> "IntervalIndex": +) -> IntervalIndex: """ Returns a fixed frequency IntervalIndex. 
@@ -2593,7 +2591,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex: ) -_dtype_to_index: Dict[Any, Type[NumericIndex]] = { +_dtype_to_index: dict[Any, type[NumericIndex]] = { np.int8: Int8Index, np.int16: Int16Index, np.int32: Int32Index, @@ -2704,7 +2702,7 @@ def from_arrow(cls, obj): return cudf.MultiIndex.from_arrow(obj) -def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: +def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: """ An internal Utility function to concat RangeIndex objects. """ @@ -2744,7 +2742,7 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: return RangeIndex(start, stop, step) -def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]: +def _extended_gcd(a: int, b: int) -> tuple[int, int, int]: """ Extended Euclidean algorithms to solve Bezout's identity: a*x + b*y = gcd(x, y) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 72878078593..784be09494c 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -135,8 +135,8 @@ class IndexedFrame(Frame): """ # mypy can't handle bound type variables as class members - _loc_indexer_type: Type[_LocIndexerClass] # type: ignore - _iloc_indexer_type: Type[_IlocIndexerClass] # type: ignore + _loc_indexer_type: type[_LocIndexerClass] # type: ignore + _iloc_indexer_type: type[_IlocIndexerClass] # type: ignore _index: cudf.core.index.BaseIndex def __init__(self, data=None, index=None): @@ -1486,7 +1486,7 @@ def _reset_index(self, level, drop, col_level=0, col_fill=""): def _first_or_last( self, offset, idx: int, op: Callable, side: str, slice_func: Callable - ) -> "IndexedFrame": + ) -> IndexedFrame: """Shared code path for ``first`` and ``last``.""" if not isinstance(self._index, cudf.core.index.DatetimeIndex): raise TypeError("'first' only supports a DatetimeIndex index.") diff --git a/python/cudf/cudf/core/join/_join_helpers.py 
b/python/cudf/cudf/core/join/_join_helpers.py index ead0cd566d9..7589ed27484 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -57,7 +57,7 @@ def set(self, obj: Frame, value: ColumnBase, validate=False): def _match_join_keys( lcol: ColumnBase, rcol: ColumnBase, how: str -) -> Tuple[ColumnBase, ColumnBase]: +) -> tuple[ColumnBase, ColumnBase]: # Casts lcol and rcol to a common dtype for use as join keys. If no casting # is necessary, they are returned as is. @@ -118,7 +118,7 @@ def _match_join_keys( def _match_categorical_dtypes_both( lcol: CategoricalColumn, rcol: CategoricalColumn, how: str -) -> Tuple[ColumnBase, ColumnBase]: +) -> tuple[ColumnBase, ColumnBase]: ltype, rtype = lcol.dtype, rcol.dtype # when both are ordered and both have the same categories, diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 3796b596ad6..0c573d4efed 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -255,7 +255,7 @@ def set_names(self, names, level=None, inplace=False): def _from_data( cls, data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, + index: cudf.core.index.BaseIndex | None = None, name: Any = None, ) -> MultiIndex: assert index is None @@ -781,9 +781,9 @@ def _index_and_downcast(self, result, index, index_key): def _get_row_major( self, df: DataFrameOrSeries, - row_tuple: Union[ - numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]] - ], + row_tuple: ( + numbers.Number | slice | tuple[Any, ...] | list[tuple[Any, ...]] + ), ) -> DataFrameOrSeries: if pd.api.types.is_bool_dtype( list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple @@ -805,9 +805,9 @@ def _get_row_major( def _validate_indexer( self, - indexer: Union[ - numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]] - ], + indexer: ( + numbers.Number | slice | tuple[Any, ...] 
| list[tuple[Any, ...]] + ), ): if isinstance(indexer, numbers.Number): return diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 7f00162099a..e2d350f0f5b 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -156,7 +156,7 @@ class _SeriesLocIndexer(_FrameIndexer): Label-based selection """ - def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: + def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: if isinstance(arg, pd.MultiIndex): arg = cudf.from_pandas(arg) @@ -281,7 +281,7 @@ class Series(SingleColumnFrame, IndexedFrame, Serializable): If ``False``, leaves ``np.nan`` values as is. """ - _accessors: Set[Any] = set() + _accessors: set[Any] = set() _loc_indexer_type = _SeriesLocIndexer _iloc_indexer_type = _SeriesIlocIndexer @@ -450,7 +450,7 @@ def __init__( def _from_data( cls, data: MutableMapping, - index: Optional[BaseIndex] = None, + index: BaseIndex | None = None, name: Any = None, ) -> Series: """ @@ -1049,7 +1049,7 @@ def __array_function__(self, func, types, args, kwargs): kwargs, ) - def map(self, arg, na_action=None) -> "Series": + def map(self, arg, na_action=None) -> Series: """ Map values of Series according to input correspondence. 
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 7793a2fdf29..2d98c66e726 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -57,7 +57,7 @@ def _scan(self, op, axis=None, *args, **kwargs): def _from_data( cls, data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, + index: cudf.core.index.BaseIndex | None = None, name: Any = None, ): @@ -118,18 +118,12 @@ def values_host(self): # noqa: D102 return self._column.values_host def to_cupy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = True, na_value=None, ) -> cupy.ndarray: # noqa: D102 return super().to_cupy(dtype, copy, na_value).flatten() def to_numpy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = True, na_value=None, ) -> np.ndarray: # noqa: D102 return super().to_numpy(dtype, copy, na_value).flatten() @@ -294,7 +288,7 @@ def _make_operands_for_binop( reflect: bool = False, *args, **kwargs, - ) -> Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]: + ) -> dict[str | None, tuple[ColumnBase, Any, bool, Any]]: """Generate the dictionary of operands used for a binary operation. 
Parameters diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 782b74ef4a6..cb72faac004 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -61,7 +61,7 @@ def __call__( max_num_rows: int, add_special_tokens: bool = True, padding: str = "max_length", - truncation: Union[bool, str] = False, + truncation: bool | str = False, stride: int = 0, return_tensors: str = "cp", return_token_type_ids: bool = False, diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 59c291eea0b..79d08cca2a2 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -239,7 +239,7 @@ def assert_index_equal( right, exact="equiv", check_names: bool = True, - check_less_precise: Union[bool, int] = False, + check_less_precise: bool | int = False, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, From 0360c31cbb1675cc4298d1f8de7d8ad292bc7fd3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 16:44:49 -0600 Subject: [PATCH 03/10] Reapply black formatting. 
--- python/cudf/cudf/tests/test_api_types.py | 22 +++++++++++----------- python/cudf/cudf/tests/test_s3.py | 6 +++--- python/cudf/cudf/utils/hash_vocab_utils.py | 4 +--- python/cudf/setup.py | 4 +--- python/cudf/versioneer.py | 6 +++++- python/cudf_kafka/versioneer.py | 6 +++++- python/custreamz/versioneer.py | 6 +++++- python/dask_cudf/setup.py | 4 +--- python/dask_cudf/versioneer.py | 6 +++++- 9 files changed, 37 insertions(+), 27 deletions(-) diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py index f8f93eaa2e3..1bc5f092aa7 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -17,7 +17,7 @@ (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -128,7 +128,7 @@ def test_is_categorical_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -235,7 +235,7 @@ def test_is_numeric_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -342,7 +342,7 @@ def test_is_integer_dtype(obj, expect): (int(), True), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -450,7 +450,7 @@ def test_is_integer(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -557,7 +557,7 @@ def test_is_string_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -664,7 +664,7 @@ def test_is_datetime_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -771,7 +771,7 @@ def 
test_is_list_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -881,7 +881,7 @@ def test_is_struct_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - ('', False), + ("", False), ("", False), (r"", False), (object(), False), @@ -988,7 +988,7 @@ def test_is_decimal_dtype(obj, expect): int(), float(), complex(), - '', + "", "", r"", object(), @@ -1070,7 +1070,7 @@ def test_pandas_agreement(obj): int(), float(), complex(), - '', + "", "", r"", object(), diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 29060927d75..4807879a730 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -265,9 +265,9 @@ def test_read_parquet( # Check fsspec file-object handling buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): - fs = get_fs_token_paths( - f"s3://{bname}/{fname}", storage_options=s3so - )[0] + fs = get_fs_token_paths(f"s3://{bname}/{fname}", storage_options=s3so)[ + 0 + ] with fs.open(f"s3://{bname}/{fname}", mode="rb") as f: got2 = cudf.read_parquet( f, diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py index 58e0541d3db..11029cbfe5e 100644 --- a/python/cudf/cudf/utils/hash_vocab_utils.py +++ b/python/cudf/cudf/utils/hash_vocab_utils.py @@ -79,9 +79,7 @@ def _pick_initial_a_b(data, max_constant, init_bins): longest = _new_bin_length(_longest_bin_length(bins)) if score <= max_constant and longest <= MAX_SIZE_FOR_INITIAL_BIN: - print( - f"Attempting to build table using {score:.6f}n space" - ) + print(f"Attempting to build table using {score:.6f}n space") print(f"Longest bin was {longest}") break diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 7e49bcb60ce..e4e43bc1595 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -63,9 +63,7 @@ def 
get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_version = None - with open( - os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8" - ) as f: + with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f: for line in f.readlines(): if re.search(r"#define CUDA_VERSION ", line) is not None: cuda_version = line diff --git a/python/cudf/versioneer.py b/python/cudf/versioneer.py index 6bb75d8ae85..bbf456a84ef 100644 --- a/python/cudf/versioneer.py +++ b/python/cudf/versioneer.py @@ -1770,7 +1770,11 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: + except ( + OSError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr diff --git a/python/cudf_kafka/versioneer.py b/python/cudf_kafka/versioneer.py index 253f0547a94..c7dbfd76734 100644 --- a/python/cudf_kafka/versioneer.py +++ b/python/cudf_kafka/versioneer.py @@ -1770,7 +1770,11 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: + except ( + OSError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr diff --git a/python/custreamz/versioneer.py b/python/custreamz/versioneer.py index d2a75c38787..2d1032e584e 100644 --- a/python/custreamz/versioneer.py +++ b/python/custreamz/versioneer.py @@ -1770,7 +1770,11 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: + except ( + OSError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if 
isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 44534632117..635f21fd906 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -33,9 +33,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_version = None - with open( - os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8" - ) as f: + with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f: for line in f.readlines(): if re.search(r"#define CUDA_VERSION ", line) is not None: cuda_version = line diff --git a/python/dask_cudf/versioneer.py b/python/dask_cudf/versioneer.py index 0a66806cb6d..1e94791233c 100644 --- a/python/dask_cudf/versioneer.py +++ b/python/dask_cudf/versioneer.py @@ -1770,7 +1770,11 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: + except ( + OSError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr From bf0a47167eb6c55fa6a32026d2da01e2c4ac7e21 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 17:07:23 -0600 Subject: [PATCH 04/10] Remove extra line. --- docs/cudf/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 5de1d918a09..186fc252402 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# # cudf documentation build configuration file, created by From bee1da511edb33c405a61aaec966b214818bc795 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 17:07:25 -0600 Subject: [PATCH 05/10] Revert "Run pyupgrade 2.31.0, only changes to typing." This reverts commit 54b16b94cc7b5e300960a2aedec1f477e505b992. --- python/cudf/cudf/api/types.py | 2 +- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/buffer.py | 4 +- python/cudf/cudf/core/column/categorical.py | 50 +++++------ python/cudf/cudf/core/column/column.py | 86 +++++++++---------- python/cudf/cudf/core/column/datetime.py | 19 ++-- python/cudf/cudf/core/column/methods.py | 2 +- python/cudf/cudf/core/column/numerical.py | 26 +++--- .../cudf/cudf/core/column/numerical_base.py | 4 +- python/cudf/cudf/core/column/string.py | 46 +++++----- python/cudf/cudf/core/column/struct.py | 2 +- python/cudf/cudf/core/column/timedelta.py | 28 +++--- python/cudf/cudf/core/column_accessor.py | 16 ++-- python/cudf/cudf/core/dataframe.py | 6 +- python/cudf/cudf/core/frame.py | 58 +++++++------ python/cudf/cudf/core/index.py | 22 ++--- python/cudf/cudf/core/indexed_frame.py | 6 +- python/cudf/cudf/core/join/_join_helpers.py | 4 +- python/cudf/cudf/core/multiindex.py | 14 +-- python/cudf/cudf/core/series.py | 8 +- python/cudf/cudf/core/single_column_frame.py | 14 ++- python/cudf/cudf/core/subword_tokenizer.py | 2 +- python/cudf/cudf/testing/testing.py | 2 +- 23 files changed, 220 insertions(+), 203 deletions(-) diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 050a71d83f0..6d5387591cb 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -200,7 +200,7 @@ def wrapped_func(obj): def _union_categoricals( - to_union: list[cudf.Series | cudf.CategoricalIndex], + to_union: List[Union[cudf.Series, cudf.CategoricalIndex]], sort_categories: bool = False, ignore_order: bool = False, ): diff --git a/python/cudf/cudf/core/_base_index.py 
b/python/cudf/cudf/core/_base_index.py index a70681a06d3..5c5ccaf94c0 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -39,7 +39,7 @@ class BaseIndex(Serializable): """Base class for all cudf Index types.""" dtype: DtypeObj - _accessors: set[Any] = set() + _accessors: Set[Any] = set() _data: ColumnAccessor def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index c5b4198581f..0658927975f 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -38,7 +38,7 @@ class Buffer(Serializable): _owner: Any def __init__( - self, data: Any = None, size: int | None = None, owner: Any = None + self, data: Any = None, size: Optional[int] = None, owner: Any = None ): if isinstance(data, Buffer): @@ -117,7 +117,7 @@ def _init_from_array_like(self, data, owner): f"Cannot construct Buffer from {data.__class__.__name__}" ) - def serialize(self) -> tuple[dict, list]: + def serialize(self) -> Tuple[dict, list]: header = {} # type: Dict[Any, Any] header["type-serialized"] = pickle.dumps(type(self)) header["constructor-kwargs"] = {} diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index bafcc19ab1d..de06e62cbb1 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -110,14 +110,14 @@ def __init__(self, parent: SeriesOrSingleColumnIndex): super().__init__(parent=parent) @property - def categories(self) -> cudf.core.index.BaseIndex: + def categories(self) -> "cudf.core.index.BaseIndex": """ The categories of this categorical. """ return cudf.core.index.as_index(self._column.categories) @property - def codes(self) -> cudf.Series: + def codes(self) -> "cudf.Series": """ Return Series of codes as well as the index. 
""" @@ -129,13 +129,13 @@ def codes(self) -> cudf.Series: return cudf.Series(self._column.codes, index=index) @property - def ordered(self) -> bool | None: + def ordered(self) -> Optional[bool]: """ Whether the categories have an ordered relationship. """ return self._column.ordered - def as_ordered(self, inplace: bool = False) -> SeriesOrIndex | None: + def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: """ Set the Categorical to be ordered. @@ -192,7 +192,7 @@ def as_ordered(self, inplace: bool = False) -> SeriesOrIndex | None: self._column.as_ordered(), inplace=inplace ) - def as_unordered(self, inplace: bool = False) -> SeriesOrIndex | None: + def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: """ Set the Categorical to be unordered. @@ -262,7 +262,7 @@ def as_unordered(self, inplace: bool = False) -> SeriesOrIndex | None: def add_categories( self, new_categories: Any, inplace: bool = False - ) -> SeriesOrIndex | None: + ) -> Optional[SeriesOrIndex]: """ Add new categories. @@ -347,7 +347,7 @@ def add_categories( def remove_categories( self, removals: Any, inplace: bool = False, - ) -> SeriesOrIndex | None: + ) -> Optional[SeriesOrIndex]: """ Remove the specified categories. @@ -441,7 +441,7 @@ def set_categories( ordered: bool = False, rename: bool = False, inplace: bool = False, - ) -> SeriesOrIndex | None: + ) -> Optional[SeriesOrIndex]: """ Set the categories to the specified new_categories. @@ -535,7 +535,7 @@ def reorder_categories( new_categories: Any, ordered: bool = False, inplace: bool = False, - ) -> SeriesOrIndex | None: + ) -> Optional[SeriesOrIndex]: """ Reorder categories as specified in new_categories. 
@@ -624,8 +624,8 @@ class CategoricalColumn(column.ColumnBase): """ dtype: cudf.core.dtypes.CategoricalDtype - _codes: NumericalColumn | None - _children: tuple[NumericalColumn] + _codes: Optional[NumericalColumn] + _children: Tuple[NumericalColumn] def __init__( self, @@ -634,7 +634,7 @@ def __init__( size: int = None, offset: int = 0, null_count: int = None, - children: tuple[column.ColumnBase, ...] = (), + children: Tuple["column.ColumnBase", ...] = (), ): if size is None: @@ -671,8 +671,8 @@ def __contains__(self, item: ScalarLike) -> bool: return False return self._encode(item) in self.as_numerical - def serialize(self) -> tuple[dict, list]: - header: dict[Any, Any] = {} + def serialize(self) -> Tuple[dict, list]: + header: Dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"], dtype_frames = self.dtype.serialize() @@ -729,23 +729,23 @@ def set_base_data(self, value): def _process_values_for_isin( self, values: Sequence - ) -> tuple[ColumnBase, ColumnBase]: + ) -> Tuple[ColumnBase, ColumnBase]: lhs = self # We need to convert values to same type as self, # hence passing dtype=self.dtype rhs = cudf.core.column.as_column(values, dtype=self.dtype) return lhs, rhs - def set_base_mask(self, value: Buffer | None): + def set_base_mask(self, value: Optional[Buffer]): super().set_base_mask(value) self._codes = None - def set_base_children(self, value: tuple[ColumnBase, ...]): + def set_base_children(self, value: Tuple[ColumnBase, ...]): super().set_base_children(value) self._codes = None @property - def children(self) -> tuple[NumericalColumn]: + def children(self) -> Tuple[NumericalColumn]: if self._children is None: codes_column = self.base_children[0] @@ -788,7 +788,7 @@ def codes(self) -> NumericalColumn: return cast(cudf.core.column.NumericalColumn, self._codes) @property - def ordered(self) -> bool | None: + def ordered(self) -> Optional[bool]: return self.dtype.ordered @ordered.setter @@ -842,7 +842,7 @@ def _fill( 
begin: int, end: int, inplace: bool = False, - ) -> column.ColumnBase: + ) -> "column.ColumnBase": if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -858,7 +858,7 @@ def _fill( def slice( self, start: int, stop: int, stride: int = None - ) -> column.ColumnBase: + ) -> "column.ColumnBase": codes = self.codes.slice(start, stop, stride) return cudf.core.column.build_categorical_column( categories=self.categories, @@ -909,7 +909,7 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: def sort_by_values( self, ascending: bool = True, na_position="last" - ) -> tuple[CategoricalColumn, NumericalColumn]: + ) -> Tuple[CategoricalColumn, NumericalColumn]: codes, inds = self.as_numerical.sort_by_values(ascending, na_position) col = column.build_categorical_column( categories=self.dtype.categories._values, @@ -991,7 +991,7 @@ def values(self): """ raise NotImplementedError("cudf.Categorical is not yet implemented") - def clip(self, lo: ScalarLike, hi: ScalarLike) -> column.ColumnBase: + def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase": return ( self.astype(self.categories.dtype).clip(lo, hi).astype(self.dtype) ) @@ -1329,7 +1329,7 @@ def memory_usage(self) -> int: def _mimic_inplace( self, other_col: ColumnBase, inplace: bool = False - ) -> ColumnBase | None: + ) -> Optional[ColumnBase]: out = super()._mimic_inplace(other_col, inplace=inplace) if inplace and isinstance(other_col, CategoricalColumn): self._codes = other_col._codes @@ -1572,7 +1572,7 @@ def as_unordered(self): def _create_empty_categorical_column( - categorical_column: CategoricalColumn, dtype: CategoricalDtype + categorical_column: CategoricalColumn, dtype: "CategoricalDtype" ) -> CategoricalColumn: return column.build_categorical_column( categories=column.as_column(dtype.categories), diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index ed19264b228..7999fa9039b 100644 --- 
a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -83,7 +83,7 @@ class ColumnBase(Column, Serializable): - def as_frame(self) -> cudf.core.frame.Frame: + def as_frame(self) -> "cudf.core.frame.Frame": """ Converts a Column to Frame """ @@ -92,14 +92,14 @@ def as_frame(self) -> cudf.core.frame.Frame: ) @property - def data_array_view(self) -> cuda.devicearray.DeviceNDArray: + def data_array_view(self) -> "cuda.devicearray.DeviceNDArray": """ View the data as a device array object """ return cuda.as_cuda_array(self.data).view(self.dtype) @property - def mask_array_view(self) -> cuda.devicearray.DeviceNDArray: + def mask_array_view(self) -> "cuda.devicearray.DeviceNDArray": """ View the mask as a device array """ @@ -115,7 +115,7 @@ def __repr__(self): f"dtype: {self.dtype}" ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: + def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. @@ -134,7 +134,7 @@ def __iter__(self): cudf.utils.utils.raise_iteration_error(obj=self) @property - def values_host(self) -> np.ndarray: + def values_host(self) -> "np.ndarray": """ Return a numpy representation of the Column. """ @@ -147,7 +147,7 @@ def values_host(self) -> np.ndarray: return self.data_array_view.copy_to_host() @property - def values(self) -> cupy.ndarray: + def values(self) -> "cupy.ndarray": """ Return a CuPy representation of the Column. """ @@ -319,7 +319,7 @@ def _default_na_value(self) -> Any: # TODO: This method is deprecated and can be removed when the associated # Frame methods are removed. - def to_gpu_array(self, fillna=None) -> cuda.devicearray.DeviceNDArray: + def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray": """Get a dense numba device array for the data. 
Parameters @@ -365,7 +365,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> ColumnBase | None: + ) -> Optional[ColumnBase]: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -517,7 +517,7 @@ def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase: ) return self.take(gather_map) - def __getitem__(self, arg) -> ScalarLike | ColumnBase: + def __getitem__(self, arg) -> Union[ScalarLike, ColumnBase]: if _is_scalar_or_zero_d_array(arg): return self.element_indexing(int(arg)) elif isinstance(arg, slice): @@ -677,7 +677,7 @@ def append(self, other: ColumnBase) -> ColumnBase: def quantile( self, - q: float | Sequence[float], + q: Union[float, Sequence[float]], interpolation: builtins.str, exact: bool, ) -> ColumnBase: @@ -740,7 +740,7 @@ def isin(self, values: Sequence) -> ColumnBase: def _process_values_for_isin( self, values: Sequence - ) -> tuple[ColumnBase, ColumnBase]: + ) -> Tuple[ColumnBase, ColumnBase]: """ Helper function for `isin` which pre-process `values` based on `self`. """ @@ -752,7 +752,7 @@ def _process_values_for_isin( rhs = rhs.astype(lhs.dtype) return lhs, rhs - def _isin_earlystop(self, rhs: ColumnBase) -> ColumnBase | None: + def _isin_earlystop(self, rhs: ColumnBase) -> Union[ColumnBase, None]: """ Helper function for `isin` which determines possibility of early-stopping or not. 
@@ -847,7 +847,7 @@ def sort_by_values( self: ColumnBase, ascending: bool = True, na_position: builtins.str = "last", - ) -> tuple[ColumnBase, cudf.core.column.NumericalColumn]: + ) -> Tuple[ColumnBase, "cudf.core.column.NumericalColumn"]: col_inds = self.as_frame()._get_sorted_inds( ascending=ascending, na_position=na_position ) @@ -960,47 +960,47 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase: def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.NumericalColumn: + ) -> "cudf.core.column.NumericalColumn": raise NotImplementedError def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.DatetimeColumn: + ) -> "cudf.core.column.DatetimeColumn": raise NotImplementedError def as_interval_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.IntervalColumn: + ) -> "cudf.core.column.IntervalColumn": raise NotImplementedError def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.TimeDeltaColumn: + ) -> "cudf.core.column.TimeDeltaColumn": raise NotImplementedError def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> cudf.core.column.StringColumn: + ) -> "cudf.core.column.StringColumn": raise NotImplementedError def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.decimal.DecimalBaseColumn: + ) -> Union["cudf.core.column.decimal.DecimalBaseColumn"]: raise NotImplementedError def as_decimal128_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.Decimal128Column: + ) -> "cudf.core.column.Decimal128Column": raise NotImplementedError def as_decimal64_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.Decimal64Column: + ) -> "cudf.core.column.Decimal64Column": raise NotImplementedError def as_decimal32_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.Decimal32Column: + ) -> "cudf.core.column.Decimal32Column": raise NotImplementedError def apply_boolean_mask(self, mask) -> ColumnBase: @@ -1110,8 +1110,8 @@ def 
unique(self) -> ColumnBase: return drop_duplicates([self], keep="first")[0] - def serialize(self) -> tuple[dict, list]: - header: dict[Any, Any] = {} + def serialize(self) -> Tuple[dict, list]: + header: Dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"] = self.dtype.str @@ -1155,7 +1155,7 @@ def binary_operator( def normalize_binop_value( self, other: ScalarLike - ) -> ColumnBase | ScalarLike: + ) -> Union[ColumnBase, ScalarLike]: raise NotImplementedError def _minmax(self, skipna: bool = None): @@ -1217,7 +1217,7 @@ def nans_to_nulls(self: T) -> T: def _process_for_reduction( self, skipna: bool = None, min_count: int = 0 - ) -> ColumnBase | ScalarLike: + ) -> Union[ColumnBase, ScalarLike]: skipna = True if skipna is None else skipna if skipna: @@ -1347,14 +1347,14 @@ def column_empty( def build_column( - data: Buffer | None, + data: Union[Buffer, None], dtype: Dtype, *, size: int = None, mask: Buffer = None, offset: int = 0, null_count: int = None, - children: tuple[ColumnBase, ...] = (), + children: Tuple[ColumnBase, ...] 
= (), ) -> ColumnBase: """ Build a Column of the appropriate type from the given parameters @@ -1516,7 +1516,7 @@ def build_categorical_column( offset: int = 0, null_count: int = None, ordered: bool = None, -) -> cudf.core.column.CategoricalColumn: +) -> "cudf.core.column.CategoricalColumn": """ Build a CategoricalColumn @@ -1606,7 +1606,7 @@ def build_list_column( size: int = None, offset: int = 0, null_count: int = None, -) -> cudf.core.column.ListColumn: +) -> "cudf.core.column.ListColumn": """ Build a ListColumn @@ -1638,13 +1638,13 @@ def build_list_column( def build_struct_column( names: Sequence[str], - children: tuple[ColumnBase, ...], - dtype: Dtype | None = None, + children: Tuple[ColumnBase, ...], + dtype: Optional[Dtype] = None, mask: Buffer = None, size: int = None, offset: int = 0, null_count: int = None, -) -> cudf.core.column.StructColumn: +) -> "cudf.core.column.StructColumn": """ Build a StructColumn @@ -2177,8 +2177,8 @@ def as_column( def _construct_array( - arbitrary: Any, dtype: Dtype | None -) -> np.ndarray | cupy.ndarray: + arbitrary: Any, dtype: Optional[Dtype] +) -> Union[np.ndarray, cupy.ndarray]: """ Construct a CuPy or NumPy array from `arbitrary` """ @@ -2212,7 +2212,7 @@ def _data_from_cuda_array_interface_desc(obj) -> Buffer: return data -def _mask_from_cuda_array_interface_desc(obj) -> Buffer | None: +def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]: desc = obj.__cuda_array_interface__ mask = desc.get("mask", None) @@ -2235,7 +2235,7 @@ def _mask_from_cuda_array_interface_desc(obj) -> Buffer | None: return mask -def serialize_columns(columns) -> tuple[list[dict], list]: +def serialize_columns(columns) -> Tuple[List[dict], List]: """ Return the headers and frames resulting from serializing a list of Column @@ -2250,7 +2250,7 @@ def serialize_columns(columns) -> tuple[list[dict], list]: frames : list list of frames """ - headers: list[dict[Any, Any]] = [] + headers: List[Dict[Any, Any]] = [] frames = [] if 
len(columns) > 0: @@ -2262,7 +2262,7 @@ def serialize_columns(columns) -> tuple[list[dict], list]: return headers, frames -def deserialize_columns(headers: list[dict], frames: list) -> list[ColumnBase]: +def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: """ Construct a list of Columns from a list of headers and frames. @@ -2281,9 +2281,9 @@ def deserialize_columns(headers: list[dict], frames: list) -> list[ColumnBase]: def arange( - start: int | float, - stop: int | float = None, - step: int | float = 1, + start: Union[int, float], + stop: Union[int, float] = None, + step: Union[int, float] = 1, dtype=None, ) -> ColumnBase: """ @@ -2372,7 +2372,7 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) -def concat_columns(objs: MutableSequence[ColumnBase]) -> ColumnBase: +def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = cudf.dtype(None) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 75b50f7e12e..b763790986a 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -193,7 +193,7 @@ def day_of_year(self) -> ColumnBase: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> cudf.Series: + ) -> "cudf.Series": # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -264,7 +264,7 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self) -> cudf.core.column.NumericalColumn: + def as_numerical(self) -> "cudf.core.column.NumericalColumn": return cast( "cudf.core.column.NumericalColumn", column.build_column( @@ -311,21 +311,21 @@ def as_datetime_column(self, dtype: Dtype, **kwargs) -> 
DatetimeColumn: def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.TimeDeltaColumn: + ) -> "cudf.core.column.TimeDeltaColumn": raise TypeError( f"cannot astype a datetimelike from {self.dtype} to {dtype}" ) def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.NumericalColumn: + ) -> "cudf.core.column.NumericalColumn": return cast( "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) ) def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> cudf.core.column.StringColumn: + ) -> "cudf.core.column.StringColumn": if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%Y-%m-%d %H:%M:%S" @@ -370,7 +370,7 @@ def median(self, skipna: bool = None) -> pd.Timestamp: ) def quantile( - self, q: float | Sequence[float], interpolation: str, exact: bool + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool ) -> ColumnBase: result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact @@ -380,11 +380,14 @@ def quantile( return result.astype(self.dtype) def binary_operator( - self, op: str, rhs: ColumnBase | cudf.Scalar, reflect: bool = False, + self, + op: str, + rhs: Union[ColumnBase, "cudf.Scalar"], + reflect: bool = False, ) -> ColumnBase: if isinstance(rhs, cudf.DateOffset): return rhs._datetime_binop(self, op, reflect=reflect) - lhs: ScalarLike | ColumnBase = self + lhs: Union[ScalarLike, ColumnBase] = self if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"): out_dtype = cudf.dtype(np.bool_) # type: Dtype elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype): diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 6f0ffae3343..9bea94cfecb 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -51,7 +51,7 @@ def _return_or_inplace( inplace: bool = False, expand: bool = False, retain_index: bool = True, - ) -> ParentType | 
None: + ) -> Optional[ParentType]: ... def _return_or_inplace( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 730e223f111..a7481ce62a3 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -61,7 +61,7 @@ class NumericalColumn(NumericalBaseColumn): mask : Buffer, optional """ - _nan_count: int | None + _nan_count: Optional[int] def __init__( self, @@ -142,7 +142,7 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: return output - def unary_operator(self, unaryop: str | Callable) -> ColumnBase: + def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: if callable(unaryop): return libcudf.transform.transform(self, unaryop) @@ -179,7 +179,7 @@ def binary_operator( msg = "{!r} operator not supported between {} and {}" raise TypeError(msg.format(binop, type(self), type(rhs))) if isinstance(rhs, cudf.core.column.Decimal128Column): - lhs: ScalarLike | ColumnBase = self.as_decimal_column( + lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column( Decimal128Dtype(Decimal128Dtype.MAX_PRECISION, 0) ) return lhs.binary_operator(binop, rhs) @@ -226,7 +226,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: def normalize_binop_value( self, other: ScalarLike - ) -> ColumnBase | ScalarLike: + ) -> Union[ColumnBase, ScalarLike]: if other is None: return other if isinstance(other, cudf.Scalar): @@ -259,7 +259,7 @@ def normalize_binop_value( else: raise TypeError(f"cannot broadcast {type(other)}") - def int2ip(self) -> cudf.core.column.StringColumn: + def int2ip(self) -> "cudf.core.column.StringColumn": if self.dtype != cudf.dtype("int64"): raise TypeError("Only int64 type can be converted to ip") @@ -267,7 +267,7 @@ def int2ip(self) -> cudf.core.column.StringColumn: def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> cudf.core.column.StringColumn: + ) -> "cudf.core.column.StringColumn": if len(self) > 0: return 
string._numeric_to_str_typecast_functions[ cudf.dtype(self.dtype) @@ -279,7 +279,7 @@ def as_string_column( def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.DatetimeColumn: + ) -> "cudf.core.column.DatetimeColumn": return cast( "cudf.core.column.DatetimeColumn", build_column( @@ -293,7 +293,7 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.TimeDeltaColumn: + ) -> "cudf.core.column.TimeDeltaColumn": return cast( "cudf.core.column.TimeDeltaColumn", build_column( @@ -307,7 +307,7 @@ def as_timedelta_column( def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.DecimalBaseColumn: + ) -> "cudf.core.column.DecimalBaseColumn": return libcudf.unary.cast(self, dtype) def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn: @@ -327,7 +327,7 @@ def nan_count(self) -> int: def _process_values_for_isin( self, values: Sequence - ) -> tuple[ColumnBase, ColumnBase]: + ) -> Tuple[ColumnBase, ColumnBase]: lhs = cast("cudf.core.column.ColumnBase", self) rhs = as_column(values, nan_as_null=False) @@ -346,7 +346,7 @@ def _can_return_nan(self, skipna: bool = None) -> bool: def _process_for_reduction( self, skipna: bool = None, min_count: int = 0 - ) -> ColumnBase | ScalarLike: + ) -> Union[ColumnBase, ScalarLike]: skipna = True if skipna is None else skipna if self._can_return_nan(skipna=skipna): @@ -642,7 +642,7 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> pd.Series: + ) -> "pd.Series": if nullable and self.dtype in np_dtypes_to_pandas_dtypes: pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype] arrow_array = self.to_arrow() @@ -670,7 +670,7 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype: def _normalize_find_and_replace_input( - input_column_dtype: DtypeObj, col_to_normalize: ColumnBase | list + input_column_dtype: DtypeObj, 
col_to_normalize: Union[ColumnBase, list] ) -> ColumnBase: normalized_column = column.as_column( col_to_normalize, diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index e5918578fbf..1f84cb88e37 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -129,7 +129,7 @@ def skew(self, skipna: bool = None) -> ScalarLike: return skew def quantile( - self, q: float | Sequence[float], interpolation: str, exact: bool + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool ) -> NumericalBaseColumn: if isinstance(q, Number) or cudf.api.types.is_list_like(q): np_array_q = np.asarray(q) @@ -158,7 +158,7 @@ def median(self, skipna: bool = None) -> NumericalBaseColumn: return self.quantile(0.5, interpolation="linear", exact=True) def _numeric_quantile( - self, q: float | Sequence[float], interpolation: str, exact: bool + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool ) -> NumericalBaseColumn: quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q # get sorted indices and exclude nulls diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 7a00c7401d2..7f62d8fe03f 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -257,7 +257,7 @@ def cat(self, sep: str = None, na_rep: str = None) -> str: @overload def cat( self, others, sep: str = None, na_rep: str = None - ) -> SeriesOrIndex | cudf.core.column.string.StringColumn: + ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: ... 
def cat(self, others=None, sep=None, na_rep=None): @@ -630,7 +630,7 @@ def extract( def contains( self, - pat: str | Sequence, + pat: Union[str, Sequence], case: bool = True, flags: int = 0, na=np.nan, @@ -771,7 +771,7 @@ def contains( ) return self._return_or_inplace(result_col) - def repeat(self, repeats: int | Sequence,) -> SeriesOrIndex: + def repeat(self, repeats: Union[int, Sequence],) -> SeriesOrIndex: """ Duplicate each string in the Series or Index. Equivalent to `str.repeat() @@ -826,8 +826,8 @@ def repeat(self, repeats: int | Sequence,) -> SeriesOrIndex: def replace( self, - pat: str | Sequence, - repl: str | Sequence, + pat: Union[str, Sequence], + repl: Union[str, Sequence], n: int = -1, case=None, flags: int = 0, @@ -1988,7 +1988,7 @@ def filter_alphanum( ) def slice_from( - self, starts: cudf.Series, stops: cudf.Series + self, starts: "cudf.Series", stops: "cudf.Series" ) -> SeriesOrIndex: """ Return substring of each string using positions for each string. @@ -3608,7 +3608,7 @@ def endswith(self, pat: str) -> SeriesOrIndex: return self._return_or_inplace(result_col) - def startswith(self, pat: str | Sequence) -> SeriesOrIndex: + def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex: """ Test if the start of each string element matches a pattern. 
@@ -4285,7 +4285,7 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: ) def detokenize( - self, indices: cudf.Series, separator: str = " " + self, indices: "cudf.Series", separator: str = " " ) -> SeriesOrIndex: """ Combines tokens into strings by concatenating them in the order @@ -4958,9 +4958,9 @@ class StringColumn(column.ColumnBase): respectively """ - _start_offset: int | None - _end_offset: int | None - _cached_sizeof: int | None + _start_offset: Optional[int] + _end_offset: Optional[int] + _cached_sizeof: Optional[int] def __init__( self, @@ -4968,7 +4968,7 @@ def __init__( size: int = None, # TODO: make non-optional offset: int = 0, null_count: int = None, - children: tuple[column.ColumnBase, ...] = (), + children: Tuple["column.ColumnBase", ...] = (), ): dtype = cudf.dtype("object") @@ -5123,7 +5123,7 @@ def __contains__(self, item: ScalarLike) -> bool: def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.NumericalColumn: + ) -> "cudf.core.column.NumericalColumn": out_dtype = cudf.dtype(dtype) string_col = self if out_dtype.kind in {"i", "u"}: @@ -5165,7 +5165,7 @@ def _as_datetime_or_timedelta_column(self, dtype, format): def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.DatetimeColumn: + ) -> "cudf.core.column.DatetimeColumn": out_dtype = cudf.dtype(dtype) # infer on host from the first not na element @@ -5189,14 +5189,14 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.TimeDeltaColumn: + ) -> "cudf.core.column.TimeDeltaColumn": out_dtype = cudf.dtype(dtype) format = "%D days %H:%M:%S" return self._as_datetime_or_timedelta_column(out_dtype, format) def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.DecimalBaseColumn: + ) -> "cudf.core.column.DecimalBaseColumn": return libstrings.to_decimal(self, dtype) def as_string_column( @@ -5240,7 +5240,7 @@ def to_array(self, fillna: bool = None) -> np.ndarray: def 
to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> pd.Series: + ) -> "pd.Series": if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) pd_series = pd.Series(pandas_array, copy=False) @@ -5251,8 +5251,8 @@ def to_pandas( pd_series.index = index return pd_series - def serialize(self) -> tuple[dict, list]: - header: dict[Any, Any] = {"null_count": self.null_count} + def serialize(self) -> Tuple[dict, list]: + header: Dict[Any, Any] = {"null_count": self.null_count} header["type-serialized"] = pickle.dumps(type(self)) header["size"] = self.size @@ -5366,7 +5366,7 @@ def fillna( else: return super().fillna(method=method) - def _find_first_and_last(self, value: ScalarLike) -> tuple[int, int]: + def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: found_indices = libcudf.search.contains( self, column.as_column([value], dtype=self.dtype) ) @@ -5383,7 +5383,7 @@ def find_first_value( def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: return self._find_first_and_last(value)[1] - def normalize_binop_value(self, other) -> column.ColumnBase: + def normalize_binop_value(self, other) -> "column.ColumnBase": # fastpath: gpu scalar if isinstance(other, cudf.Scalar) and other.dtype == "object": return column.as_column(other, length=len(self)) @@ -5407,7 +5407,7 @@ def _default_na_value(self) -> ScalarLike: def binary_operator( self, op: builtins.str, rhs, reflect: bool = False - ) -> column.ColumnBase: + ) -> "column.ColumnBase": lhs = self if reflect: lhs, rhs = rhs, lhs @@ -5431,7 +5431,7 @@ def binary_operator( ) @copy_docstring(column.ColumnBase.view) - def view(self, dtype) -> cudf.core.column.ColumnBase: + def view(self, dtype) -> "cudf.core.column.ColumnBase": if self.null_count > 0: raise ValueError( "Can not produce a view of a string column with nulls" diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index c344ab3739d..f0d02a706e2 
100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -81,7 +81,7 @@ def to_arrow(self): pa_type, len(self), buffers, children=children ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: + def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index da485a144bc..4b7a3bcc197 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -133,7 +133,7 @@ def to_pandas( def _binary_op_floordiv( self, rhs: BinaryOperand - ) -> tuple[column.ColumnBase, BinaryOperand, DtypeObj]: + ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -203,7 +203,7 @@ def _binary_op_lt_gt_le_ge(self, rhs: BinaryOperand) -> DtypeObj: def _binary_op_truediv( self, rhs: BinaryOperand - ) -> tuple[column.ColumnBase, BinaryOperand, DtypeObj]: + ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -229,7 +229,7 @@ def _binary_op_truediv( def binary_operator( self, op: str, rhs: BinaryOperand, reflect: bool = False - ) -> column.ColumnBase: + ) -> "column.ColumnBase": lhs, rhs = self, rhs if op in ("eq", "ne"): @@ -292,7 +292,7 @@ def normalize_binop_value(self, other) -> BinaryOperand: raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self) -> cudf.core.column.NumericalColumn: + def as_numerical(self) -> "cudf.core.column.NumericalColumn": return cast( "cudf.core.column.NumericalColumn", column.build_column( @@ -334,21 +334,21 
@@ def fillna( def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.NumericalColumn: + ) -> "cudf.core.column.NumericalColumn": return cast( "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) ) def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> cudf.core.column.DatetimeColumn: + ) -> "cudf.core.column.DatetimeColumn": raise TypeError( f"cannot astype a timedelta from {self.dtype} to {dtype}" ) def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> cudf.core.column.StringColumn: + ) -> "cudf.core.column.StringColumn": if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%D days %H:%M:%S" @@ -384,8 +384,8 @@ def isin(self, values: Sequence) -> ColumnBase: return cudf.core.tools.datetimes._isin_datetimelike(self, values) def quantile( - self, q: float | Sequence[float], interpolation: str, exact: bool - ) -> column.ColumnBase: + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + ) -> "column.ColumnBase": result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact ) @@ -411,7 +411,7 @@ def std( unit=self.time_unit, ) - def components(self, index=None) -> cudf.DataFrame: + def components(self, index=None) -> "cudf.DataFrame": """ Return a Dataframe of the components of the Timedeltas. @@ -505,7 +505,7 @@ def components(self, index=None) -> cudf.DataFrame: ) @property - def days(self) -> cudf.core.column.NumericalColumn: + def days(self) -> "cudf.core.column.NumericalColumn": """ Number of days for each element. @@ -518,7 +518,7 @@ def days(self) -> cudf.core.column.NumericalColumn: ) @property - def seconds(self) -> cudf.core.column.NumericalColumn: + def seconds(self) -> "cudf.core.column.NumericalColumn": """ Number of seconds (>= 0 and less than 1 day). 
@@ -541,7 +541,7 @@ def seconds(self) -> cudf.core.column.NumericalColumn: ) @property - def microseconds(self) -> cudf.core.column.NumericalColumn: + def microseconds(self) -> "cudf.core.column.NumericalColumn": """ Number of microseconds (>= 0 and less than 1 second). @@ -561,7 +561,7 @@ def microseconds(self) -> cudf.core.column.NumericalColumn: ) @property - def nanoseconds(self) -> cudf.core.column.NumericalColumn: + def nanoseconds(self) -> "cudf.core.column.NumericalColumn": """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index d836dc5b2db..c2ea9d756f7 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -94,13 +94,13 @@ class ColumnAccessor(MutableMapping): may be passe. """ - _data: dict[Any, ColumnBase] + _data: "Dict[Any, ColumnBase]" multiindex: bool - _level_names: tuple[Any, ...] + _level_names: Tuple[Any, ...] 
def __init__( self, - data: MutableMapping | ColumnAccessor = None, + data: Union[MutableMapping, ColumnAccessor] = None, multiindex: bool = False, level_names=None, ): @@ -137,7 +137,7 @@ def __init__( @classmethod def _create_unsafe( cls, - data: dict[Any, ColumnBase], + data: Dict[Any, ColumnBase], multiindex: bool = False, level_names=None, ) -> ColumnAccessor: @@ -177,7 +177,7 @@ def __repr__(self) -> str: return f"{type_info}\n{column_info}" @property - def level_names(self) -> tuple[Any, ...]: + def level_names(self) -> Tuple[Any, ...]: if self._level_names is None or len(self._level_names) == 0: return tuple((None,) * max(1, self.nlevels)) else: @@ -206,11 +206,11 @@ def nrows(self) -> int: return len(next(iter(self.values()))) @cached_property - def names(self) -> tuple[Any, ...]: + def names(self) -> Tuple[Any, ...]: return tuple(self.keys()) @cached_property - def columns(self) -> tuple[ColumnBase, ...]: + def columns(self) -> Tuple[ColumnBase, ...]: return tuple(self.values()) @cached_property @@ -463,7 +463,7 @@ def _pad_key(self, key: Any, pad_value="") -> Any: return key + (pad_value,) * (self.nlevels - len(key)) def rename_levels( - self, mapper: Mapping[Any, Any] | Callable, level: int | None + self, mapper: Union[Mapping[Any, Any], Callable], level: Optional[int] ) -> ColumnAccessor: """ Rename the specified levels of the given ColumnAccessor diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 336b659d115..bd08ac385c7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -503,7 +503,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): """ _PROTECTED_KEYS = frozenset(("_data", "_index")) - _accessors: set[Any] = set() + _accessors: Set[Any] = set() _loc_indexer_type = _DataFrameLocIndexer _iloc_indexer_type = _DataFrameIlocIndexer @@ -822,7 +822,7 @@ def _init_from_dict_like( def _from_data( cls, data: MutableMapping, - index: BaseIndex | None = None, + 
index: Optional[BaseIndex] = None, columns: Any = None, ) -> DataFrame: out = super()._from_data(data, index) @@ -6433,7 +6433,7 @@ def _setitem_with_dataframe( input_df: DataFrame, replace_df: DataFrame, input_cols: Any = None, - mask: cudf.core.column.ColumnBase | None = None, + mask: Optional[cudf.core.column.ColumnBase] = None, ignore_index: bool = False, ): """ diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 9d86aa30b16..69dc5389e7a 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -65,11 +65,11 @@ class Frame: A Frame representing the (optional) index columns. """ - _data: ColumnAccessor + _data: "ColumnAccessor" # TODO: Once all dependence on Frame having an index is removed, this # attribute should be moved to IndexedFrame. - _index: cudf.core.index.BaseIndex | None - _names: list | None + _index: Optional[cudf.core.index.BaseIndex] + _names: Optional[List] def __init__(self, data=None, index=None): if data is None: @@ -90,11 +90,11 @@ def _num_rows(self) -> int: return len(self._data.columns[0]) @property - def _column_names(self) -> list[Any]: # TODO: List[str]? + def _column_names(self) -> List[Any]: # TODO: List[str]? return self._data.names @property - def _index_names(self) -> list[Any]: # TODO: List[str]? + def _index_names(self) -> List[Any]: # TODO: List[str]? # TODO: Temporarily suppressing mypy warnings to avoid introducing bugs # by returning an empty list where one is not expected. return ( @@ -104,7 +104,7 @@ def _index_names(self) -> list[Any]: # TODO: List[str]? ) @property - def _columns(self) -> list[Any]: # TODO: List[Column]? + def _columns(self) -> List[Any]: # TODO: List[Column]? 
return self._data.columns def serialize(self): @@ -126,7 +126,7 @@ def deserialize(cls, header, frames): def _from_data( cls, data: MutableMapping, - index: cudf.core.index.BaseIndex | None = None, + index: Optional[cudf.core.index.BaseIndex] = None, ): obj = cls.__new__(cls) Frame.__init__(obj, data, index) @@ -135,9 +135,9 @@ def _from_data( @classmethod def _from_columns( cls, - columns: list[ColumnBase], - column_names: list[str], - index_names: list[str] | None = None, + columns: List[ColumnBase], + column_names: List[str], + index_names: Optional[List[str]] = None, ): """Construct a `Frame` object from a list of columns. @@ -165,9 +165,9 @@ def _from_columns( def _from_columns_like_self( self, - columns: list[ColumnBase], - column_names: list[str], - index_names: list[str] | None = None, + columns: List[ColumnBase], + column_names: List[str], + index_names: Optional[List[str]] = None, ): """Construct a `Frame` from a list of columns with metadata from self. @@ -181,7 +181,7 @@ def _from_columns_like_self( def _mimic_inplace( self: T, result: Frame, inplace: bool = False - ) -> Frame | None: + ) -> Optional[Frame]: if inplace: for col in self._data: if col in result._data: @@ -616,9 +616,9 @@ def _to_array( self, get_column_values: Callable, make_empty_matrix: Callable, - dtype: Dtype | None = None, + dtype: Union[Dtype, None] = None, na_value=None, - ) -> cupy.ndarray | np.ndarray: + ) -> Union[cupy.ndarray, np.ndarray]: # Internal function to implement to_cupy and to_numpy, which are nearly # identical except for the attribute they access to generate values. @@ -650,7 +650,10 @@ def get_column_values_na(col): return matrix def to_cupy( - self, dtype: Dtype | None = None, copy: bool = False, na_value=None, + self, + dtype: Union[Dtype, None] = None, + copy: bool = False, + na_value=None, ) -> cupy.ndarray: """Convert the Frame to a CuPy array. 
@@ -681,7 +684,10 @@ def to_cupy( ) def to_numpy( - self, dtype: Dtype | None = None, copy: bool = True, na_value=None, + self, + dtype: Union[Dtype, None] = None, + copy: bool = True, + na_value=None, ) -> np.ndarray: """Convert the Frame to a NumPy array. @@ -3434,7 +3440,7 @@ def _binaryop( @classmethod def _colwise_binop( cls, - operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]], + operands: Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], fn: str, ): """Implement binary ops between two frame-like objects. @@ -6432,8 +6438,8 @@ def ge(self, other, axis="columns", level=None, fill_value=None): def _get_replacement_values_for_columns( - to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any] -) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]: + to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any] +) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]: """ Returns a per column mapping for the values to be replaced, new values to be replaced with and if all the values are empty. @@ -6458,9 +6464,9 @@ def _get_replacement_values_for_columns( A dict mapping of all columns and the corresponding values to be replaced with. """ - to_replace_columns: dict[Any, Any] = {} - values_columns: dict[Any, Any] = {} - all_na_columns: dict[Any, Any] = {} + to_replace_columns: Dict[Any, Any] = {} + values_columns: Dict[Any, Any] = {} + all_na_columns: Dict[Any, Any] = {} if is_scalar(to_replace) and is_scalar(value): to_replace_columns = {col: [to_replace] for col in columns_dtype_map} @@ -6597,8 +6603,8 @@ def _is_series(obj): def _drop_rows_by_labels( obj: DataFrameOrSeries, - labels: ColumnLike | abc.Iterable | str, - level: int | str, + labels: Union[ColumnLike, abc.Iterable, str], + level: Union[int, str], errors: str, ) -> DataFrameOrSeries: """Remove rows specified by `labels`. 
If `errors=True`, an error is raised diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 983525e4ea2..88371666ce6 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -60,8 +60,10 @@ def _lexsorted_equal_range( - idx: GenericIndex | cudf.MultiIndex, key_as_table: Frame, is_sorted: bool, -) -> tuple[int, int, ColumnBase | None]: + idx: Union[GenericIndex, cudf.MultiIndex], + key_as_table: Frame, + is_sorted: bool, +) -> Tuple[int, int, Optional[ColumnBase]]: """Get equal range for key in lexicographically sorted index. If index is not sorted when called, a sort will take place and `sort_inds` is returned. Otherwise `None` is returned in that position. @@ -92,8 +94,8 @@ def _index_from_data(data: MutableMapping, name: Any = None): if isinstance(values, NumericalColumn): try: - index_class_type: type[ - GenericIndex | cudf.MultiIndex + index_class_type: Type[ + Union[GenericIndex, cudf.MultiIndex] ] = _dtype_to_index[values.dtype.type] except KeyError: index_class_type = GenericIndex @@ -113,7 +115,7 @@ def _index_from_data(data: MutableMapping, name: Any = None): def _index_from_columns( - columns: list[cudf.core.column.ColumnBase], name: Any = None + columns: List[cudf.core.column.ColumnBase], name: Any = None ): """Construct an index from ``columns``, with levels named 0, 1, 2...""" return _index_from_data(dict(zip(range(len(columns)), columns)), name=name) @@ -1211,7 +1213,7 @@ class NumericIndex(GenericIndex): """ # Subclasses must define the dtype they are associated with. - _dtype: None | type[np.number] = None + _dtype: Union[None, Type[np.number]] = None def __init__(self, data=None, dtype=None, copy=False, name=None): @@ -2244,7 +2246,7 @@ def is_categorical(self): def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right", -) -> IntervalIndex: +) -> "IntervalIndex": """ Returns a fixed frequency IntervalIndex. 
@@ -2591,7 +2593,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex: ) -_dtype_to_index: dict[Any, type[NumericIndex]] = { +_dtype_to_index: Dict[Any, Type[NumericIndex]] = { np.int8: Int8Index, np.int16: Int16Index, np.int32: Int32Index, @@ -2702,7 +2704,7 @@ def from_arrow(cls, obj): return cudf.MultiIndex.from_arrow(obj) -def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: +def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: """ An internal Utility function to concat RangeIndex objects. """ @@ -2742,7 +2744,7 @@ def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: return RangeIndex(start, stop, step) -def _extended_gcd(a: int, b: int) -> tuple[int, int, int]: +def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]: """ Extended Euclidean algorithms to solve Bezout's identity: a*x + b*y = gcd(x, y) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 784be09494c..72878078593 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -135,8 +135,8 @@ class IndexedFrame(Frame): """ # mypy can't handle bound type variables as class members - _loc_indexer_type: type[_LocIndexerClass] # type: ignore - _iloc_indexer_type: type[_IlocIndexerClass] # type: ignore + _loc_indexer_type: Type[_LocIndexerClass] # type: ignore + _iloc_indexer_type: Type[_IlocIndexerClass] # type: ignore _index: cudf.core.index.BaseIndex def __init__(self, data=None, index=None): @@ -1486,7 +1486,7 @@ def _reset_index(self, level, drop, col_level=0, col_fill=""): def _first_or_last( self, offset, idx: int, op: Callable, side: str, slice_func: Callable - ) -> IndexedFrame: + ) -> "IndexedFrame": """Shared code path for ``first`` and ``last``.""" if not isinstance(self._index, cudf.core.index.DatetimeIndex): raise TypeError("'first' only supports a DatetimeIndex index.") diff --git a/python/cudf/cudf/core/join/_join_helpers.py 
b/python/cudf/cudf/core/join/_join_helpers.py index 7589ed27484..ead0cd566d9 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -57,7 +57,7 @@ def set(self, obj: Frame, value: ColumnBase, validate=False): def _match_join_keys( lcol: ColumnBase, rcol: ColumnBase, how: str -) -> tuple[ColumnBase, ColumnBase]: +) -> Tuple[ColumnBase, ColumnBase]: # Casts lcol and rcol to a common dtype for use as join keys. If no casting # is necessary, they are returned as is. @@ -118,7 +118,7 @@ def _match_join_keys( def _match_categorical_dtypes_both( lcol: CategoricalColumn, rcol: CategoricalColumn, how: str -) -> tuple[ColumnBase, ColumnBase]: +) -> Tuple[ColumnBase, ColumnBase]: ltype, rtype = lcol.dtype, rcol.dtype # when both are ordered and both have the same categories, diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 0c573d4efed..3796b596ad6 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -255,7 +255,7 @@ def set_names(self, names, level=None, inplace=False): def _from_data( cls, data: MutableMapping, - index: cudf.core.index.BaseIndex | None = None, + index: Optional[cudf.core.index.BaseIndex] = None, name: Any = None, ) -> MultiIndex: assert index is None @@ -781,9 +781,9 @@ def _index_and_downcast(self, result, index, index_key): def _get_row_major( self, df: DataFrameOrSeries, - row_tuple: ( - numbers.Number | slice | tuple[Any, ...] | list[tuple[Any, ...]] - ), + row_tuple: Union[ + numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]] + ], ) -> DataFrameOrSeries: if pd.api.types.is_bool_dtype( list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple @@ -805,9 +805,9 @@ def _get_row_major( def _validate_indexer( self, - indexer: ( - numbers.Number | slice | tuple[Any, ...] 
| list[tuple[Any, ...]] - ), + indexer: Union[ + numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]] + ], ): if isinstance(indexer, numbers.Number): return diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e2d350f0f5b..7f00162099a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -156,7 +156,7 @@ class _SeriesLocIndexer(_FrameIndexer): Label-based selection """ - def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: + def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: if isinstance(arg, pd.MultiIndex): arg = cudf.from_pandas(arg) @@ -281,7 +281,7 @@ class Series(SingleColumnFrame, IndexedFrame, Serializable): If ``False``, leaves ``np.nan`` values as is. """ - _accessors: set[Any] = set() + _accessors: Set[Any] = set() _loc_indexer_type = _SeriesLocIndexer _iloc_indexer_type = _SeriesIlocIndexer @@ -450,7 +450,7 @@ def __init__( def _from_data( cls, data: MutableMapping, - index: BaseIndex | None = None, + index: Optional[BaseIndex] = None, name: Any = None, ) -> Series: """ @@ -1049,7 +1049,7 @@ def __array_function__(self, func, types, args, kwargs): kwargs, ) - def map(self, arg, na_action=None) -> Series: + def map(self, arg, na_action=None) -> "Series": """ Map values of Series according to input correspondence. 
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 2d98c66e726..7793a2fdf29 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -57,7 +57,7 @@ def _scan(self, op, axis=None, *args, **kwargs): def _from_data( cls, data: MutableMapping, - index: cudf.core.index.BaseIndex | None = None, + index: Optional[cudf.core.index.BaseIndex] = None, name: Any = None, ): @@ -118,12 +118,18 @@ def values_host(self): # noqa: D102 return self._column.values_host def to_cupy( - self, dtype: Dtype | None = None, copy: bool = True, na_value=None, + self, + dtype: Union[Dtype, None] = None, + copy: bool = True, + na_value=None, ) -> cupy.ndarray: # noqa: D102 return super().to_cupy(dtype, copy, na_value).flatten() def to_numpy( - self, dtype: Dtype | None = None, copy: bool = True, na_value=None, + self, + dtype: Union[Dtype, None] = None, + copy: bool = True, + na_value=None, ) -> np.ndarray: # noqa: D102 return super().to_numpy(dtype, copy, na_value).flatten() @@ -288,7 +294,7 @@ def _make_operands_for_binop( reflect: bool = False, *args, **kwargs, - ) -> dict[str | None, tuple[ColumnBase, Any, bool, Any]]: + ) -> Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]: """Generate the dictionary of operands used for a binary operation. 
Parameters diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index cb72faac004..782b74ef4a6 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -61,7 +61,7 @@ def __call__( max_num_rows: int, add_special_tokens: bool = True, padding: str = "max_length", - truncation: bool | str = False, + truncation: Union[bool, str] = False, stride: int = 0, return_tensors: str = "cp", return_token_type_ids: bool = False, diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 79d08cca2a2..59c291eea0b 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -239,7 +239,7 @@ def assert_index_equal( right, exact="equiv", check_names: bool = True, - check_less_precise: bool | int = False, + check_less_precise: Union[bool, int] = False, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, From 4c57d5821640370ea4b32653629f17db3334e01e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 17:10:30 -0600 Subject: [PATCH 06/10] Use f-string. 
--- python/cudf/cudf/_version.py | 6 +++--- python/cudf_kafka/cudf_kafka/_version.py | 6 +++--- python/custreamz/custreamz/_version.py | 6 +++--- python/dask_cudf/dask_cudf/_version.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/_version.py b/python/cudf/cudf/_version.py index be915b54c46..c6281349c50 100644 --- a/python/cudf/cudf/_version.py +++ b/python/cudf/cudf/_version.py @@ -308,9 +308,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, tag_prefix, - ) + pieces[ + "error" + ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/cudf_kafka/cudf_kafka/_version.py b/python/cudf_kafka/cudf_kafka/_version.py index 8475afe1a6c..6cd10cc10bf 100644 --- a/python/cudf_kafka/cudf_kafka/_version.py +++ b/python/cudf_kafka/cudf_kafka/_version.py @@ -308,9 +308,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, tag_prefix, - ) + pieces[ + "error" + ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/custreamz/custreamz/_version.py b/python/custreamz/custreamz/_version.py index 8131fbf0c30..106fc3524f9 100644 --- a/python/custreamz/custreamz/_version.py +++ b/python/custreamz/custreamz/_version.py @@ -308,9 +308,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't 
start with prefix '{}'".format( - full_tag, tag_prefix, - ) + pieces[ + "error" + ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py index 85dbc55c197..104879fce36 100644 --- a/python/dask_cudf/dask_cudf/_version.py +++ b/python/dask_cudf/dask_cudf/_version.py @@ -308,9 +308,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, tag_prefix, - ) + pieces[ + "error" + ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] From 9a0cd869ffafdbb872db19dc3b2b898e74cc52cf Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 17:13:16 -0600 Subject: [PATCH 07/10] Remove tests of equivalent string objects. --- python/cudf/cudf/tests/test_api_types.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py index 1bc5f092aa7..e7cf113f604 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -18,8 +18,6 @@ (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -129,8 +127,6 @@ def test_is_categorical_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, True), @@ -236,8 +232,6 @@ def test_is_numeric_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. 
(bool, False), @@ -343,8 +337,6 @@ def test_is_integer_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -451,8 +443,6 @@ def test_is_integer(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -558,8 +548,6 @@ def test_is_string_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -665,8 +653,6 @@ def test_is_datetime_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -772,8 +758,6 @@ def test_is_list_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -882,8 +866,6 @@ def test_is_struct_dtype(obj, expect): (float(), False), (complex(), False), ("", False), - ("", False), - (r"", False), (object(), False), # Base Python types. (bool, False), @@ -989,8 +971,6 @@ def test_is_decimal_dtype(obj, expect): float(), complex(), "", - "", - r"", object(), # Base Python types. bool, @@ -1071,8 +1051,6 @@ def test_pandas_agreement(obj): float(), complex(), "", - "", - r"", object(), # Base Python types. bool, From f7885659582f73c0f932a3f3aaf30573fbac30b3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 2 Feb 2022 23:56:28 -0600 Subject: [PATCH 08/10] Inline get_nelem(). 
--- python/cudf/cudf/tests/test_groupby.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 7b92356dfba..61c7d1958a0 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -84,10 +84,6 @@ def make_frame( return df -def get_nelem(): - yield from [2, 3, 1000] - - @pytest.fixture def gdf(): return DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) @@ -1095,7 +1091,7 @@ def test_groupby_cumcount(): ) -@pytest.mark.parametrize("nelem", get_nelem()) +@pytest.mark.parametrize("nelem", [2, 3, 1000]) @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize( "agg", ["min", "max", "idxmin", "idxmax", "mean", "count"] From ad69bb90f971b94c7be378d7a9ffdf00092a3333 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 3 Feb 2022 00:16:00 -0600 Subject: [PATCH 09/10] Use f-string. --- python/dask_cudf/dask_cudf/io/orc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index 76c1978b83c..2d326e41c3e 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -79,9 +79,7 @@ def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): ex = set(columns) - set(schema) if ex: raise ValueError( - "Requested columns ({}) not in schema ({})".format( - ex, set(schema) - ) + f"Requested columns ({ex}) not in schema ({set(schema)})" ) else: columns = list(schema) From 449503f6bc1033de281fc27dbf9677af30d0ea7c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 3 Feb 2022 00:17:41 -0600 Subject: [PATCH 10/10] Revert changes to versioneer.py.
--- python/cudf/versioneer.py | 44 ++++++++++++++++++---------------- python/custreamz/versioneer.py | 44 ++++++++++++++++++---------------- python/dask_cudf/versioneer.py | 44 ++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 63 deletions(-) diff --git a/python/cudf/versioneer.py b/python/cudf/versioneer.py index bbf456a84ef..a6537a34ede 100644 --- a/python/cudf/versioneer.py +++ b/python/cudf/versioneer.py @@ -275,6 +275,7 @@ """ +from __future__ import print_function import errno import json @@ -344,7 +345,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg) as f: + with open(setup_cfg, "r") as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -406,7 +407,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except OSError: + except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -416,7 +417,7 @@ def run_command( return None, None else: if verbose: - print(f"unable to find command, tried {commands}") + print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -963,7 +964,7 @@ def git_get_keywords(versionfile_abs): # _version.py. 
keywords = {} try: - f = open(versionfile_abs) + f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -978,7 +979,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except OSError: + except EnvironmentError: pass return keywords @@ -1002,11 +1003,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} + refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1015,7 +1016,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = {r for r in refs if re.search(r"\d", r)} + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1122,8 +1123,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, tag_prefix, + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1173,13 +1175,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes") + f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except OSError: + except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") @@ -1243,7 +1245,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except OSError: + except EnvironmentError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1270,7 +1272,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set {} to '{}'".format(filename, versions["version"])) + print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces): @@ -1495,7 +1497,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print(f"got version from file {versionfile_abs} {ver}") + print("got version from file %s %s" % (versionfile_abs, ver)) return ver except NotThisMethod: pass @@ -1771,7 +1773,7 @@ def do_setup(): try: cfg = get_config_from_root(root) except ( - 
OSError, + EnvironmentError, configparser.NoSectionError, configparser.NoOptionError, ) as e: @@ -1801,9 +1803,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy) as f: + with open(ipy, "r") as f: old = f.read() - except OSError: + except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1822,12 +1824,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in) as f: + with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except OSError: + except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1861,7 +1863,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py") as f: + with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/custreamz/versioneer.py b/python/custreamz/versioneer.py index 2d1032e584e..9c9ddae7340 100644 --- a/python/custreamz/versioneer.py +++ b/python/custreamz/versioneer.py @@ -275,6 +275,7 @@ """ +from __future__ import print_function import errno import json @@ -344,7 +345,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . 
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg) as f: + with open(setup_cfg, "r") as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -406,7 +407,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except OSError: + except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -416,7 +417,7 @@ def run_command( return None, None else: if verbose: - print(f"unable to find command, tried {commands}") + print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -963,7 +964,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs) + f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -978,7 +979,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except OSError: + except EnvironmentError: pass return keywords @@ -1002,11 +1003,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} + refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1015,7 +1016,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. 
By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1122,8 +1123,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, tag_prefix, + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1173,13 +1175,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes") + f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except OSError: + except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") @@ -1243,7 +1245,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except OSError: + except EnvironmentError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1270,7 +1272,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set {} to '{}'".format(filename, versions["version"])) + print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces): @@ -1495,7 +1497,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print(f"got version from file {versionfile_abs} {ver}") + print("got version from file %s %s" % 
(versionfile_abs, ver)) return ver except NotThisMethod: pass @@ -1771,7 +1773,7 @@ def do_setup(): try: cfg = get_config_from_root(root) except ( - OSError, + EnvironmentError, configparser.NoSectionError, configparser.NoOptionError, ) as e: @@ -1801,9 +1803,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy) as f: + with open(ipy, "r") as f: old = f.read() - except OSError: + except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1822,12 +1824,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in) as f: + with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except OSError: + except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1861,7 +1863,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py") as f: + with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/dask_cudf/versioneer.py b/python/dask_cudf/versioneer.py index 1e94791233c..a560f2e8797 100644 --- a/python/dask_cudf/versioneer.py +++ b/python/dask_cudf/versioneer.py @@ -275,6 +275,7 @@ """ +from __future__ import print_function import errno import json @@ -344,7 +345,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . 
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg) as f: + with open(setup_cfg, "r") as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -406,7 +407,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except OSError: + except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -416,7 +417,7 @@ def run_command( return None, None else: if verbose: - print(f"unable to find command, tried {commands}") + print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -963,7 +964,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs) + f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -978,7 +979,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except OSError: + except EnvironmentError: pass return keywords @@ -1002,11 +1003,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} + refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1015,7 +1016,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. 
By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1122,8 +1123,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( - full_tag, tag_prefix, + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1173,13 +1175,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes") + f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except OSError: + except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") @@ -1243,7 +1245,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except OSError: + except EnvironmentError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1270,7 +1272,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set {} to '{}'".format(filename, versions["version"])) + print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces): @@ -1495,7 +1497,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print(f"got version from file {versionfile_abs} {ver}") + print("got version from file %s %s" % 
(versionfile_abs, ver)) return ver except NotThisMethod: pass @@ -1771,7 +1773,7 @@ def do_setup(): try: cfg = get_config_from_root(root) except ( - OSError, + EnvironmentError, configparser.NoSectionError, configparser.NoOptionError, ) as e: @@ -1801,9 +1803,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy) as f: + with open(ipy, "r") as f: old = f.read() - except OSError: + except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1822,12 +1824,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in) as f: + with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except OSError: + except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1861,7 +1863,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py") as f: + with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import")