From e23aa5cbb8ee3eff84d4e3cdd9ae1194e57f776f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 16:41:07 -0600 Subject: [PATCH] Run pyupgrade 2.31.0, except for changes to typing. --- ci/utils/nbtestlog2junitxml.py | 10 ++-- cpp/scripts/run-clang-format.py | 9 ++-- cpp/scripts/run-clang-tidy.py | 7 ++- cpp/scripts/sort_ninja_log.py | 2 +- docs/cudf/source/conf.py | 1 - python/cudf/cudf/_fuzz_testing/fuzzer.py | 2 +- python/cudf/cudf/_fuzz_testing/io.py | 4 +- python/cudf/cudf/_fuzz_testing/main.py | 2 +- python/cudf/cudf/_version.py | 19 ++++---- python/cudf/cudf/comm/gpuarrow.py | 2 +- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/core/dataframe.py | 12 ++--- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/join/join.py | 12 ++--- python/cudf/cudf/core/multiindex.py | 6 +-- python/cudf/cudf/core/scalar.py | 2 +- python/cudf/cudf/core/series.py | 10 ++-- python/cudf/cudf/core/udf/typing.py | 4 +- python/cudf/cudf/datasets.py | 6 +-- python/cudf/cudf/tests/test_api_types.py | 22 ++++----- python/cudf/cudf/tests/test_binops.py | 1 - python/cudf/cudf/tests/test_copying.py | 2 - python/cudf/cudf/tests/test_cuda_apply.py | 5 +- python/cudf/cudf/tests/test_dataframe.py | 4 +- python/cudf/cudf/tests/test_factorize.py | 4 +- python/cudf/cudf/tests/test_gcs.py | 6 +-- python/cudf/cudf/tests/test_groupby.py | 3 +- python/cudf/cudf/tests/test_hdfs.py | 26 +++++----- python/cudf/cudf/tests/test_query.py | 1 - python/cudf/cudf/tests/test_reductions.py | 1 - python/cudf/cudf/tests/test_s3.py | 44 ++++++++--------- python/cudf/cudf/tests/test_sorting.py | 2 +- python/cudf/cudf/tests/test_text.py | 2 +- python/cudf/cudf/tests/test_transform.py | 1 - python/cudf/cudf/tests/test_udf_binops.py | 1 - python/cudf/cudf/tests/test_unaops.py | 2 - python/cudf/cudf/utils/applyutils.py | 8 ++-- python/cudf/cudf/utils/cudautils.py | 2 +- python/cudf/cudf/utils/dtypes.py | 6 +-- python/cudf/cudf/utils/hash_vocab_utils.py | 22 ++++----- python/cudf/cudf/utils/queryutils.py | 8 ++-- python/cudf/setup.py | 2 +- python/cudf/versioneer.py | 48 ++++++++----------- python/cudf_kafka/cudf_kafka/_version.py | 19 ++++---- python/cudf_kafka/versioneer.py | 48 ++++++++----------- python/custreamz/custreamz/_version.py | 19 ++++---- .../custreamz/tests/test_dataframes.py | 1 - python/custreamz/versioneer.py | 48 ++++++++----------- python/dask_cudf/dask_cudf/_version.py | 19 ++++---- python/dask_cudf/dask_cudf/core.py | 12 ++--- python/dask_cudf/dask_cudf/io/orc.py | 4 +- .../dask_cudf/io/tests/test_parquet.py | 4 +- python/dask_cudf/setup.py | 2 +- python/dask_cudf/versioneer.py | 48 ++++++++----------- 56 files changed, 258 insertions(+), 307 deletions(-) diff --git a/ci/utils/nbtestlog2junitxml.py b/ci/utils/nbtestlog2junitxml.py index 15b362e4b70..6a421279112 100644 --- a/ci/utils/nbtestlog2junitxml.py +++ b/ci/utils/nbtestlog2junitxml.py @@ -7,11 +7,11 @@ from enum import Enum -startingPatt = re.compile("^STARTING: ([\w\.\-]+)$") -skippingPatt = re.compile("^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$") -exitCodePatt = re.compile("^EXIT CODE: (\d+)$") -folderPatt = re.compile("^FOLDER: ([\w\.\-]+)$") -timePatt = re.compile("^real\s+([\d\.ms]+)$") +startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$") +skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$") +exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$") +folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$") +timePatt = re.compile(r"^real\s+([\d\.ms]+)$") linePatt = re.compile("^" + ("-" * 80) + "$") diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py index a7c83da22c5..3d462d65fb8 100755 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -13,7 +13,6 @@ # limitations under the License. # -from __future__ import print_function import argparse import os @@ -124,9 +123,9 @@ def run_clang_format(src, dst, exe, verbose, inplace): os.makedirs(dstdir) # run the clang format command itself if src == dst: - cmd = "%s -i %s" % (exe, src) + cmd = f"{exe} -i {src}" else: - cmd = "%s %s > %s" % (exe, src, dst) + cmd = f"{exe} {src} > {dst}" try: subprocess.check_call(cmd, shell=True) except subprocess.CalledProcessError: @@ -134,9 +133,9 @@ def run_clang_format(src, dst, exe, verbose, inplace): raise # run the diff to check if there are any formatting issues if inplace: - cmd = "diff -q %s %s >/dev/null" % (src, dst) + cmd = f"diff -q {src} {dst} >/dev/null" else: - cmd = "diff %s %s" % (src, dst) + cmd = f"diff {src} {dst}" try: subprocess.check_call(cmd, shell=True) diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 3a1a663e231..30e937d7f4d 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -13,7 +13,6 @@ # limitations under the License. # -from __future__ import print_function import re import os import subprocess @@ -67,7 +66,7 @@ def parse_args(): def get_all_commands(cdb): - with open(cdb, "r") as fp: + with open(cdb) as fp: return json.load(fp) @@ -195,10 +194,10 @@ def collect_result(result): def print_result(passed, stdout, file): status_str = "PASSED" if passed else "FAILED" - print("%s File:%s %s %s" % (SEPARATOR, file, status_str, SEPARATOR)) + print(f"{SEPARATOR} File:{file} {status_str} {SEPARATOR}") if stdout: print(stdout) - print("%s File:%s ENDS %s" % (SEPARATOR, file, SEPARATOR)) + print(f"{SEPARATOR} File:{file} ENDS {SEPARATOR}") def print_results(): diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py index bac6697da82..1891c607c5f 100755 --- a/cpp/scripts/sort_ninja_log.py +++ b/cpp/scripts/sort_ninja_log.py @@ -33,7 +33,7 @@ # build a map of the log entries entries = {} -with open(log_file, "r") as log: +with open(log_file) as log: for line in log: entry = line.split() if len(entry) > 4: diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 3d6d3ceb399..5de1d918a09 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # Copyright (c) 2018-2021, NVIDIA CORPORATION. # diff --git a/python/cudf/cudf/_fuzz_testing/fuzzer.py b/python/cudf/cudf/_fuzz_testing/fuzzer.py index 484b3fb26f4..a51a5073510 100644 --- a/python/cudf/cudf/_fuzz_testing/fuzzer.py +++ b/python/cudf/cudf/_fuzz_testing/fuzzer.py @@ -14,7 +14,7 @@ ) -class Fuzzer(object): +class Fuzzer: def __init__( self, target, diff --git a/python/cudf/cudf/_fuzz_testing/io.py b/python/cudf/cudf/_fuzz_testing/io.py index 193fb4c7f7f..dfc59a1f18d 100644 --- a/python/cudf/cudf/_fuzz_testing/io.py +++ b/python/cudf/cudf/_fuzz_testing/io.py @@ -16,7 +16,7 @@ ) -class IOFuzz(object): +class IOFuzz: def __init__( self, dirs=None, @@ -59,7 +59,7 @@ def __init__( self._current_buffer = None def _load_params(self, path): - with open(path, "r") as f: + with open(path) as f: params = json.load(f) self._inputs.append(params) diff --git a/python/cudf/cudf/_fuzz_testing/main.py b/python/cudf/cudf/_fuzz_testing/main.py index 7b28a4c4970..6b536fc3e2e 100644 --- a/python/cudf/cudf/_fuzz_testing/main.py +++ b/python/cudf/cudf/_fuzz_testing/main.py @@ -3,7 +3,7 @@ from cudf._fuzz_testing import fuzzer -class PythonFuzz(object): +class PythonFuzz: def __init__(self, func, params=None, data_handle=None, **kwargs): self.function = func self.data_handler_class = data_handle diff --git a/python/cudf/cudf/_version.py b/python/cudf/cudf/_version.py index a511ab98acf..be915b54c46 100644 --- a/python/cudf/cudf/_version.py +++ b/python/cudf/cudf/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py index b6089b65aa5..7879261139d 100644 --- a/python/cudf/cudf/comm/gpuarrow.py +++ b/python/cudf/cudf/comm/gpuarrow.py @@ -58,7 +58,7 @@ def to_dict(self): return dc -class GpuArrowNodeReader(object): +class GpuArrowNodeReader: def __init__(self, table, index): self._table = table self._field = table.schema[index] diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index b1335c7c076..5c5ccaf94c0 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1,6 +1,6 @@ # Copyright (c) 2021, NVIDIA CORPORATION. -from __future__ import annotations, division, print_function +from __future__ import annotations import pickle import warnings diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 9b44b4e6831..7f62d8fe03f 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5083,7 +5083,7 @@ def to_arrow(self) -> pa.Array: """ if self.null_count == len(self): return pa.NullArray.from_buffers( - pa.null(), len(self), [pa.py_buffer((b""))] + pa.null(), len(self), [pa.py_buffer(b"")] ) else: return super().to_arrow() diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c686cd0fd39..bd08ac385c7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1,6 +1,6 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. -from __future__ import annotations, division +from __future__ import annotations import functools import inspect @@ -4347,7 +4347,7 @@ def _verbose_repr(): dtype = self.dtypes.iloc[i] col = pprint_thing(col) - line_no = _put_str(" {num}".format(num=i), space_num) + line_no = _put_str(f" {i}", space_num) count = "" if show_counts: count = counts[i] @@ -5694,9 +5694,7 @@ def select_dtypes(self, include=None, exclude=None): if issubclass(dtype.type, e_dtype): exclude_subtypes.add(dtype.type) - include_all = set( - [cudf_dtype_from_pydata_dtype(d) for d in self.dtypes] - ) + include_all = {cudf_dtype_from_pydata_dtype(d) for d in self.dtypes} if include: inclusion = include_all & include_subtypes @@ -6416,8 +6414,8 @@ def _align_indices(lhs, rhs): lhs_out = DataFrame(index=df.index) rhs_out = DataFrame(index=df.index) common = set(lhs.columns) & set(rhs.columns) - common_x = set(["{}_x".format(x) for x in common]) - common_y = set(["{}_y".format(x) for x in common]) + common_x = {f"{x}_x" for x in common} + common_y = {f"{x}_y" for x in common} for col in df.columns: if col in common_x: lhs_out[col[:-2]] = df[col] diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index a393d8e9457..ff700144bed 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1461,7 +1461,7 @@ def apply(self, func): # TODO: should we define this as a dataclass instead? -class Grouper(object): +class Grouper: def __init__( self, key=None, level=None, freq=None, closed=None, label=None ): diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 91c7a740699..88371666ce6 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1,6 +1,6 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. -from __future__ import annotations, division, print_function +from __future__ import annotations import math import pickle diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 704274815f6..39ff4718550 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -169,13 +169,11 @@ def __init__( if on else set() if (self._using_left_index or self._using_right_index) - else set( - [ - lkey.name - for lkey, rkey in zip(self._left_keys, self._right_keys) - if lkey.name == rkey.name - ] - ) + else { + lkey.name + for lkey, rkey in zip(self._left_keys, self._right_keys) + if lkey.name == rkey.name + } ) def perform_merge(self) -> Frame: diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index fa84889adea..3796b596ad6 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -115,7 +115,7 @@ def __init__( "MultiIndex has unequal number of levels and " "codes and is inconsistent!" ) - if len(set(c.size for c in codes._data.columns)) != 1: + if len({c.size for c in codes._data.columns}) != 1: raise ValueError( "MultiIndex length of codes does not match " "and is inconsistent!" @@ -752,7 +752,7 @@ def _index_and_downcast(self, result, index, index_key): # Pandas returns an empty Series with a tuple as name # the one expected result column result = cudf.Series._from_data( - {}, name=tuple((col[0] for col in index._data.columns)) + {}, name=tuple(col[0] for col in index._data.columns) ) elif out_index._num_columns == 1: # If there's only one column remaining in the output index, convert @@ -1222,7 +1222,7 @@ def _poplevels(self, level): if not pd.api.types.is_list_like(level): level = (level,) - ilevels = sorted([self._level_index_from_level(lev) for lev in level]) + ilevels = sorted(self._level_index_from_level(lev) for lev in level) if not ilevels: return None diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index b0770b71ca6..134b94bf0f2 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -17,7 +17,7 @@ ) -class Scalar(object): +class Scalar: """ A GPU-backed scalar object with NumPy scalar like properties May be used in binary operations against other scalars, cuDF diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 0371c40274f..7f00162099a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -167,7 +167,7 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: if ( isinstance(arg, tuple) and len(arg) == self._frame._index.nlevels - and not any((isinstance(x, slice) for x in arg)) + and not any(isinstance(x, slice) for x in arg) ): result = result.iloc[0] return result @@ -3121,7 +3121,7 @@ def _prepare_percentiles(percentiles): return percentiles def _format_percentile_names(percentiles): - return ["{0}%".format(int(x * 100)) for x in percentiles] + return [f"{int(x * 100)}%" for x in percentiles] def _format_stats_values(stats_data): return map(lambda x: round(x, 6), stats_data) @@ -3223,7 +3223,7 @@ def _describe_timestamp(self): .to_numpy(na_value=np.nan), ) ), - "max": str(pd.Timestamp((self.max()))), + "max": str(pd.Timestamp(self.max())), } return Series( @@ -3702,7 +3702,7 @@ def wrapper(self, other, level=None, fill_value=None, axis=0): setattr(Series, binop, make_binop_func(binop)) -class DatetimeProperties(object): +class DatetimeProperties: """ Accessor object for datetimelike properties of the Series values. @@ -4644,7 +4644,7 @@ def strftime(self, date_format, *args, **kwargs): ) -class TimedeltaProperties(object): +class TimedeltaProperties: """ Accessor object for timedeltalike properties of the Series values. diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index da7ff4c0e32..56e8bec74dc 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -133,8 +133,8 @@ def typeof_masked(val, c): class MaskedConstructor(ConcreteTemplate): key = api.Masked units = ["ns", "ms", "us", "s"] - datetime_cases = set(types.NPDatetime(u) for u in units) - timedelta_cases = set(types.NPTimedelta(u) for u in units) + datetime_cases = {types.NPDatetime(u) for u in units} + timedelta_cases = {types.NPTimedelta(u) for u in units} cases = [ nb_signature(MaskedType(t), t, types.boolean) for t in ( diff --git a/python/cudf/cudf/datasets.py b/python/cudf/cudf/datasets.py index 2341a5c23b9..d7a2fedef59 100644 --- a/python/cudf/cudf/datasets.py +++ b/python/cudf/cudf/datasets.py @@ -57,9 +57,7 @@ def timeseries( pd.date_range(start, end, freq=freq, name="timestamp") ) state = np.random.RandomState(seed) - columns = dict( - (k, make[dt](len(index), state)) for k, dt in dtypes.items() - ) + columns = {k: make[dt](len(index), state) for k, dt in dtypes.items()} df = pd.DataFrame(columns, index=index, columns=sorted(columns)) if df.index[-1] == end: df = df.iloc[:-1] @@ -110,7 +108,7 @@ def randomdata(nrows=10, dtypes=None, seed=None): if dtypes is None: dtypes = {"id": int, "x": float, "y": float} state = np.random.RandomState(seed) - columns = dict((k, make[dt](nrows, state)) for k, dt in dtypes.items()) + columns = {k: make[dt](nrows, state) for k, dt in dtypes.items()} df = pd.DataFrame(columns, columns=sorted(columns)) return cudf.from_pandas(df) diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py index 4d104c122d1..f8f93eaa2e3 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -17,7 +17,7 @@ (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -128,7 +128,7 @@ def test_is_categorical_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -235,7 +235,7 @@ def test_is_numeric_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -342,7 +342,7 @@ def test_is_integer_dtype(obj, expect): (int(), True), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -450,7 +450,7 @@ def test_is_integer(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -557,7 +557,7 @@ def test_is_string_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -664,7 +664,7 @@ def test_is_datetime_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -771,7 +771,7 @@ def test_is_list_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -881,7 +881,7 @@ def test_is_struct_dtype(obj, expect): (int(), False), (float(), False), (complex(), False), - (str(), False), + ('', False), ("", False), (r"", False), (object(), False), @@ -988,7 +988,7 @@ def test_is_decimal_dtype(obj, expect): int(), float(), complex(), - str(), + '', "", r"", object(), @@ -1070,7 +1070,7 @@ def test_pandas_agreement(obj): int(), float(), complex(), - str(), + '', "", r"", object(), diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 921f2de38c2..76add8b9c5d 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1,6 +1,5 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. -from __future__ import division import decimal import operator diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py index 21a6a9172db..0d0ba579f22 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -1,5 +1,3 @@ -from __future__ import division, print_function - import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py index a00dbbba5f0..e8bd64b5061 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/test_cuda_apply.py @@ -98,7 +98,7 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): expect_out1 = extra2 * in1 - extra1 * in2 + in3 expect_out2 = np.hstack( - np.arange((e - s)) for s, e in zip(chunks, chunks[1:] + [len(df)]) + np.arange(e - s) for s, e in zip(chunks, chunks[1:] + [len(df)]) ) outdf = df.apply_chunks( @@ -141,8 +141,7 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): expect_out1 = extra2 * in1 - extra1 * in2 + in3 expect_out2 = np.hstack( - tpb * np.arange((e - s)) - for s, e in zip(chunks, chunks[1:] + [len(df)]) + tpb * np.arange(e - s) for s, e in zip(chunks, chunks[1:] + [len(df)]) ) outdf = df.apply_chunks( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 5844055f7f0..b0c1b6fe2b6 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -845,7 +845,7 @@ def test_dataframe_to_string_wide(monkeypatch): # Test basic df = cudf.DataFrame() for i in range(100): - df["a{}".format(i)] = list(range(3)) + df[f"a{i}"] = list(range(3)) pd.options.display.max_columns = 0 got = df.to_string() @@ -1163,7 +1163,7 @@ def test_dataframe_hash_partition(nrows, nparts, nkeys): gdf = cudf.DataFrame() keycols = [] for i in range(nkeys): - keyname = "key{}".format(i) + keyname = f"key{i}" gdf[keyname] = np.random.randint(0, 7 - i, nrows) keycols.append(keyname) gdf["val1"] = np.random.randint(0, nrows * 2, nrows) diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py index 1f16686a6a6..3081b7c4a6e 100644 --- a/python/cudf/cudf/tests/test_factorize.py +++ b/python/cudf/cudf/tests/test_factorize.py @@ -23,7 +23,7 @@ def test_factorize_series_obj(ncats, nelem): assert isinstance(uvals, cp.ndarray) assert isinstance(labels, Index) - encoder = dict((labels[idx], idx) for idx in range(len(labels))) + encoder = {labels[idx]: idx for idx in range(len(labels))} handcoded = [encoder[v] for v in arr] np.testing.assert_array_equal(uvals.get(), handcoded) @@ -42,7 +42,7 @@ def test_factorize_index_obj(ncats, nelem): assert isinstance(uvals, cp.ndarray) assert isinstance(labels, Index) - encoder = dict((labels[idx], idx) for idx in range(len(labels))) + encoder = {labels[idx]: idx for idx in range(len(labels))} handcoded = [encoder[v] for v in arr] np.testing.assert_array_equal(uvals.get(), handcoded) diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index db53529b22f..307232b1305 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -48,14 +48,14 @@ def mock_size(*args): # use_python_file_object=True, because the pyarrow # `open_input_file` command will fail (since it doesn't # use the monkey-patched `open` definition) - got = cudf.read_csv("gcs://{}".format(fpath), use_python_file_object=False) + got = cudf.read_csv(f"gcs://{fpath}", use_python_file_object=False) assert_eq(pdf, got) # AbstractBufferedFile -> PythonFile conversion # will work fine with the monkey-patched FS if we # pass in an fsspec file object fs = gcsfs.core.GCSFileSystem() - with fs.open("gcs://{}".format(fpath)) as f: + with fs.open(f"gcs://{fpath}") as f: got = cudf.read_csv(f) assert_eq(pdf, got) @@ -69,7 +69,7 @@ def mock_open(*args, **kwargs): return open(local_filepath, "wb") monkeypatch.setattr(gcsfs.core.GCSFileSystem, "open", mock_open) - gdf.to_orc("gcs://{}".format(gcs_fname)) + gdf.to_orc(f"gcs://{gcs_fname}") got = pa.orc.ORCFile(local_filepath).read().to_pandas() assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index f5decd62ea9..7b92356dfba 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -85,8 +85,7 @@ def make_frame( def get_nelem(): - for elem in [2, 3, 1000]: - yield elem + yield from [2, 3, 1000] @pytest.fixture diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index 24554f113bb..2d61d6693cb 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ b/python/cudf/cudf/tests/test_hdfs.py @@ -62,7 +62,7 @@ def test_read_csv(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_csv_reader.csv".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_csv_reader.csv" got = cudf.read_csv(hd_fpath) @@ -81,7 +81,7 @@ def test_write_csv(pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_csv_writer.csv".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_csv_writer.csv" gdf.to_csv(hd_fpath, index=False) @@ -107,7 +107,7 @@ def test_read_parquet(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_parquet_reader.parquet".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_parquet_reader.parquet" got = cudf.read_parquet(hd_fpath) @@ -126,7 +126,7 @@ def test_write_parquet(pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_parquet_writer.parquet".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_parquet_writer.parquet" gdf.to_parquet(hd_fpath) @@ -153,7 +153,7 @@ def test_write_parquet_partitioned(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_parquet_partitioned.parquet".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_parquet_partitioned.parquet" # Clear data written from previous runs hdfs.rm(f"{basedir}/test_parquet_partitioned.parquet", recursive=True) gdf.to_parquet( @@ -186,7 +186,7 @@ def test_read_json(tmpdir, pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_json_reader.json".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_json_reader.json" got = cudf.read_json(hd_fpath, engine="cudf", orient="records", lines=True) @@ -207,9 +207,9 @@ def test_read_orc(datadir, hdfs, test_url): hdfs.upload(basedir + "/file.orc", buffer) if test_url: - hd_fpath = "hdfs://{}:{}{}/file.orc".format(host, port, basedir) + hd_fpath = f"hdfs://{host}:{port}{basedir}/file.orc" else: - hd_fpath = "hdfs://{}/file.orc".format(basedir) + hd_fpath = f"hdfs://{basedir}/file.orc" got = cudf.read_orc(hd_fpath) expect = orc.ORCFile(buffer).read().to_pandas() @@ -226,7 +226,7 @@ def test_write_orc(pdf, hdfs, test_url): host, port, basedir ) else: - hd_fpath = "hdfs://{}/test_orc_writer.orc".format(basedir) + hd_fpath = f"hdfs://{basedir}/test_orc_writer.orc" gdf.to_orc(hd_fpath) @@ -247,9 +247,9 @@ def test_read_avro(datadir, hdfs, test_url): hdfs.upload(basedir + "/file.avro", buffer) if test_url: - hd_fpath = "hdfs://{}:{}{}/file.avro".format(host, port, basedir) + hd_fpath = f"hdfs://{host}:{port}{basedir}/file.avro" else: - hd_fpath = "hdfs://{}/file.avro".format(basedir) + hd_fpath = f"hdfs://{basedir}/file.avro" got = cudf.read_avro(hd_fpath) with open(fname, mode="rb") as f: @@ -270,7 +270,7 @@ def test_storage_options(tmpdir, pdf, hdfs): # Write to hdfs hdfs.upload(basedir + "/file.csv", buffer) - hd_fpath = "hdfs://{}/file.csv".format(basedir) + hd_fpath = f"hdfs://{basedir}/file.csv" storage_options = {"host": host, "port": port} @@ -293,7 +293,7 @@ def test_storage_options_error(tmpdir, pdf, hdfs): # Write to hdfs hdfs.upload(basedir + "/file.csv", buffer) - hd_fpath = "hdfs://{}:{}{}/file.avro".format(host, port, basedir) + hd_fpath = f"hdfs://{host}:{port}{basedir}/file.avro" storage_options = {"host": host, "port": port} diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py index 3de38b2cf6f..09129a43f07 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/test_query.py @@ -1,6 +1,5 @@ # Copyright (c) 2018, NVIDIA CORPORATION. -from __future__ import division, print_function import datetime import inspect diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 40add502309..7106ab54686 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -1,6 +1,5 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -from __future__ import division, print_function import re from decimal import Decimal diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index da1ffc1fc16..29060927d75 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -147,7 +147,7 @@ def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, bytes_per_thread=bytes_per_thread, use_python_file_object=False, @@ -157,7 +157,7 @@ def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): # Use Arrow PythonFile object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, bytes_per_thread=bytes_per_thread, use_python_file_object=True, @@ -174,7 +174,7 @@ def test_read_csv_arrow_nativefile(s3_base, s3so, pdf): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file("{}/{}".format(bname, fname)) as fil: + with fs.open_input_file(f"{bname}/{fname}") as fil: got = cudf.read_csv(fil) assert_eq(pdf, got) @@ -193,7 +193,7 @@ def test_read_csv_byte_range( # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, byte_range=(74, 73), bytes_per_thread=bytes_per_thread, @@ -213,15 +213,15 @@ def test_write_csv(s3_base, s3so, pdf, chunksize): gdf = cudf.from_pandas(pdf) with s3_context(s3_base=s3_base, bucket=bname) as s3fs: gdf.to_csv( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", index=False, chunksize=chunksize, storage_options=s3so, ) - assert s3fs.exists("s3://{}/{}".format(bname, fname)) + assert s3fs.exists(f"s3://{bname}/{fname}") # TODO: Update to use `storage_options` from pandas v1.2.0 - got = pd.read_csv(s3fs.open("s3://{}/{}".format(bname, fname))) + got = pd.read_csv(s3fs.open(f"s3://{bname}/{fname}")) assert_eq(pdf, got) @@ -248,7 +248,7 @@ def test_read_parquet( buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got1 = cudf.read_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", open_file_options=( {"precache_options": {"method": precache}} if use_python_file_object @@ -266,9 +266,9 @@ def test_read_parquet( buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): fs = get_fs_token_paths( - "s3://{}/{}".format(bname, fname), storage_options=s3so + f"s3://{bname}/{fname}", storage_options=s3so )[0] - with fs.open("s3://{}/{}".format(bname, fname), mode="rb") as f: + with fs.open(f"s3://{bname}/{fname}", mode="rb") as f: got2 = cudf.read_parquet( f, bytes_per_thread=bytes_per_thread, @@ -297,7 +297,7 @@ def test_read_parquet_ext( buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got1 = cudf.read_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, bytes_per_thread=bytes_per_thread, footer_sample_size=3200, @@ -326,7 +326,7 @@ def test_read_parquet_arrow_nativefile(s3_base, s3so, pdf, columns): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file("{}/{}".format(bname, fname)) as fil: + with fs.open_input_file(f"{bname}/{fname}") as fil: got = cudf.read_parquet(fil, columns=columns) expect = pdf[columns] if columns else pdf @@ -343,7 +343,7 @@ def test_read_parquet_filters(s3_base, s3so, pdf_ext, precache): filters = [("String", "==", "Omega")] with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", storage_options=s3so, filters=filters, open_file_options={"precache_options": {"method": precache}}, @@ -360,13 +360,13 @@ def test_write_parquet(s3_base, s3so, pdf, partition_cols): gdf = cudf.from_pandas(pdf) with s3_context(s3_base=s3_base, bucket=bname) as s3fs: gdf.to_parquet( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", partition_cols=partition_cols, storage_options=s3so, ) - assert s3fs.exists("s3://{}/{}".format(bname, fname)) + assert s3fs.exists(f"s3://{bname}/{fname}") - got = pd.read_parquet(s3fs.open("s3://{}/{}".format(bname, fname))) + got = pd.read_parquet(s3fs.open(f"s3://{bname}/{fname}")) assert_eq(pdf, got) @@ -383,7 +383,7 @@ def test_read_json(s3_base, s3so): with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_json( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", engine="cudf", orient="records", lines=True, @@ -407,7 +407,7 @@ def test_read_orc(s3_base, s3so, datadir, use_python_file_object, columns): with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_orc( - "s3://{}/{}".format(bname, fname), + f"s3://{bname}/{fname}", columns=columns, storage_options=s3so, use_python_file_object=use_python_file_object, @@ -432,7 +432,7 @@ def test_read_orc_arrow_nativefile(s3_base, s3so, datadir, columns): fs = pa_fs.S3FileSystem( endpoint_override=s3so["client_kwargs"]["endpoint_url"], ) - with fs.open_input_file("{}/{}".format(bname, fname)) as fil: + with fs.open_input_file(f"{bname}/{fname}") as fil: got = cudf.read_orc(fil, columns=columns) if columns: @@ -445,10 +445,10 @@ def test_write_orc(s3_base, s3so, pdf): bname = "orc" gdf = cudf.from_pandas(pdf) with s3_context(s3_base=s3_base, bucket=bname) as s3fs: - gdf.to_orc("s3://{}/{}".format(bname, fname), storage_options=s3so) - assert s3fs.exists("s3://{}/{}".format(bname, fname)) + gdf.to_orc(f"s3://{bname}/{fname}", storage_options=s3so) + assert s3fs.exists(f"s3://{bname}/{fname}") - with s3fs.open("s3://{}/{}".format(bname, fname)) as f: + with s3fs.open(f"s3://{bname}/{fname}") as f: got = pa.orc.ORCFile(f).read().to_pandas() assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index 00cd31e7539..10c3689fcd7 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -105,7 +105,7 @@ def test_series_argsort(nelem, dtype, asc): ) def test_series_sort_index(nelem, asc): np.random.seed(0) - sr = Series((100 * np.random.random(nelem))) + sr = Series(100 * np.random.random(nelem)) psr = sr.to_pandas() expected = psr.sort_index(ascending=asc) diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index a447a60c709..5ff66fc750f 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -763,7 +763,7 @@ def test_read_text(datadir): chess_file = str(datadir) + "/chess.pgn" delimiter = "1." - with open(chess_file, "r") as f: + with open(chess_file) as f: content = f.read().split(delimiter) # Since Python split removes the delimiter and read_text does diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py index 021c4052759..bd7ee45fbf8 100644 --- a/python/cudf/cudf/tests/test_transform.py +++ b/python/cudf/cudf/tests/test_transform.py @@ -1,6 +1,5 @@ # Copyright (c) 2018-2020, NVIDIA CORPORATION. -from __future__ import division import numpy as np import pytest diff --git a/python/cudf/cudf/tests/test_udf_binops.py b/python/cudf/cudf/tests/test_udf_binops.py index 935c3868a68..8f4d2e695d9 100644 --- a/python/cudf/cudf/tests/test_udf_binops.py +++ b/python/cudf/cudf/tests/test_udf_binops.py @@ -1,5 +1,4 @@ # Copyright (c) 2018, NVIDIA CORPORATION. -from __future__ import division import numpy as np import pytest diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index 22c78b5f933..dd736abd7d0 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -1,5 +1,3 @@ -from __future__ import division - import itertools import operator import re diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index fa5cde76524..7876e3652f3 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -125,7 +125,7 @@ def make_aggregate_nullmask(df, columns=None, op="and"): return out_mask -class ApplyKernelCompilerBase(object): +class ApplyKernelCompilerBase: def __init__( self, func, incols, outcols, kwargs, pessimistic_nulls, cache_key ): @@ -251,7 +251,7 @@ def row_wise_kernel({args}): srcidx.format(a=a, start=start, stop=stop, stride=stride) ) - body.append("inner({})".format(args)) + body.append(f"inner({args})") indented = ["{}{}".format(" " * 4, ln) for ln in body] # Finalize source @@ -307,7 +307,7 @@ def chunk_wise_kernel(nrows, chunks, {args}): slicedargs = {} for a in argnames: if a not in extras: - slicedargs[a] = "{}[start:stop]".format(a) + slicedargs[a] = f"{a}[start:stop]" else: slicedargs[a] = str(a) body.append( @@ -359,4 +359,4 @@ def _load_cache_or_make_chunk_wise_kernel(func, *args, **kwargs): def _mangle_user(name): """Mangle user variable name""" - return "__user_{}".format(name) + return f"__user_{name}" diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index f0533dcaa72..742c747ab69 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -218,7 +218,7 @@ def make_cache_key(udf, sig): codebytes = udf.__code__.co_code constants = udf.__code__.co_consts if udf.__closure__ is not None: - cvars = tuple([x.cell_contents for x in udf.__closure__]) + cvars = tuple(x.cell_contents for x in udf.__closure__) cvarbytes = dumps(cvars) else: cvarbytes = b"" diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 44bbb1b493d..4cd1738996f 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -160,8 +160,8 @@ def numeric_normalize_types(*args): def _find_common_type_decimal(dtypes): # Find the largest scale and the largest difference between # precision and scale of the columns to be concatenated - s = max([dtype.scale for dtype in dtypes]) - lhs = max([dtype.precision - dtype.scale for dtype in dtypes]) + s = max(dtype.scale for dtype in dtypes) + lhs = max(dtype.precision - dtype.scale for dtype in dtypes) # Combine to get the necessary precision and clip at the maximum # precision p = s + lhs @@ -525,7 +525,7 @@ def find_common_type(dtypes): ) for dtype in dtypes ): - if len(set(dtype._categories.dtype for dtype in dtypes)) == 1: + if len({dtype._categories.dtype for dtype in dtypes}) == 1: return cudf.CategoricalDtype( cudf.core.column.concat_columns( [dtype._categories for dtype in dtypes] diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py index 45004c5f107..58e0541d3db 100644 --- a/python/cudf/cudf/utils/hash_vocab_utils.py +++ b/python/cudf/cudf/utils/hash_vocab_utils.py @@ -80,9 +80,9 @@ def _pick_initial_a_b(data, max_constant, init_bins): if score <= max_constant and longest <= MAX_SIZE_FOR_INITIAL_BIN: print( - "Attempting to build table using {:.6f}n space".format(score) + f"Attempting to build table using {score:.6f}n space" ) - print("Longest bin was {}".format(longest)) + print(f"Longest bin was {longest}") break return bins, a, b @@ -170,7 +170,7 @@ def _pack_keys_and_values(flattened_hash_table, original_dict): def _load_vocab_dict(path): vocab = {} - with open(path, mode="r", encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: counter = 0 for line in f: vocab[line.strip()] = counter @@ -193,17 +193,17 @@ def _store_func( ): with open(out_name, mode="w+") as f: - f.write("{}\n".format(outer_a)) - f.write("{}\n".format(outer_b)) - f.write("{}\n".format(num_outer_bins)) + f.write(f"{outer_a}\n") + f.write(f"{outer_b}\n") + f.write(f"{num_outer_bins}\n") f.writelines( - "{} {}\n".format(coeff, offset) + f"{coeff} {offset}\n" for coeff, offset in zip(inner_table_coeffs, offsets_into_ht) ) - f.write("{}\n".format(len(hash_table))) - f.writelines("{}\n".format(kv) for kv in hash_table) + f.write(f"{len(hash_table)}\n") + f.writelines(f"{kv}\n" for kv in hash_table) f.writelines( - "{}\n".format(tok_id) + f"{tok_id}\n" for tok_id in [unk_tok_id, first_token_id, sep_token_id] ) @@ -295,6 +295,6 @@ def hash_vocab( ) assert ( val == value - ), "Incorrect value found. Got {} expected {}".format(val, value) + ), f"Incorrect value found. Got {val} expected {value}" print("All present tokens return correct value.") diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index d9153c2b1d2..64218ddf46a 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -136,7 +136,7 @@ def query_compile(expr): key "args" is a sequence of name of the arguments. """ - funcid = "queryexpr_{:x}".format(np.uintp(hash(expr))) + funcid = f"queryexpr_{np.uintp(hash(expr)):x}" # Load cache compiled = _cache.get(funcid) # Cache not found @@ -147,7 +147,7 @@ def query_compile(expr): # compile devicefn = cuda.jit(device=True)(fn) - kernelid = "kernel_{}".format(funcid) + kernelid = f"kernel_{funcid}" kernel = _wrap_query_expr(kernelid, devicefn, args) compiled = info.copy() @@ -173,10 +173,10 @@ def _add_idx(arg): if arg.startswith(ENVREF_PREFIX): return arg else: - return "{}[idx]".format(arg) + return f"{arg}[idx]" def _add_prefix(arg): - return "_args_{}".format(arg) + return f"_args_{arg}" glbls = {"queryfn": fn, "cuda": cuda} kernargs = map(_add_prefix, args) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index a8e14504469..7e49bcb60ce 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -64,7 +64,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_version = None with open( - os.path.join(cuda_include_dir, "cuda.h"), "r", encoding="utf-8" + os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8" ) as f: for line in f.readlines(): if re.search(r"#define CUDA_VERSION ", line) is not None: diff --git a/python/cudf/versioneer.py b/python/cudf/versioneer.py index a6537a34ede..6bb75d8ae85 100644 --- a/python/cudf/versioneer.py +++ b/python/cudf/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/cudf_kafka/cudf_kafka/_version.py b/python/cudf_kafka/cudf_kafka/_version.py index 5ab5c72e457..8475afe1a6c 100644 --- a/python/cudf_kafka/cudf_kafka/_version.py +++ b/python/cudf_kafka/cudf_kafka/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/cudf_kafka/versioneer.py b/python/cudf_kafka/versioneer.py index 2260d5c2dcf..253f0547a94 100644 --- a/python/cudf_kafka/versioneer.py +++ b/python/cudf_kafka/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/custreamz/custreamz/_version.py b/python/custreamz/custreamz/_version.py index a3409a06953..8131fbf0c30 100644 --- a/python/custreamz/custreamz/_version.py +++ b/python/custreamz/custreamz/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py index 24f6e46f6c5..a7378408c24 100644 --- a/python/custreamz/custreamz/tests/test_dataframes.py +++ b/python/custreamz/custreamz/tests/test_dataframes.py @@ -4,7 +4,6 @@ Tests for Streamz Dataframes (SDFs) built on top of cuDF DataFrames. *** Borrowed from streamz.dataframe.tests | License at thirdparty/LICENSE *** """ -from __future__ import division, print_function import json import operator diff --git a/python/custreamz/versioneer.py b/python/custreamz/versioneer.py index 9c9ddae7340..d2a75c38787 100644 --- a/python/custreamz/versioneer.py +++ b/python/custreamz/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py index 8ca2cf98381..85dbc55c197 100644 --- a/python/dask_cudf/dask_cudf/_version.py +++ b/python/dask_cudf/dask_cudf/_version.py @@ -86,7 +86,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -96,7 +96,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -308,9 +308,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index e191873f82b..729db6c232d 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -516,7 +516,7 @@ def _extract_meta(x): elif isinstance(x, list): return [_extract_meta(_x) for _x in x] elif isinstance(x, tuple): - return tuple([_extract_meta(_x) for _x in x]) + return tuple(_extract_meta(_x) for _x in x) elif isinstance(x, dict): return {k: _extract_meta(v) for k, v in x.items()} return x @@ -611,9 +611,7 @@ def reduction( if not isinstance(args, (tuple, list)): args = [args] - npartitions = set( - arg.npartitions for arg in args if isinstance(arg, _Frame) - ) + npartitions = {arg.npartitions for arg in args if isinstance(arg, _Frame)} if len(npartitions) > 1: raise ValueError("All arguments must have same number of partitions") npartitions = npartitions.pop() @@ -636,7 +634,7 @@ def reduction( ) # Chunk - a = "{0}-chunk-{1}".format(token or funcname(chunk), token_key) + a = f"{token or funcname(chunk)}-chunk-{token_key}" if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs: dsk = { (a, 0, i): (chunk, key) @@ -654,7 +652,7 @@ def reduction( } # Combine - b = "{0}-combine-{1}".format(token or funcname(combine), token_key) + b = f"{token or funcname(combine)}-combine-{token_key}" k = npartitions depth = 0 while k > split_every: @@ -670,7 +668,7 @@ def reduction( depth += 1 # Aggregate - b = "{0}-agg-{1}".format(token or funcname(aggregate), token_key) + b = f"{token or funcname(aggregate)}-agg-{token_key}" conc = (list, [(a, depth, i) for i in range(k)]) if aggregate_kwargs: dsk[(b, 0)] = (apply, aggregate, [conc], aggregate_kwargs) diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index 00fc197da9b..76c1978b83c 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -79,7 +79,9 @@ def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): ex = set(columns) - set(schema) if ex: raise ValueError( - "Requested columns (%s) not in schema (%s)" % (ex, set(schema)) + "Requested columns ({}) not in schema ({})".format( + ex, set(schema) + ) ) else: columns = list(schema) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index 706b0e272ea..3e59b9c3fcc 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -40,12 +40,12 @@ def test_roundtrip_from_dask(tmpdir, stats): tmpdir = str(tmpdir) ddf.to_parquet(tmpdir, engine="pyarrow") files = sorted( - [ + ( os.path.join(tmpdir, f) for f in os.listdir(tmpdir) # TODO: Allow "_metadata" in list after dask#6047 if not f.endswith("_metadata") - ], + ), key=natural_sort_key, ) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 39491a45e7e..44534632117 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -34,7 +34,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_version = None with open( - os.path.join(cuda_include_dir, "cuda.h"), "r", encoding="utf-8" + os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8" ) as f: for line in f.readlines(): if re.search(r"#define CUDA_VERSION ", line) is not None: diff --git a/python/dask_cudf/versioneer.py b/python/dask_cudf/versioneer.py index a560f2e8797..0a66806cb6d 100644 --- a/python/dask_cudf/versioneer.py +++ b/python/dask_cudf/versioneer.py @@ -275,7 +275,6 @@ """ -from __future__ import print_function import errno import json @@ -345,7 +344,7 @@ def get_config_from_root(root): # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: + with open(setup_cfg) as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory @@ -407,7 +406,7 @@ def run_command( stderr=(subprocess.PIPE if hide_stderr else None), ) break - except EnvironmentError: + except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue @@ -417,7 +416,7 @@ def run_command( return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: @@ -964,7 +963,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs, "r") + f = open(versionfile_abs) for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -979,7 +978,7 @@ def git_get_keywords(versionfile_abs): if mo: keywords["date"] = mo.group(1) f.close() - except EnvironmentError: + except OSError: pass return keywords @@ -1003,11 +1002,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1016,7 +1015,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1123,9 +1122,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, tag_prefix, ) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] @@ -1175,13 +1173,13 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") + f = open(".gitattributes") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() - except EnvironmentError: + except OSError: pass if not present: f = open(".gitattributes", "a+") @@ -1245,7 +1243,7 @@ def versions_from_file(filename): try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search( r"version_json = '''\n(.*)''' # END VERSION_JSON", @@ -1272,7 +1270,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1497,7 +1495,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1772,11 +1770,7 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print( "Adding sample versioneer config to setup.cfg", file=sys.stderr @@ -1803,9 +1797,9 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() - except EnvironmentError: + except OSError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) @@ -1824,12 +1818,12 @@ def do_setup(): manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: - with open(manifest_in, "r") as f: + with open(manifest_in) as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) - except EnvironmentError: + except OSError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so @@ -1863,7 +1857,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import")