rapidsai · rapids-bot · Feb 4, 2022 · Jan 26, 2022 · Jan 26, 2022 · Jan 26, 2022
@@ -7,11 +7,11 @@
 from enum import Enum
 
 
-startingPatt = re.compile("^STARTING: ([\w\.\-]+)$")
-skippingPatt = re.compile("^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
-exitCodePatt = re.compile("^EXIT CODE: (\d+)$")
-folderPatt = re.compile("^FOLDER: ([\w\.\-]+)$")
-timePatt = re.compile("^real\s+([\d\.ms]+)$")
+startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$")
+skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
+exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$")
+folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$")
+timePatt = re.compile(r"^real\s+([\d\.ms]+)$")
 linePatt = re.compile("^" + ("-" * 80) + "$")
 
 

@@ -13,7 +13,6 @@
 # limitations under the License.
 #
 
-from __future__ import print_function
 
 import argparse
 import os
@@ -124,19 +123,19 @@ def run_clang_format(src, dst, exe, verbose, inplace):
         os.makedirs(dstdir)
     # run the clang format command itself
     if src == dst:
-        cmd = "%s -i %s" % (exe, src)
+        cmd = f"{exe} -i {src}"
     else:
-        cmd = "%s %s > %s" % (exe, src, dst)
+        cmd = f"{exe} {src} > {dst}"
     try:
         subprocess.check_call(cmd, shell=True)
     except subprocess.CalledProcessError:
         print("Failed to run clang-format! Maybe your env is not proper?")
         raise
     # run the diff to check if there are any formatting issues
     if inplace:
-        cmd = "diff -q %s %s >/dev/null" % (src, dst)
+        cmd = f"diff -q {src} {dst} >/dev/null"
     else:
-        cmd = "diff %s %s" % (src, dst)
+        cmd = f"diff {src} {dst}"
 
     try:
         subprocess.check_call(cmd, shell=True)

@@ -13,7 +13,6 @@
 # limitations under the License.
 #
 
-from __future__ import print_function
 import re
 import os
 import subprocess
@@ -67,7 +66,7 @@ def parse_args():
 
 
 def get_all_commands(cdb):
-    with open(cdb, "r") as fp:
+    with open(cdb) as fp:
         return json.load(fp)
 
 
@@ -195,10 +194,10 @@ def collect_result(result):
 
 def print_result(passed, stdout, file):
     status_str = "PASSED" if passed else "FAILED"
-    print("%s File:%s %s %s" % (SEPARATOR, file, status_str, SEPARATOR))
+    print(f"{SEPARATOR} File:{file} {status_str} {SEPARATOR}")
     if stdout:
         print(stdout)
-        print("%s File:%s ENDS %s" % (SEPARATOR, file, SEPARATOR))
+        print(f"{SEPARATOR} File:{file} ENDS {SEPARATOR}")
 
 
 def print_results():

@@ -33,7 +33,7 @@
 
 # build a map of the log entries
 entries = {}
-with open(log_file, "r") as log:
+with open(log_file) as log:
     last = 0
     files = {}
     for line in log:

diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-#
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
 #
 # cudf documentation build configuration file, created by

@@ -14,7 +14,7 @@
 )
 
 
-class Fuzzer(object):
+class Fuzzer:
     def __init__(
         self,
         target,

@@ -16,7 +16,7 @@
 )
 
 
-class IOFuzz(object):
+class IOFuzz:
     def __init__(
         self,
         dirs=None,
@@ -59,7 +59,7 @@ def __init__(
         self._current_buffer = None
 
     def _load_params(self, path):
-        with open(path, "r") as f:
+        with open(path) as f:
             params = json.load(f)
         self._inputs.append(params)
 

@@ -3,7 +3,7 @@
 from cudf._fuzz_testing import fuzzer
 
 
-class PythonFuzz(object):
+class PythonFuzz:
     def __init__(self, func, params=None, data_handle=None, **kwargs):
         self.function = func
         self.data_handler_class = data_handle

@@ -86,7 +86,7 @@ def run_command(
                 stderr=(subprocess.PIPE if hide_stderr else None),
             )
             break
-        except EnvironmentError:
+        except OSError:
             e = sys.exc_info()[1]
             if e.errno == errno.ENOENT:
                 continue
@@ -96,7 +96,7 @@ def run_command(
             return None, None
     else:
         if verbose:
-            print("unable to find command, tried %s" % (commands,))
+            print(f"unable to find command, tried {commands}")
         return None, None
     stdout = p.communicate()[0].strip()
     if sys.version_info[0] >= 3:
@@ -149,7 +149,7 @@ def git_get_keywords(versionfile_abs):
     # _version.py.
     keywords = {}
     try:
-        f = open(versionfile_abs, "r")
+        f = open(versionfile_abs)
         for line in f.readlines():
             if line.strip().startswith("git_refnames ="):
                 mo = re.search(r'=\s*"(.*)"', line)
@@ -164,7 +164,7 @@ def git_get_keywords(versionfile_abs):
                 if mo:
                     keywords["date"] = mo.group(1)
         f.close()
-    except EnvironmentError:
+    except OSError:
         pass
     return keywords
 
@@ -188,11 +188,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
         if verbose:
             print("keywords are unexpanded, not using")
         raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    refs = {r.strip() for r in refnames.strip("()").split(",")}
     # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
     # just "foo-1.0". If we see a "tag: " prefix, prefer those.
     TAG = "tag: "
-    tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)])
+    tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}
     if not tags:
         # Either we're using git < 1.8.3, or there really are no tags. We use
         # a heuristic: assume all version tags have a digit. The old git %d
@@ -201,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
         # between branches and tags. By ignoring refnames without digits, we
         # filter out many common branch names like "release" and
         # "stabilization", as well as "HEAD" and "master".
-        tags = set([r for r in refs if re.search(r"\d", r)])
+        tags = {r for r in refs if re.search(r"\d", r)}
         if verbose:
             print("discarding '%s', no digits" % ",".join(refs - tags))
     if verbose:
@@ -308,10 +308,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
             if verbose:
                 fmt = "tag '%s' doesn't start with prefix '%s'"
                 print(fmt % (full_tag, tag_prefix))
-            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
-                full_tag,
-                tag_prefix,
-            )
+            pieces[
+                "error"
+            ] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'"
             return pieces
         pieces["closest-tag"] = full_tag[len(tag_prefix) :]
 

@@ -58,7 +58,7 @@ def to_dict(self):
         return dc
 
 
-class GpuArrowNodeReader(object):
+class GpuArrowNodeReader:
     def __init__(self, table, index):
         self._table = table
         self._field = table.schema[index]

@@ -1,6 +1,6 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
-from __future__ import annotations, division, print_function
+from __future__ import annotations
 
 import pickle
 import warnings

@@ -5083,7 +5083,7 @@ def to_arrow(self) -> pa.Array:
         """
         if self.null_count == len(self):
             return pa.NullArray.from_buffers(
-                pa.null(), len(self), [pa.py_buffer((b""))]
+                pa.null(), len(self), [pa.py_buffer(b"")]
             )
         else:
             return super().to_arrow()

@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
-from __future__ import annotations, division
+from __future__ import annotations
 
 import functools
 import inspect
@@ -4229,7 +4229,7 @@ def _verbose_repr():
                 dtype = self.dtypes.iloc[i]
                 col = pprint_thing(col)
 
-                line_no = _put_str(" {num}".format(num=i), space_num)
+                line_no = _put_str(f" {i}", space_num)
                 count = ""
                 if show_counts:
                     count = counts[i]
@@ -5576,9 +5576,7 @@ def select_dtypes(self, include=None, exclude=None):
                     if issubclass(dtype.type, e_dtype):
                         exclude_subtypes.add(dtype.type)
 
-        include_all = set(
-            [cudf_dtype_from_pydata_dtype(d) for d in self.dtypes]
-        )
+        include_all = {cudf_dtype_from_pydata_dtype(d) for d in self.dtypes}
 
         if include:
             inclusion = include_all & include_subtypes
@@ -6329,8 +6327,8 @@ def _align_indices(lhs, rhs):
         lhs_out = DataFrame(index=df.index)
         rhs_out = DataFrame(index=df.index)
         common = set(lhs.columns) & set(rhs.columns)
-        common_x = set(["{}_x".format(x) for x in common])
-        common_y = set(["{}_y".format(x) for x in common])
+        common_x = {f"{x}_x" for x in common}
+        common_y = {f"{x}_y" for x in common}
         for col in df.columns:
             if col in common_x:
                 lhs_out[col[:-2]] = df[col]

@@ -1461,7 +1461,7 @@ def apply(self, func):
 
 
 # TODO: should we define this as a dataclass instead?
-class Grouper(object):
+class Grouper:
     def __init__(
         self, key=None, level=None, freq=None, closed=None, label=None
     ):

@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
-from __future__ import annotations, division, print_function
+from __future__ import annotations
 
 import math
 import pickle

@@ -169,13 +169,11 @@ def __init__(
             if on
             else set()
             if (self._using_left_index or self._using_right_index)
-            else set(
-                [
-                    lkey.name
-                    for lkey, rkey in zip(self._left_keys, self._right_keys)
-                    if lkey.name == rkey.name
-                ]
-            )
+            else {
+                lkey.name
+                for lkey, rkey in zip(self._left_keys, self._right_keys)
+                if lkey.name == rkey.name
+            }
         )
 
     def perform_merge(self) -> Frame:

@@ -115,7 +115,7 @@ def __init__(
                 "MultiIndex has unequal number of levels and "
                 "codes and is inconsistent!"
             )
-        if len(set(c.size for c in codes._data.columns)) != 1:
+        if len({c.size for c in codes._data.columns}) != 1:
             raise ValueError(
                 "MultiIndex length of codes does not match "
                 "and is inconsistent!"
@@ -752,7 +752,7 @@ def _index_and_downcast(self, result, index, index_key):
             # Pandas returns an empty Series with a tuple as name
             # the one expected result column
             result = cudf.Series._from_data(
-                {}, name=tuple((col[0] for col in index._data.columns))
+                {}, name=tuple(col[0] for col in index._data.columns)
             )
         elif out_index._num_columns == 1:
             # If there's only one column remaining in the output index, convert
@@ -1202,7 +1202,7 @@ def _poplevels(self, level):
         if not pd.api.types.is_list_like(level):
             level = (level,)
 
-        ilevels = sorted([self._level_index_from_level(lev) for lev in level])
+        ilevels = sorted(self._level_index_from_level(lev) for lev in level)
 
         if not ilevels:
             return None

@@ -17,7 +17,7 @@
 )
 
 
-class Scalar(object):
+class Scalar:
     """
     A GPU-backed scalar object with NumPy scalar like properties
     May be used in binary operations against other scalars, cuDF

@@ -167,7 +167,7 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]:
             if (
                 isinstance(arg, tuple)
                 and len(arg) == self._frame._index.nlevels
-                and not any((isinstance(x, slice) for x in arg))
+                and not any(isinstance(x, slice) for x in arg)
             ):
                 result = result.iloc[0]
             return result
@@ -2969,7 +2969,7 @@ def _prepare_percentiles(percentiles):
             return percentiles
 
         def _format_percentile_names(percentiles):
-            return ["{0}%".format(int(x * 100)) for x in percentiles]
+            return [f"{int(x * 100)}%" for x in percentiles]
 
         def _format_stats_values(stats_data):
             return map(lambda x: round(x, 6), stats_data)
@@ -3071,7 +3071,7 @@ def _describe_timestamp(self):
                         .to_numpy(na_value=np.nan),
                     )
                 ),
-                "max": str(pd.Timestamp((self.max()))),
+                "max": str(pd.Timestamp(self.max())),
             }
 
             return Series(
@@ -3550,7 +3550,7 @@ def wrapper(self, other, level=None, fill_value=None, axis=0):
     setattr(Series, binop, make_binop_func(binop))
 
 
-class DatetimeProperties(object):
+class DatetimeProperties:
     """
     Accessor object for datetimelike properties of the Series values.
 
@@ -4492,7 +4492,7 @@ def strftime(self, date_format, *args, **kwargs):
         )
 
 
-class TimedeltaProperties(object):
+class TimedeltaProperties:
     """
     Accessor object for timedeltalike properties of the Series values.
 

@@ -133,8 +133,8 @@ def typeof_masked(val, c):
 class MaskedConstructor(ConcreteTemplate):
     key = api.Masked
     units = ["ns", "ms", "us", "s"]
-    datetime_cases = set(types.NPDatetime(u) for u in units)
-    timedelta_cases = set(types.NPTimedelta(u) for u in units)
+    datetime_cases = {types.NPDatetime(u) for u in units}
+    timedelta_cases = {types.NPTimedelta(u) for u in units}
     cases = [
         nb_signature(MaskedType(t), t, types.boolean)
         for t in (
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,7 +14,7 @@ @@
     )
-    class Fuzzer(object):
+    class Fuzzer:
         def __init__(
             self,
             target,
@@ Expand Down @@