Standardize imports. (#10680)

This PR standardizes a few imports across the cudf code base. Changes include:

- Remove usage of some non-standard "two letter" names. For example, `import numpy as np` is common, but `import pyorc as po` and `import fastavro as fa` are non-standard and not the style used by their documentation. I left `import cupy as cp`, since both `import cupy` and `import cupy as cp` are prevalent in the code base (the one exception that I changed was a file that had both `import cupy` and `import cupy as cp`).
- Avoid the pattern `from some_package import x as x` -- just write `from some_package import x`.
- Fix some `cimport`s.
- Always use `import datetime` instead of `import datetime as dt` to avoid conflicts with the many other `dt` names in our code (including local names that had the potential to shadow/overwrite the library's name).
- Use `warnings.warn` rather than `from warnings import warn` for consistency across the library.
- Remove some legacy Python 2 compatibility.

Authors:
- Bradley Dice (https://github.com/bdice)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #10680
rapidsai · Apr 18, 2022 · c322cba · c322cba
1 parent 45c003d
commit c322cba
Show file tree

Hide file tree

Showing 20 changed files with 101 additions and 97 deletions.
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
@@ -7,7 +7,7 @@ import pandas as pd
 import rmm
 
 import cudf
-import cudf._lib as libcudfxx
+import cudf._lib as libcudf
 from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
 from cudf.core.buffer import Buffer
 
@@ -160,7 +160,7 @@ cdef class Column:
             if self.base_mask is None or self.offset == 0:
                 self._mask = self.base_mask
             else:
-                self._mask = libcudfxx.null_mask.copy_bitmask(self)
+                self._mask = libcudf.null_mask.copy_bitmask(self)
         return self._mask
 
     @property

diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from enum import Enum
 
@@ -8,9 +8,6 @@ from libcpp.utility cimport move
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 from cudf._lib.column cimport Column
-
-import cudf._lib as libcudfxx
-
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.null_mask cimport (
     bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 # cython: boundscheck = False
 
@@ -17,7 +17,7 @@ except ImportError:
     import json
 
 import numpy as np
-from cython.operator import dereference
+from cython.operator cimport dereference
 
 from cudf.api.types import (
     is_categorical_dtype,

diff --git a/python/cudf/cudf/_lib/rolling.pyx b/python/cudf/cudf/_lib/rolling.pyx
@@ -1,6 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-from __future__ import print_function
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import pandas as pd
 

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
-from warnings import warn
+import warnings
 
 import cupy as cp
 import numpy as np
@@ -50,7 +50,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
         raise NotImplementedError("na_sentinel can not be None.")
 
     if size_hint:
-        warn("size_hint is not applicable for cudf.factorize")
+        warnings.warn("size_hint is not applicable for cudf.factorize")
 
     return_cupy_array = isinstance(values, cp.ndarray)
 

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-import datetime as dt
+import datetime
 import locale
 import re
 from locale import nl_langinfo
@@ -237,9 +237,9 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:
         if isinstance(other, (cudf.Scalar, ColumnBase, cudf.DateOffset)):
             return other
 
-        if isinstance(other, dt.datetime):
+        if isinstance(other, datetime.datetime):
             other = np.datetime64(other)
-        elif isinstance(other, dt.timedelta):
+        elif isinstance(other, datetime.timedelta):
             other = np.timedelta64(other)
         elif isinstance(other, pd.Timestamp):
             other = other.to_datetime64()

diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
@@ -1,8 +1,8 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
+import warnings
 from decimal import Decimal
 from typing import Any, Sequence, Tuple, Union, cast
-from warnings import warn
 
 import cupy as cp
 import numpy as np
@@ -43,7 +43,7 @@ def as_decimal_column(
             isinstance(dtype, cudf.core.dtypes.DecimalDtype)
             and dtype.scale < self.dtype.scale
         ):
-            warn(
+            warnings.warn(
                 "cuDF truncates when downcasting decimals to a lower scale. "
                 "To round, use Series.round() or DataFrame.round()."
             )

diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-import datetime as dt
+import datetime
 from typing import Any, Sequence, cast
 
 import numpy as np
@@ -211,7 +211,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
     def normalize_binop_value(self, other) -> ColumnBinaryOperand:
         if isinstance(other, (ColumnBase, cudf.Scalar)):
             return other
-        if isinstance(other, dt.timedelta):
+        if isinstance(other, datetime.timedelta):
             other = np.timedelta64(other)
         elif isinstance(other, pd.Timestamp):
             other = other.to_datetime64()

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -5596,14 +5596,14 @@ def select_dtypes(self, include=None, exclude=None):
     @ioutils.doc_to_parquet()
     def to_parquet(self, path, *args, **kwargs):
         """{docstring}"""
-        from cudf.io import parquet as pq
+        from cudf.io import parquet
 
-        return pq.to_parquet(self, path, *args, **kwargs)
+        return parquet.to_parquet(self, path, *args, **kwargs)
 
     @ioutils.doc_to_feather()
     def to_feather(self, path, *args, **kwargs):
         """{docstring}"""
-        from cudf.io import feather as feather
+        from cudf.io import feather
 
         feather.to_feather(self, path, *args, **kwargs)
 
@@ -5623,7 +5623,7 @@ def to_csv(
         **kwargs,
     ):
         """{docstring}"""
-        from cudf.io import csv as csv
+        from cudf.io import csv
 
         return csv.to_csv(
             self,
@@ -5643,7 +5643,7 @@ def to_csv(
     @ioutils.doc_to_orc()
     def to_orc(self, fname, compression=None, *args, **kwargs):
         """{docstring}"""
-        from cudf.io import orc as orc
+        from cudf.io import orc
 
         orc.to_orc(self, fname, compression, *args, **kwargs)
 

diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py
@@ -1,9 +1,9 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
+import warnings
 from typing import Union
-from warnings import warn
 
 import cupy as cp
 
@@ -186,7 +186,7 @@ def __call__(
                 "When truncation is not True, the behaviour currently differs "
                 "from HuggingFace as cudf always returns overflowing tokens"
             )
-            warn(warning_msg)
+            warnings.warn(warning_msg)
 
         if padding != "max_length":
             error_msg = (

diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py
@@ -3,10 +3,10 @@
 import numpy as np
 import pandas as pd
 import pytest
-from pandas.api import types as ptypes
+from pandas.api import types as pd_types
 
 import cudf
-from cudf.api import types as types
+from cudf.api import types
 
 
 @pytest.mark.parametrize(
@@ -1035,11 +1035,13 @@ def test_is_decimal_dtype(obj, expect):
     ),
 )
 def test_pandas_agreement(obj):
-    assert types.is_categorical_dtype(obj) == ptypes.is_categorical_dtype(obj)
-    assert types.is_numeric_dtype(obj) == ptypes.is_numeric_dtype(obj)
-    assert types.is_integer_dtype(obj) == ptypes.is_integer_dtype(obj)
-    assert types.is_integer(obj) == ptypes.is_integer(obj)
-    assert types.is_string_dtype(obj) == ptypes.is_string_dtype(obj)
+    assert types.is_categorical_dtype(obj) == pd_types.is_categorical_dtype(
+        obj
+    )
+    assert types.is_numeric_dtype(obj) == pd_types.is_numeric_dtype(obj)
+    assert types.is_integer_dtype(obj) == pd_types.is_integer_dtype(obj)
+    assert types.is_integer(obj) == pd_types.is_integer(obj)
+    assert types.is_string_dtype(obj) == pd_types.is_string_dtype(obj)
 
 
 @pytest.mark.parametrize(
@@ -1115,7 +1117,7 @@ def test_pandas_agreement(obj):
     ),
 )
 def test_pandas_agreement_scalar(obj):
-    assert types.is_scalar(obj) == ptypes.is_scalar(obj)
+    assert types.is_scalar(obj) == pd_types.is_scalar(obj)
 
 
 # TODO: Add test of interval.

diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py
@@ -1,4 +1,6 @@
-from datetime import datetime as dt
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+
+import datetime
 
 import numpy as np
 import pandas as pd
@@ -41,12 +43,12 @@ def get_string_series():
 testdata_all = [
     (
         cudf_date_series("20010101", "20020215", freq="400h"),
-        dt.strptime("2001-01-01", "%Y-%m-%d"),
+        datetime.datetime.strptime("2001-01-01", "%Y-%m-%d"),
         True,
     ),
     (
         cudf_date_series("20010101", "20020215", freq="400h"),
-        dt.strptime("2000-01-01", "%Y-%m-%d"),
+        datetime.datetime.strptime("2000-01-01", "%Y-%m-%d"),
         False,
     ),
     (cudf_date_series("20010101", "20020215", freq="400h"), 20000101, False),

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
@@ -13,7 +13,6 @@
 from copy import copy
 
 import cupy
-import cupy as cp
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -7332,7 +7331,7 @@ def test_sample_axis_0(
 
 @pytest.mark.parametrize("replace", [True, False])
 @pytest.mark.parametrize(
-    "random_state_lib", [cp.random.RandomState, np.random.RandomState]
+    "random_state_lib", [cupy.random.RandomState, np.random.RandomState]
 )
 def test_sample_reproducibility(replace, random_state_lib):
     df = cudf.DataFrame({"a": cupy.arange(0, 1024)})
@@ -7384,7 +7383,7 @@ def test_oversample_without_replace(n, frac, axis):
     )
 
 
-@pytest.mark.parametrize("random_state", [None, cp.random.RandomState(42)])
+@pytest.mark.parametrize("random_state", [None, cupy.random.RandomState(42)])
 def test_sample_unsupported_arguments(random_state):
     df = cudf.DataFrame({"float": [0.05, 0.2, 0.3, 0.2, 0.25]})
     with pytest.raises(

diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 import datetime
-import datetime as dt
 import operator
 import re
 
@@ -219,8 +218,8 @@ def test_sort_datetime():
 
 def test_issue_165():
     df_pandas = pd.DataFrame()
-    start_date = dt.datetime.strptime("2000-10-21", "%Y-%m-%d")
-    data = [(start_date + dt.timedelta(days=x)) for x in range(6)]
+    start_date = datetime.datetime.strptime("2000-10-21", "%Y-%m-%d")
+    data = [(start_date + datetime.timedelta(days=x)) for x in range(6)]
     df_pandas["dates"] = data
     df_pandas["num"] = [1, 2, 3, 4, 5, 6]
     df_cudf = DataFrame.from_pandas(df_pandas)

diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import itertools as it
+import itertools
 import random
 
 import numpy as np
@@ -280,7 +280,7 @@ def test_drop_duplicates_empty(df):
 
 @pytest.mark.parametrize("num_columns", [3, 4, 5])
 def test_dataframe_drop_duplicates_numeric_method(num_columns):
-    comb = list(it.permutations(range(num_columns), num_columns))
+    comb = list(itertools.permutations(range(num_columns), num_columns))
     shuf = list(comb)
     random.Random(num_columns).shuffle(shuf)
 

diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py
@@ -3,12 +3,12 @@
 import os
 from io import BytesIO
 
-import fastavro as fa
+import fastavro
 import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pytest
-from pyarrow import orc as orc
+from pyarrow import orc
 
 import cudf
 from cudf.testing._utils import assert_eq
@@ -253,7 +253,7 @@ def test_read_avro(datadir, hdfs, test_url):
 
     got = cudf.read_avro(hd_fpath)
     with open(fname, mode="rb") as f:
-        expect = pd.DataFrame.from_records(fa.reader(f))
+        expect = pd.DataFrame.from_records(fastavro.reader(f))
 
     for col in expect.columns:
         expect[col] = expect[col].astype(got[col].dtype)