STYLE loosen inconsistent namespace check (pandas-dev#40532)
MarcoGorelli authored and vladu committed Apr 5, 2021
1 parent f2777db commit 54db432
Showing 10 changed files with 125 additions and 86 deletions.
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
@@ -86,11 +86,10 @@ repos:
types: [python]
exclude: ^pandas/_typing\.py$
- id: inconsistent-namespace-usage
name: 'Check for inconsistent use of pandas namespace in tests'
name: 'Check for inconsistent use of pandas namespace'
entry: python scripts/check_for_inconsistent_pandas_namespace.py
language: python
types: [python]
files: ^pandas/tests/
- id: incorrect-code-directives
name: Check for incorrect code block or IPython directives
language: pygrep
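
With the 'files: ^pandas/tests/' filter removed and the hook renamed, the check now runs on every Python file in the repository rather than only the test suite. For illustration, a minimal example (not taken from this diff) of the pattern the hook flags: a file that both imports a name from pandas and reaches the same name through the pd namespace.

import pandas as pd
from pandas import DataFrame

df = DataFrame({"a": [1, 2]})        # bare imported name ...
other = pd.DataFrame({"b": [3, 4]})  # ... mixed with pd.DataFrame in the same file -> flagged
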
20 changes: 9 additions & 11 deletions asv_bench/benchmarks/arithmetic.py
@@ -140,9 +140,7 @@ def setup(self, op, shape):
# construct dataframe with 2 blocks
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
df = pd.concat(
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
)
df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True)
# should already be the case, but just to be sure
df._consolidate_inplace()

@@ -151,7 +149,7 @@ def setup(self, op, shape):
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
df2 = pd.concat(
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
[DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)],
axis=1,
ignore_index=True,
)
@@ -459,9 +457,9 @@ class OffsetArrayArithmetic:

def setup(self, offset):
N = 10000
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
rng = date_range(start="1/1/2000", periods=N, freq="T")
self.rng = rng
self.ser = pd.Series(rng)
self.ser = Series(rng)

def time_add_series_offset(self, offset):
with warnings.catch_warnings(record=True):
@@ -478,7 +476,7 @@ class ApplyIndex:

def setup(self, offset):
N = 10000
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
rng = date_range(start="1/1/2000", periods=N, freq="T")
self.rng = rng

def time_apply_index(self, offset):
@@ -490,17 +488,17 @@ class BinaryOpsMultiIndex:
param_names = ["func"]

def setup(self, func):
date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
array = date_range("20200101 00:00", "20200102 0:00", freq="S")
level_0_names = [str(i) for i in range(30)]

index = pd.MultiIndex.from_product([level_0_names, date_range])
index = pd.MultiIndex.from_product([level_0_names, array])
column_names = ["col_1", "col_2"]

self.df = pd.DataFrame(
self.df = DataFrame(
np.random.rand(len(index), 2), index=index, columns=column_names
)

self.arg_df = pd.DataFrame(
self.arg_df = DataFrame(
np.random.randint(1, 10, (len(level_0_names), 2)),
index=level_0_names,
columns=column_names,
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/sparse.py
@@ -28,7 +28,7 @@ def setup(self):
data = np.random.randn(N)[:-i]
idx = rng[:-i]
data[100:] = np.nan
self.series[i] = pd.Series(pd.SparseArray(data), index=idx)
self.series[i] = Series(SparseArray(data), index=idx)

def time_series_to_frame(self):
pd.DataFrame(self.series)
@@ -63,7 +63,7 @@ def setup(self):
)

def time_sparse_series_from_coo(self):
pd.Series.sparse.from_coo(self.matrix)
Series.sparse.from_coo(self.matrix)


class ToCoo:
14 changes: 7 additions & 7 deletions pandas/_testing/__init__.py
@@ -207,12 +207,12 @@ def box_expected(expected, box_cls, transpose=True):
"""
if box_cls is pd.array:
expected = pd.array(expected)
elif box_cls is pd.Index:
expected = pd.Index(expected)
elif box_cls is pd.Series:
expected = pd.Series(expected)
elif box_cls is pd.DataFrame:
expected = pd.Series(expected).to_frame()
elif box_cls is Index:
expected = Index(expected)
elif box_cls is Series:
expected = Series(expected)
elif box_cls is DataFrame:
expected = Series(expected).to_frame()
if transpose:
# for vector operations, we need a DataFrame to be a single-row,
# not a single-column, in order to operate against non-DataFrame
@@ -400,7 +400,7 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None)
"x": state.rand(n) * 2 - 1,
"y": state.rand(n) * 2 - 1,
}
df = pd.DataFrame(columns, index=index, columns=sorted(columns))
df = DataFrame(columns, index=index, columns=sorted(columns))
if df.index[-1] == end:
df = df.iloc[:-1]
return df
18 changes: 8 additions & 10 deletions pandas/conftest.py
@@ -328,7 +328,7 @@ def unique_nulls_fixture(request):
# ----------------------------------------------------------------


@pytest.fixture(params=[pd.DataFrame, pd.Series])
@pytest.fixture(params=[DataFrame, Series])
def frame_or_series(request):
"""
Fixture to parametrize over DataFrame and Series.
@@ -338,7 +338,7 @@ def frame_or_series(request):

# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]"
@pytest.fixture(
params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item]
params=[Index, Series], ids=["index", "series"] # type: ignore[list-item]
)
def index_or_series(request):
"""
@@ -356,9 +356,7 @@ def index_or_series(request):
index_or_series2 = index_or_series


@pytest.fixture(
params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
)
@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
def index_or_series_or_array(request):
"""
Fixture to parametrize over Index, Series, and ExtensionArray
@@ -559,7 +557,7 @@ def index_with_missing(request):
# ----------------------------------------------------------------
@pytest.fixture
def empty_series():
return pd.Series([], index=[], dtype=np.float64)
return Series([], index=[], dtype=np.float64)


@pytest.fixture
@@ -596,7 +594,7 @@ def _create_series(index):
""" Helper for the _series dict """
size = len(index)
data = np.random.randn(size)
return pd.Series(data, index=index, name="a")
return Series(data, index=index, name="a")


_series = {
@@ -1437,16 +1435,16 @@ def any_numpy_dtype(request):
("boolean", [True, np.nan, False]),
("boolean", [True, pd.NA, False]),
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
# The following two dtypes are commented out due to GH 23554
# ('complex', [1 + 1j, np.nan, 2 + 2j]),
# ('timedelta64', [np.timedelta64(1, 'D'),
# np.nan, np.timedelta64(2, 'D')]),
("timedelta", [timedelta(1), np.nan, timedelta(2)]),
("time", [time(1), np.nan, time(2)]),
("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
("period", [Period(2013), pd.NaT, Period(2018)]),
("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
]
ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id

10 changes: 5 additions & 5 deletions pandas/io/formats/style.py
@@ -186,8 +186,8 @@ def __init__(
if not data.index.is_unique or not data.columns.is_unique:
raise ValueError("style is not supported for non-unique indices.")
self.data: DataFrame = data
self.index: pd.Index = data.index
self.columns: pd.Index = data.columns
self.index: Index = data.index
self.columns: Index = data.columns
self.table_styles = table_styles
if not isinstance(uuid_len, int) or not uuid_len >= 0:
raise TypeError("``uuid_len`` must be an integer in range [0, 32].")
@@ -913,7 +913,7 @@ def _apply(
result.columns = data.columns
else:
result = func(data, **kwargs)
if not isinstance(result, pd.DataFrame):
if not isinstance(result, DataFrame):
if not isinstance(result, np.ndarray):
raise TypeError(
f"Function {repr(func)} must return a DataFrame or ndarray "
@@ -1565,7 +1565,7 @@ def css(rgba) -> str:
if s.ndim == 1:
return [css(rgba) for rgba in rgbas]
else:
return pd.DataFrame(
return DataFrame(
[[css(rgba) for rgba in row] for row in rgbas],
index=s.index,
columns=s.columns,
@@ -1655,7 +1655,7 @@ def css(x):
if s.ndim == 1:
return [css(x) for x in normed]
else:
return pd.DataFrame(
return DataFrame(
[[css(x) for x in row] for row in normed],
index=s.index,
columns=s.columns,
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_datetimelike.py
@@ -1372,9 +1372,9 @@ def array_likes(request):
data = memoryview(arr)
elif name == "array":
# stdlib array
from array import array as array_stdlib
import array

data = array_stdlib("i", arr)
data = array.array("i", arr)
elif name == "dask":
import dask.array

6 changes: 3 additions & 3 deletions pandas/tests/frame/test_constructors.py
@@ -1236,14 +1236,14 @@ def __len__(self, n):
def test_constructor_stdlib_array(self):
# GH 4297
# support Array
from array import array as stdlib_array
import array

result = DataFrame({"A": stdlib_array("i", range(10))})
result = DataFrame({"A": array.array("i", range(10))})
expected = DataFrame({"A": list(range(10))})
tm.assert_frame_equal(result, expected, check_dtype=False)

expected = DataFrame([list(range(10)), list(range(10))])
result = DataFrame([stdlib_array("i", range(10)), stdlib_array("i", range(10))])
result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_range(self):
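
The stdlib-array changes in the two test files above appear to undo aliases that the old, stricter check effectively required: it flagged any bare name that also existed under the pd namespace, so a file using both pd.array and the standard-library array module had to rename its import. The loosened check only considers names actually imported from pandas, so plain stdlib usage is acceptable again. A small illustration (assumed, not from this diff):

import array

import pandas as pd

ints = array.array("i", range(10))  # stdlib array module, used via the bare name `array`
ext = pd.array([1, 2, 3])           # pd.array no longer conflicts: `array` was not imported from pandas
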
71 changes: 46 additions & 25 deletions scripts/check_for_inconsistent_pandas_namespace.py
@@ -2,7 +2,7 @@
Check that test suite file doesn't use the pandas namespace inconsistently.
We check for cases of ``Series`` and ``pd.Series`` appearing in the same file
(likewise for some other common classes).
(likewise for other pandas objects).
This is meant to be run as a pre-commit hook - to run it manually, you can do:
@@ -15,43 +15,50 @@
though note that you may need to manually fixup some imports and that you will also
need the additional dependency `tokenize-rt` (which is left out from the pre-commit
hook so that it uses the same virtualenv as the other local ones).
The general structure is similar to that of some plugins from
https://github.com/asottile/pyupgrade .
"""

import argparse
import ast
import sys
from typing import (
MutableMapping,
NamedTuple,
Optional,
Sequence,
Set,
Tuple,
)

ERROR_MESSAGE = "Found both `pd.{name}` and `{name}` in {path}"
EXCLUDE = {
"eval", # built-in, different from `pd.eval`
"np", # pd.np is deprecated but still tested
}
Offset = Tuple[int, int]
ERROR_MESSAGE = (
"{path}:{lineno}:{col_offset}: "
"Found both '{prefix}.{name}' and '{name}' in {path}"
)


class OffsetWithNamespace(NamedTuple):
lineno: int
col_offset: int
namespace: str


class Visitor(ast.NodeVisitor):
def __init__(self) -> None:
self.pandas_namespace: MutableMapping[Offset, str] = {}
self.no_namespace: Set[str] = set()
self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {}
self.imported_from_pandas: Set[str] = set()

def visit_Attribute(self, node: ast.Attribute) -> None:
if (
isinstance(node.value, ast.Name)
and node.value.id == "pd"
and node.attr not in EXCLUDE
):
self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr
if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
offset_with_namespace = OffsetWithNamespace(
node.lineno, node.col_offset, node.value.id
)
self.pandas_namespace[offset_with_namespace] = node.attr
self.generic_visit(node)

def visit_Name(self, node: ast.Name) -> None:
if node.id not in EXCLUDE:
self.no_namespace.add(node.id)
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
if node.module is not None and "pandas" in node.module:
self.imported_from_pandas.update(name.name for name in node.names)
self.generic_visit(node)


@@ -64,9 +71,11 @@ def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str

tokens = src_to_tokens(content)
for n, i in reversed_enumerate(tokens):
offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src)
if (
i.offset in visitor.pandas_namespace
and visitor.pandas_namespace[i.offset] in visitor.no_namespace
offset_with_namespace in visitor.pandas_namespace
and visitor.pandas_namespace[offset_with_namespace]
in visitor.imported_from_pandas
):
# Replace `pd`
tokens[n] = i._replace(src="")
@@ -85,16 +94,28 @@ def check_for_inconsistent_pandas_namespace(
visitor = Visitor()
visitor.visit(tree)

inconsistencies = visitor.no_namespace.intersection(
inconsistencies = visitor.imported_from_pandas.intersection(
visitor.pandas_namespace.values()
)

if not inconsistencies:
# No inconsistent namespace usage, nothing to replace.
return content
return None

if not replace:
msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path)
raise RuntimeError(msg)
inconsistency = inconsistencies.pop()
lineno, col_offset, prefix = next(
key for key, val in visitor.pandas_namespace.items() if val == inconsistency
)
msg = ERROR_MESSAGE.format(
lineno=lineno,
col_offset=col_offset,
prefix=prefix,
name=inconsistency,
path=path,
)
sys.stdout.write(msg)
sys.exit(1)

return replace_inconsistent_pandas_namespace(visitor, content)

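
For reference, a minimal self-contained sketch of the detection idea the revised script uses (simplified from the Visitor shown above; the sample source and the file name t.py are illustrative):

import ast

SOURCE = """\
from pandas import Series
import pandas as pd

ser = Series([1, 2, 3])
other = pd.Series([4, 5, 6])
"""


class NamespaceVisitor(ast.NodeVisitor):
    """Simplified stand-in for the Visitor defined in the script above."""

    def __init__(self) -> None:
        # (lineno, col_offset, "pd" or "pandas") -> attribute accessed on it
        self.pandas_namespace = {}
        # names imported directly from pandas
        self.imported_from_pandas = set()

    def visit_Attribute(self, node: ast.Attribute) -> None:
        # record usages such as pd.Series / pandas.Series
        if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
            key = (node.lineno, node.col_offset, node.value.id)
            self.pandas_namespace[key] = node.attr
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        # record names brought in via `from pandas import ...`
        if node.module is not None and "pandas" in node.module:
            self.imported_from_pandas.update(alias.name for alias in node.names)
        self.generic_visit(node)


visitor = NamespaceVisitor()
visitor.visit(ast.parse(SOURCE))
for (lineno, col_offset, prefix), name in visitor.pandas_namespace.items():
    if name in visitor.imported_from_pandas:
        print(f"t.py:{lineno}:{col_offset}: Found both '{prefix}.{name}' and '{name}' in t.py")

Run as-is, this prints t.py:5:8: Found both 'pd.Series' and 'Series' in t.py, the same path:lineno:col_offset shape produced by the new ERROR_MESSAGE.
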