STYLE loosen inconsistent namespace check (pandas-dev#40532)
MarcoGorelli authored and vladu committed Apr 5, 2021
1 parent f2777db commit 54db432
Showing 10 changed files with 125 additions and 86 deletions.
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
@@ -86,11 +86,10 @@ repos:
types: [python]
exclude: ^pandas/_typing\.py$
- id: inconsistent-namespace-usage
name: 'Check for inconsistent use of pandas namespace in tests'
name: 'Check for inconsistent use of pandas namespace'
entry: python scripts/check_for_inconsistent_pandas_namespace.py
language: python
types: [python]
files: ^pandas/tests/
- id: incorrect-code-directives
name: Check for incorrect code block or IPython directives
language: pygrep
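
With the 'files: ^pandas/tests/' filter removed and the hook renamed, the check now runs on every Python file in the repository rather than only the test suite. For illustration, a minimal example (not taken from this diff) of the pattern the hook flags: a file that both imports a name from pandas and reaches the same name through the pd namespace.

import pandas as pd
from pandas import DataFrame

df = DataFrame({"a": [1, 2]})        # bare imported name ...
other = pd.DataFrame({"b": [3, 4]})  # ... mixed with pd.DataFrame in the same file -> flagged
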
20 changes: 9 additions & 11 deletions asv_bench/benchmarks/arithmetic.py
@@ -140,9 +140,7 @@ def setup(self, op, shape):
# construct dataframe with 2 blocks
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
df = pd.concat(
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
)
df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True)
# should already be the case, but just to be sure
df._consolidate_inplace()

@@ -151,7 +149,7 @@ def setup(self, op, shape):
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
df2 = pd.concat(
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
[DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)],
axis=1,
ignore_index=True,
)
@@ -459,9 +457,9 @@ class OffsetArrayArithmetic:

def setup(self, offset):
N = 10000
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
rng = date_range(start="1/1/2000", periods=N, freq="T")
self.rng = rng
self.ser = pd.Series(rng)
self.ser = Series(rng)

def time_add_series_offset(self, offset):
with warnings.catch_warnings(record=True):
@@ -478,7 +476,7 @@ class ApplyIndex:

def setup(self, offset):
N = 10000
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
rng = date_range(start="1/1/2000", periods=N, freq="T")
self.rng = rng

def time_apply_index(self, offset):
@@ -490,17 +488,17 @@ class BinaryOpsMultiIndex:
param_names = ["func"]

def setup(self, func):
date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
array = date_range("20200101 00:00", "20200102 0:00", freq="S")
level_0_names = [str(i) for i in range(30)]

index = pd.MultiIndex.from_product([level_0_names, date_range])
index = pd.MultiIndex.from_product([level_0_names, array])
column_names = ["col_1", "col_2"]

self.df = pd.DataFrame(
self.df = DataFrame(
np.random.rand(len(index), 2), index=index, columns=column_names
)

self.arg_df = pd.DataFrame(
self.arg_df = DataFrame(
np.random.randint(1, 10, (len(level_0_names), 2)),
index=level_0_names,
columns=column_names,
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/sparse.py
@@ -28,7 +28,7 @@ def setup(self):
data = np.random.randn(N)[:-i]
idx = rng[:-i]
data[100:] = np.nan
self.series[i] = pd.Series(pd.SparseArray(data), index=idx)
self.series[i] = Series(SparseArray(data), index=idx)

def time_series_to_frame(self):
pd.DataFrame(self.series)
@@ -63,7 +63,7 @@ def setup(self):
)

def time_sparse_series_from_coo(self):
pd.Series.sparse.from_coo(self.matrix)
Series.sparse.from_coo(self.matrix)


class ToCoo:
14 changes: 7 additions & 7 deletions pandas/_testing/__init__.py
@@ -207,12 +207,12 @@ def box_expected(expected, box_cls, transpose=True):
"""
if box_cls is pd.array:
expected = pd.array(expected)
elif box_cls is pd.Index:
expected = pd.Index(expected)
elif box_cls is pd.Series:
expected = pd.Series(expected)
elif box_cls is pd.DataFrame:
expected = pd.Series(expected).to_frame()
elif box_cls is Index:
expected = Index(expected)
elif box_cls is Series:
expected = Series(expected)
elif box_cls is DataFrame:
expected = Series(expected).to_frame()
if transpose:
# for vector operations, we need a DataFrame to be a single-row,
# not a single-column, in order to operate against non-DataFrame
@@ -400,7 +400,7 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None)
"x": state.rand(n) * 2 - 1,
"y": state.rand(n) * 2 - 1,
}
df = pd.DataFrame(columns, index=index, columns=sorted(columns))
df = DataFrame(columns, index=index, columns=sorted(columns))
if df.index[-1] == end:
df = df.iloc[:-1]
return df
18 changes: 8 additions & 10 deletions pandas/conftest.py
@@ -328,7 +328,7 @@ def unique_nulls_fixture(request):
# ----------------------------------------------------------------


@pytest.fixture(params=[pd.DataFrame, pd.Series])
@pytest.fixture(params=[DataFrame, Series])
def frame_or_series(request):
"""
Fixture to parametrize over DataFrame and Series.
@@ -338,7 +338,7 @@ def frame_or_series(request):

# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]"
@pytest.fixture(
params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item]
params=[Index, Series], ids=["index", "series"] # type: ignore[list-item]
)
def index_or_series(request):
"""
@@ -356,9 +356,7 @@ def index_or_series(request):
index_or_series2 = index_or_series


@pytest.fixture(
params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
)
@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
def index_or_series_or_array(request):
"""
Fixture to parametrize over Index, Series, and ExtensionArray
@@ -559,7 +557,7 @@ def index_with_missing(request):
# ----------------------------------------------------------------
@pytest.fixture
def empty_series():
return pd.Series([], index=[], dtype=np.float64)
return Series([], index=[], dtype=np.float64)


@pytest.fixture
@@ -596,7 +594,7 @@ def _create_series(index):
""" Helper for the _series dict """
size = len(index)
data = np.random.randn(size)
return pd.Series(data, index=index, name="a")
return Series(data, index=index, name="a")


_series = {
@@ -1437,16 +1435,16 @@ def any_numpy_dtype(request):
("boolean", [True, np.nan, False]),
("boolean", [True, pd.NA, False]),
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
# The following two dtypes are commented out due to GH 23554
# ('complex', [1 + 1j, np.nan, 2 + 2j]),
# ('timedelta64', [np.timedelta64(1, 'D'),
# np.nan, np.timedelta64(2, 'D')]),
("timedelta", [timedelta(1), np.nan, timedelta(2)]),
("time", [time(1), np.nan, time(2)]),
("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
("period", [Period(2013), pd.NaT, Period(2018)]),
("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
]
ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id

10 changes: 5 additions & 5 deletions pandas/io/formats/style.py
@@ -186,8 +186,8 @@ def __init__(
if not data.index.is_unique or not data.columns.is_unique:
raise ValueError("style is not supported for non-unique indices.")
self.data: DataFrame = data
self.index: pd.Index = data.index
self.columns: pd.Index = data.columns
self.index: Index = data.index
self.columns: Index = data.columns
self.table_styles = table_styles
if not isinstance(uuid_len, int) or not uuid_len >= 0:
raise TypeError("``uuid_len`` must be an integer in range [0, 32].")
@@ -913,7 +913,7 @@ def _apply(
result.columns = data.columns
else:
result = func(data, **kwargs)
if not isinstance(result, pd.DataFrame):
if not isinstance(result, DataFrame):
if not isinstance(result, np.ndarray):
raise TypeError(
f"Function {repr(func)} must return a DataFrame or ndarray "
@@ -1565,7 +1565,7 @@ def css(rgba) -> str:
if s.ndim == 1:
return [css(rgba) for rgba in rgbas]
else:
return pd.DataFrame(
return DataFrame(
[[css(rgba) for rgba in row] for row in rgbas],
index=s.index,
columns=s.columns,
@@ -1655,7 +1655,7 @@ def css(x):
if s.ndim == 1:
return [css(x) for x in normed]
else:
return pd.DataFrame(
return DataFrame(
[[css(x) for x in row] for row in normed],
index=s.index,
columns=s.columns,
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_datetimelike.py
@@ -1372,9 +1372,9 @@ def array_likes(request):
data = memoryview(arr)
elif name == "array":
# stdlib array
from array import array as array_stdlib
import array

data = array_stdlib("i", arr)
data = array.array("i", arr)
elif name == "dask":
import dask.array

6 changes: 3 additions & 3 deletions pandas/tests/frame/test_constructors.py
@@ -1236,14 +1236,14 @@ def __len__(self, n):
def test_constructor_stdlib_array(self):
# GH 4297
# support Array
from array import array as stdlib_array
import array

result = DataFrame({"A": stdlib_array("i", range(10))})
result = DataFrame({"A": array.array("i", range(10))})
expected = DataFrame({"A": list(range(10))})
tm.assert_frame_equal(result, expected, check_dtype=False)

expected = DataFrame([list(range(10)), list(range(10))])
result = DataFrame([stdlib_array("i", range(10)), stdlib_array("i", range(10))])
result = DataFrame([array.array("i", range(10)), array.array("i", range(10))])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_range(self):
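
The stdlib-array changes in the two test files above appear to undo aliases that the old, stricter check effectively required: it flagged any bare name that also existed under the pd namespace, so a file using both pd.array and the standard-library array module had to rename its import. The loosened check only considers names actually imported from pandas, so plain stdlib usage is acceptable again. A small illustration (assumed, not from this diff):

import array

import pandas as pd

ints = array.array("i", range(10))  # stdlib array module, used via the bare name `array`
ext = pd.array([1, 2, 3])           # pd.array no longer conflicts: `array` was not imported from pandas
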
71 changes: 46 additions & 25 deletions scripts/check_for_inconsistent_pandas_namespace.py
@@ -2,7 +2,7 @@
Check that test suite file doesn't use the pandas namespace inconsistently.
We check for cases of ``Series`` and ``pd.Series`` appearing in the same file
(likewise for some other common classes).
(likewise for other pandas objects).
This is meant to be run as a pre-commit hook - to run it manually, you can do:
@@ -15,43 +15,50 @@
though note that you may need to manually fixup some imports and that you will also
need the additional dependency `tokenize-rt` (which is left out from the pre-commit
hook so that it uses the same virtualenv as the other local ones).
The general structure is similar to that of some plugins from
https://github.com/asottile/pyupgrade .
"""

import argparse
import ast
import sys
from typing import (
MutableMapping,
NamedTuple,
Optional,
Sequence,
Set,
Tuple,
)

ERROR_MESSAGE = "Found both `pd.{name}` and `{name}` in {path}"
EXCLUDE = {
"eval", # built-in, different from `pd.eval`
"np", # pd.np is deprecated but still tested
}
Offset = Tuple[int, int]
ERROR_MESSAGE = (
"{path}:{lineno}:{col_offset}: "
"Found both '{prefix}.{name}' and '{name}' in {path}"
)


class OffsetWithNamespace(NamedTuple):
lineno: int
col_offset: int
namespace: str


class Visitor(ast.NodeVisitor):
def __init__(self) -> None:
self.pandas_namespace: MutableMapping[Offset, str] = {}
self.no_namespace: Set[str] = set()
self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {}
self.imported_from_pandas: Set[str] = set()

def visit_Attribute(self, node: ast.Attribute) -> None:
if (
isinstance(node.value, ast.Name)
and node.value.id == "pd"
and node.attr not in EXCLUDE
):
self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr
if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
offset_with_namespace = OffsetWithNamespace(
node.lineno, node.col_offset, node.value.id
)
self.pandas_namespace[offset_with_namespace] = node.attr
self.generic_visit(node)

def visit_Name(self, node: ast.Name) -> None:
if node.id not in EXCLUDE:
self.no_namespace.add(node.id)
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
if node.module is not None and "pandas" in node.module:
self.imported_from_pandas.update(name.name for name in node.names)
self.generic_visit(node)


@@ -64,9 +71,11 @@ def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str

tokens = src_to_tokens(content)
for n, i in reversed_enumerate(tokens):
offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src)
if (
i.offset in visitor.pandas_namespace
and visitor.pandas_namespace[i.offset] in visitor.no_namespace
offset_with_namespace in visitor.pandas_namespace
and visitor.pandas_namespace[offset_with_namespace]
in visitor.imported_from_pandas
):
# Replace `pd`
tokens[n] = i._replace(src="")
@@ -85,16 +94,28 @@ def check_for_inconsistent_pandas_namespace(
visitor = Visitor()
visitor.visit(tree)

inconsistencies = visitor.no_namespace.intersection(
inconsistencies = visitor.imported_from_pandas.intersection(
visitor.pandas_namespace.values()
)

if not inconsistencies:
# No inconsistent namespace usage, nothing to replace.
return content
return None

if not replace:
msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path)
raise RuntimeError(msg)
inconsistency = inconsistencies.pop()
lineno, col_offset, prefix = next(
key for key, val in visitor.pandas_namespace.items() if val == inconsistency
)
msg = ERROR_MESSAGE.format(
lineno=lineno,
col_offset=col_offset,
prefix=prefix,
name=inconsistency,
path=path,
)
sys.stdout.write(msg)
sys.exit(1)

return replace_inconsistent_pandas_namespace(visitor, content)

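
For reference, a minimal self-contained sketch of the detection idea the revised script uses (simplified from the Visitor shown above; the sample source and the file name t.py are illustrative):

import ast

SOURCE = """\
from pandas import Series
import pandas as pd

ser = Series([1, 2, 3])
other = pd.Series([4, 5, 6])
"""


class NamespaceVisitor(ast.NodeVisitor):
    """Simplified stand-in for the Visitor defined in the script above."""

    def __init__(self) -> None:
        # (lineno, col_offset, "pd" or "pandas") -> attribute accessed on it
        self.pandas_namespace = {}
        # names imported directly from pandas
        self.imported_from_pandas = set()

    def visit_Attribute(self, node: ast.Attribute) -> None:
        # record usages such as pd.Series / pandas.Series
        if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
            key = (node.lineno, node.col_offset, node.value.id)
            self.pandas_namespace[key] = node.attr
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        # record names brought in via `from pandas import ...`
        if node.module is not None and "pandas" in node.module:
            self.imported_from_pandas.update(alias.name for alias in node.names)
        self.generic_visit(node)


visitor = NamespaceVisitor()
visitor.visit(ast.parse(SOURCE))
for (lineno, col_offset, prefix), name in visitor.pandas_namespace.items():
    if name in visitor.imported_from_pandas:
        print(f"t.py:{lineno}:{col_offset}: Found both '{prefix}.{name}' and '{name}' in t.py")

Run as-is, this prints t.py:5:8: Found both 'pd.Series' and 'Series' in t.py, the same path:lineno:col_offset shape produced by the new ERROR_MESSAGE.
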