Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST/TYP: start testing public api types #40202

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
invgrep -R --include=*.{py,pyx} '!r}' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
echo $MSG "DONE"

fi

### CODE ###
Expand Down
Empty file added pandas/tests/typing/__init__.py
Empty file.
8 changes: 8 additions & 0 deletions pandas/tests/typing/data/fail/frame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# flake8: noqa

# Static-typing fixture for the "fail" test flavour: the statement carrying a
# trailing E-marker comment below is expected to be rejected by mypy, and the
# text after the marker must appear in the message mypy reports for that line.

import pandas as pd

empty_df = pd.DataFrame()
empty_ser = pd.Series()

empty_df.dot() # E: All overload variants of "dot" of "DataFrame" require at least one argument
3 changes: 3 additions & 0 deletions pandas/tests/typing/data/mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[mypy]
# The typing tests look results up by the full path of each fixture file,
# so mypy has to report full paths in its output.
show_absolute_path = True
# Do not fail on third-party imports that ship without type information.
ignore_missing_imports = True
Empty file.
23 changes: 23 additions & 0 deletions pandas/tests/typing/data/reveal/frame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# flake8: noqa

# Static-typing fixture for the "reveal" test flavour: for every reveal_type
# call below, the note printed by mypy must contain the text that follows the
# trailing E-marker comment on the same line (after format-key expansion).

import numpy as np

import pandas as pd

empty_df = pd.DataFrame()
empty_ser = pd.Series()
# TODO: np.array resolves to Any
empty_arr = np.array([])
# TODO: Index.__getitem__ resolves to Any
empty_idx: pd.Index = pd.Index([1, 2, 3])[:0]


# DataFrame.dot with each supported kind of right-hand operand.
reveal_type(empty_df.dot(empty_df)) # E: DataFrame
reveal_type(empty_df.dot(empty_ser)) # E: Series
reveal_type(empty_df.dot(empty_arr)) # E: Any
reveal_type(empty_df.dot(empty_idx)) # E: DataFrame

# The same operands through the matrix-multiplication operator.
reveal_type(empty_df @ empty_df) # E: Union[{DataFrame}, {Series}]
reveal_type(empty_df @ empty_ser) # E: Series
reveal_type(empty_df @ empty_arr) # E: Any
reveal_type(empty_df @ empty_idx) # E: Union[{DataFrame}, {Series}]
240 changes: 240 additions & 0 deletions pandas/tests/typing/test_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
"""
forked from https://github.com/numpy/numpy/blob/master/numpy/typing/tests/test_typing.py
"""
from collections import defaultdict
import importlib.util
import itertools
import os
import re
import shutil
from typing import (
IO,
Dict,
List,
Optional,
)

import numpy as np
import pytest

try:
from mypy import api
except ImportError:
NO_MYPY = True
else:
NO_MYPY = False


# Root directory of the typing fixtures, located next to this test module.
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
# One fixture sub-directory per test flavour; each is handed to mypy and used
# to parametrize the tests below.
PASS_DIR = os.path.join(DATA_DIR, "pass")
FAIL_DIR = os.path.join(DATA_DIR, "fail")
REVEAL_DIR = os.path.join(DATA_DIR, "reveal")
# Configuration file and cache directory passed to mypy on every run.
MYPY_INI = os.path.join(DATA_DIR, "mypy.ini")
CACHE_DIR = os.path.join(DATA_DIR, ".mypy_cache")

#: A dictionary with file names as keys and lists of the mypy stdout as values.
#: To-be populated by `run_mypy`.
OUTPUT_MYPY: Dict[str, List[str]] = {}


def _key_func(key: str) -> str:
    """Return the part of ``key`` that precedes its first ``:`` separator.

    A leading Windows drive specifier (*e.g.* ``C:``) is split off first, so
    its colon is never mistaken for the separator.
    """
    drive, remainder = os.path.splitdrive(key)
    head, _, _ = remainder.partition(":")
    return os.path.join(drive, head)


@pytest.fixture(scope="module", autouse=True)
def run_mypy() -> None:
    """
    Clear the cache and run mypy before running any of the typing tests.

    mypy is run once per fixture directory (pass, reveal, fail) and its
    stdout is stored, grouped by file, in `OUTPUT_MYPY` for further use.

    The cache refresh can be skipped using

    PANDAS_TYPING_TEST_CLEAR_CACHE=0 pytest pandas/tests/typing
    """
    # Marks have no effect on fixtures, so the "mypy missing" case is handled
    # explicitly here instead of through ``pytest.mark.skipif``.
    if NO_MYPY:
        pytest.skip("Mypy is not installed")

    # Environment values are strings: ``bool("0")`` is True, so the flag has
    # to be compared against its textual "off" values.
    clear_cache = os.environ.get("PANDAS_TYPING_TEST_CLEAR_CACHE", "1")
    if os.path.isdir(CACHE_DIR) and clear_cache.strip() not in ("", "0"):
        shutil.rmtree(CACHE_DIR)

    for directory in (PASS_DIR, REVEAL_DIR, FAIL_DIR):
        # Run mypy
        stdout, stderr, _ = api.run(
            [
                "--config-file",
                MYPY_INI,
                "--cache-dir",
                CACHE_DIR,
                directory,
            ]
        )
        # Anything on stderr means mypy itself failed for this directory.
        assert not stderr, directory
        stdout = stdout.replace("*", "")

        # Parse the output: consecutive lines that share a file name are
        # grouped together; lines with an empty key are dropped.
        iterator = itertools.groupby(stdout.split("\n"), key=_key_func)
        OUTPUT_MYPY.update((k, list(v)) for k, v in iterator if k)


def get_test_cases(directory):
    """Yield one ``pytest.param`` for every ``.py`` file below ``directory``.

    The parameter value is the joined path of the file; its test id is the
    path relative to ``directory`` so that test names stay short.
    """
    for dirpath, _, filenames in os.walk(directory):
        for name in filenames:
            _, extension = os.path.splitext(name)
            if extension != ".py":
                continue

            file_path = os.path.join(dirpath, name)
            # Manually specify a name for the test: the relative path reads
            # nicely in the pytest report.
            yield pytest.param(
                file_path,
                id=os.path.relpath(file_path, start=directory),
            )


@pytest.mark.slow
@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
@pytest.mark.parametrize("path", get_test_cases(PASS_DIR))
def test_success(path):
    """Fail with the collected mypy output if mypy reported anything for ``path``."""
    reported = OUTPUT_MYPY.get(path)
    if reported is not None:
        raise AssertionError("\n".join(reported))


@pytest.mark.slow
@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
@pytest.mark.parametrize("path", get_test_cases(FAIL_DIR))
def test_fail(path):
    """Check the ``# E:`` markers of a fixture file against mypy's output.

    Every non-comment line that carries a marker or received a mypy message
    is checked; a mypy message on a line without a marker fails the test.
    """
    __tracebackhide__ = True

    with open(path) as handle:
        source_lines = handle.readlines()

    assert path in OUTPUT_MYPY

    # Collect the mypy messages per (1-based) source line number.
    pattern = re.compile(r"^.+\.py:(?P<lineno>\d+): (error|note): .+$")
    reported = defaultdict(str)
    for message in OUTPUT_MYPY[path]:
        parsed = pattern.match(message)
        if parsed is None:
            raise ValueError(f"Unexpected error line format: {message}")
        reported[int(parsed.group("lineno"))] += message

    for lineno, text in enumerate(source_lines, start=1):
        if text.startswith("#"):
            continue
        if " E:" not in text and lineno not in reported:
            continue

        if "# E:" not in text:
            pytest.fail(f"Error {repr(reported[lineno])} not found")

        marker = text.split("# E:")[-1].strip()
        _test_fail(path, marker, reported.get(lineno), lineno)


def _test_fail(
    path: str, error: str, expected_error: Optional[str], lineno: int
) -> None:
    """Raise ``AssertionError`` unless ``error`` is contained in ``expected_error``.

    ``expected_error`` being ``None`` is reported as an extra error for
    line ``lineno``; a missing substring is reported as a mismatch.
    """
    if expected_error is None:
        message = f"Extra error at line {lineno}\n\nExtra error: {repr(error)}"
        raise AssertionError(message)

    if error in expected_error:
        return

    message = (
        f"Error mismatch at line {lineno}\n\n"
        f"Expected error: {repr(expected_error)}\n"
        f"Observed error: {repr(error)}"
    )
    raise AssertionError(message)


def _construct_format_dict():
    """Build the mapping from format keys to fully qualified pandas type names."""
    return dict(
        DataFrame="pandas.core.frame.DataFrame",
        Series="pandas.core.series.Series",
    )


#: A dictionary with all supported format keys (as keys)
#: and matching values
FORMAT_DICT: Dict[str, str] = _construct_format_dict()


def _parse_reveals(file: IO[str]) -> List[str]:
    """Extract and parse all ``" # E: "`` comments from the passed file-like object.

    The result has one entry per line of the file: the text following the
    first ``" # E: "`` on that line, or an empty string when there is none.
    All ``*`` characters are removed from the input beforehand.

    All format keys will be substituted for their respective value from
    `FORMAT_DICT`, *e.g.* ``"{DataFrame}"`` becomes
    ``"pandas.core.frame.DataFrame"``. Keys missing from `FORMAT_DICT` are
    replaced by an ``<UNRECOGNIZED FORMAT KEY ...>`` placeholder.
    """
    string = file.read().replace("*", "")

    # Grab all `# E:`-based comments
    comments = np.char.partition(string.split("\n"), sep=" # E: ")[:, 2].tolist()

    # Each comment is formatted on its own. Joining the comments with a
    # separator and splitting again afterwards would misalign the result
    # whenever a comment happens to contain that separator.
    parsed = []
    for comment in comments:
        # Only search for the `{*}` pattern within comments,
        # otherwise there is the risk of accidentally grabbing dictionaries and sets
        key_set = set(re.findall(r"\{(.*?)\}", comment))
        kwargs = {
            k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {repr(k)}>")
            for k in key_set
        }
        parsed.append(comment.format(**kwargs))

    return parsed


@pytest.mark.slow
@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
@pytest.mark.parametrize("path", get_test_cases(REVEAL_DIR))
def test_reveal(path):
    """Check every mypy note for ``path`` against the marker on its source line."""
    __tracebackhide__ = True

    with open(path) as handle:
        markers = _parse_reveals(handle)

    assert path in OUTPUT_MYPY

    pattern = re.compile(r"^.+\.py:(?P<lineno>\d+): note: .+$")
    for message in OUTPUT_MYPY[path]:
        parsed = pattern.match(message)
        if parsed is None:
            raise ValueError(f"Unexpected reveal line format: {message}")

        # mypy reports 1-based line numbers; `markers` is indexed from 0.
        lineno = int(parsed.group("lineno"))
        assert "Revealed type is" in message

        _test_reveal(path, markers[lineno - 1], message, lineno)


def _test_reveal(path: str, reveal: str, expected_reveal: str, lineno: int) -> None:
    """Raise ``AssertionError`` unless ``reveal`` is contained in ``expected_reveal``."""
    if reveal in expected_reveal:
        return

    message = (
        f"Reveal mismatch at line {lineno}\n\n"
        f"Expected reveal: {repr(expected_reveal)}\n"
        f"Observed reveal: {repr(reveal)}"
    )
    raise AssertionError(message)


@pytest.mark.slow
@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
@pytest.mark.parametrize("path", get_test_cases(PASS_DIR))
def test_code_runs(path):
    """Execute a fixture file to check that it also runs without raising.

    The file is loaded under a module name built from its parent directory
    and its file name without the extension.
    """
    # Drop the extension first so that the module name does not end in ".py".
    path_without_extension, _ = os.path.splitext(path)
    dirname, module_name = path_without_extension.split(os.sep)[-2:]

    spec = importlib.util.spec_from_file_location(f"{dirname}.{module_name}", path)
    # Fail with the offending path rather than an opaque AttributeError below.
    assert spec is not None and spec.loader is not None, path

    test_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(test_module)
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ strict_equality = True
warn_redundant_casts = True
warn_unused_ignores = True
show_error_codes = True
exclude = pandas/tests/typing/data

[mypy-pandas.tests.*]
check_untyped_defs = False
Expand Down