From e946c616c0f239ba3cfdb34d663f71e6fb0de161 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 3 Mar 2021 16:30:27 +0000 Subject: [PATCH 1/2] TST/TYP: start testing public api types --- pandas/tests/typing/__init__.py | 0 pandas/tests/typing/data/fail/frame.py | 8 + pandas/tests/typing/data/mypy.ini | 3 + pandas/tests/typing/data/pass/frame.py | 0 pandas/tests/typing/data/reveal/frame.py | 23 +++ pandas/tests/typing/test_typing.py | 249 +++++++++++++++++++++++ 6 files changed, 283 insertions(+) create mode 100644 pandas/tests/typing/__init__.py create mode 100644 pandas/tests/typing/data/fail/frame.py create mode 100644 pandas/tests/typing/data/mypy.ini create mode 100644 pandas/tests/typing/data/pass/frame.py create mode 100644 pandas/tests/typing/data/reveal/frame.py create mode 100644 pandas/tests/typing/test_typing.py diff --git a/pandas/tests/typing/__init__.py b/pandas/tests/typing/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/typing/data/fail/frame.py b/pandas/tests/typing/data/fail/frame.py new file mode 100644 index 0000000000000..7b84dca6abb65 --- /dev/null +++ b/pandas/tests/typing/data/fail/frame.py @@ -0,0 +1,8 @@ +# flake8: noqa + +import pandas as pd + +empty_df = pd.DataFrame() +empty_ser = pd.Series() + +empty_df.dot() # E: All overload variants of "dot" of "DataFrame" require at least one argument diff --git a/pandas/tests/typing/data/mypy.ini b/pandas/tests/typing/data/mypy.ini new file mode 100644 index 0000000000000..b6f0294955b20 --- /dev/null +++ b/pandas/tests/typing/data/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +show_absolute_path = True +ignore_missing_imports = True diff --git a/pandas/tests/typing/data/pass/frame.py b/pandas/tests/typing/data/pass/frame.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/typing/data/reveal/frame.py b/pandas/tests/typing/data/reveal/frame.py new file mode 100644 index 0000000000000..b707400930384 --- /dev/null +++ 
b/pandas/tests/typing/data/reveal/frame.py @@ -0,0 +1,23 @@ +# flake8: noqa + +import numpy as np + +import pandas as pd + +empty_df = pd.DataFrame() +empty_ser = pd.Series() +# TODO: np.array resolves to Any +empty_arr = np.array([]) +# TODO: Index.__getitem__ resolves to Any +empty_idx: pd.Index = pd.Index([1, 2, 3])[:0] + + +reveal_type(empty_df.dot(empty_df)) # E: DataFrame +reveal_type(empty_df.dot(empty_ser)) # E: Series +reveal_type(empty_df.dot(empty_arr)) # E: Any +reveal_type(empty_df.dot(empty_idx)) # E: DataFrame + +reveal_type(empty_df @ empty_df) # E: Union[{DataFrame}, {Series}] +reveal_type(empty_df @ empty_ser) # E: Series +reveal_type(empty_df @ empty_arr) # E: Any +reveal_type(empty_df @ empty_idx) # E: Union[{DataFrame}, {Series}] diff --git a/pandas/tests/typing/test_typing.py b/pandas/tests/typing/test_typing.py new file mode 100644 index 0000000000000..fe015c56bbe00 --- /dev/null +++ b/pandas/tests/typing/test_typing.py @@ -0,0 +1,249 @@ +""" +forked from https://github.com/numpy/numpy/blob/master/numpy/typing/tests/test_typing.py +""" +from collections import defaultdict +import importlib.util +import itertools +import os +import re +import shutil +from typing import ( + IO, + Dict, + List, + Optional, +) + +import numpy as np +import pytest + +try: + from mypy import api +except ImportError: + NO_MYPY = True +else: + NO_MYPY = False + + +DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +PASS_DIR = os.path.join(DATA_DIR, "pass") +FAIL_DIR = os.path.join(DATA_DIR, "fail") +REVEAL_DIR = os.path.join(DATA_DIR, "reveal") +MYPY_INI = os.path.join(DATA_DIR, "mypy.ini") +CACHE_DIR = os.path.join(DATA_DIR, ".mypy_cache") + +#: A dictionary with file names as keys and lists of the mypy stdout as values. +#: To-be populated by `run_mypy`. +OUTPUT_MYPY: Dict[str, List[str]] = {} + + +def _key_func(key: str) -> str: + """Split at the first occurrence of the ``:`` character. + + Windows drive-letters (*e.g.* ``C:``) are ignored herein.
+ """ + drive, tail = os.path.splitdrive(key) + return os.path.join(drive, tail.split(":", 1)[0]) + + +@pytest.mark.slow +@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") +@pytest.fixture(scope="module", autouse=True) +def run_mypy() -> None: + """ + Clears the cache and run mypy before running any of the typing tests. + + The mypy results are cached in `OUTPUT_MYPY` for further use. + + The cache refresh can be skipped using + + PANDAS_TYPING_TEST_CLEAR_CACHE=0 pytest pandas/tests/typing + """ + if os.path.isdir(CACHE_DIR) and bool( + os.environ.get("PANDAS_TYPING_TEST_CLEAR_CACHE", True) + ): + shutil.rmtree(CACHE_DIR) + + for directory in (PASS_DIR, REVEAL_DIR, FAIL_DIR): + # Run mypy + stdout, stderr, _ = api.run( + [ + "--config-file", + MYPY_INI, + "--cache-dir", + CACHE_DIR, + directory, + ] + ) + assert not stderr, directory + stdout = stdout.replace("*", "") + + # Parse the output + iterator = itertools.groupby(stdout.split("\n"), key=_key_func) + OUTPUT_MYPY.update((k, list(v)) for k, v in iterator if k) + + +def get_test_cases(directory): + for root, _, files in os.walk(directory): + for fname in files: + if os.path.splitext(fname)[-1] == ".py": + fullpath = os.path.join(root, fname) + # Use relative path for nice py.test name + relpath = os.path.relpath(fullpath, start=directory) + + yield pytest.param( + fullpath, + # Manually specify a name for the test + id=relpath, + ) + + +@pytest.mark.slow +@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") +@pytest.mark.parametrize("path", get_test_cases(PASS_DIR)) +def test_success(path): + # Alias `OUTPUT_MYPY` so that it appears in the local namespace + output_mypy = OUTPUT_MYPY + if path in output_mypy: + raise AssertionError("\n".join(v for v in output_mypy[path])) + + +@pytest.mark.slow +@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") +@pytest.mark.parametrize("path", get_test_cases(FAIL_DIR)) +def test_fail(path): + __tracebackhide__ = True + + with open(path) as fin: + 
lines = fin.readlines() + + errors = defaultdict(lambda: "") + + output_mypy = OUTPUT_MYPY + assert path in output_mypy + for error_line in output_mypy[path]: + match = re.match( + r"^.+\.py:(?P<lineno>\d+): (error|note): .+$", + error_line, + ) + if match is None: + raise ValueError(f"Unexpected error line format: {error_line}") + lineno = int(match.group("lineno")) + errors[lineno] += error_line + + for i, line in enumerate(lines): + lineno = i + 1 + if line.startswith("#") or (" E:" not in line and lineno not in errors): + continue + + target_line = lines[lineno - 1] + if "# E:" in target_line: + marker = target_line.split("# E:")[-1].strip() + expected_error = errors.get(lineno) + _test_fail(path, marker, expected_error, lineno) + else: + pytest.fail(f"Error {repr(errors[lineno])} not found") + + +_FAIL_MSG1 = """Extra error at line {} + +Extra error: {!r} +""" + +_FAIL_MSG2 = """Error mismatch at line {} + +Expected error: {!r} +Observed error: {!r} +""" + + +def _test_fail( + path: str, error: str, expected_error: Optional[str], lineno: int +) -> None: + if expected_error is None: + raise AssertionError(_FAIL_MSG1.format(lineno, error)) + elif error not in expected_error: + raise AssertionError(_FAIL_MSG2.format(lineno, expected_error, error)) + + +def _construct_format_dict(): + return { + "DataFrame": "pandas.core.frame.DataFrame", + "Series": "pandas.core.series.Series", + } + + +#: A dictionary with all supported format keys (as keys) +#: and matching values +FORMAT_DICT: Dict[str, str] = _construct_format_dict() + + +def _parse_reveals(file: IO[str]) -> List[str]: + """Extract and parse all ``" # E: "`` comments from the passed file-like object. + + All format keys will be substituted for their respective value from `FORMAT_DICT`, + *e.g.* ``"{float64}"`` becomes ``"numpy.floating[numpy.typing._64Bit]"``.
+ """ + string = file.read().replace("*", "") + + # Grab all `# E:`-based comments + comments_array = np.char.partition(string.split("\n"), sep=" # E: ")[:, 2] + comments = "/n".join(comments_array) + + # Only search for the `{*}` pattern within comments, + # otherwise there is the risk of accidentally grabbing dictionaries and sets + key_set = set(re.findall(r"\{(.*?)\}", comments)) + kwargs = { + k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {k!r}>") for k in key_set + } + fmt_str = comments.format(**kwargs) + + return fmt_str.split("/n") + + +@pytest.mark.slow +@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") +@pytest.mark.parametrize("path", get_test_cases(REVEAL_DIR)) +def test_reveal(path): + __tracebackhide__ = True + + with open(path) as fin: + lines = _parse_reveals(fin) + + output_mypy = OUTPUT_MYPY + assert path in output_mypy + for error_line in output_mypy[path]: + match = re.match( + r"^.+\.py:(?P<lineno>\d+): note: .+$", + error_line, + ) + if match is None: + raise ValueError(f"Unexpected reveal line format: {error_line}") + lineno = int(match.group("lineno")) - 1 + assert "Revealed type is" in error_line + + marker = lines[lineno] + _test_reveal(path, marker, error_line, 1 + lineno) + + +_REVEAL_MSG = """Reveal mismatch at line {} + +Expected reveal: {!r} +Observed reveal: {!r} +""" + + +def _test_reveal(path: str, reveal: str, expected_reveal: str, lineno: int) -> None: + if reveal not in expected_reveal: + raise AssertionError(_REVEAL_MSG.format(lineno, expected_reveal, reveal)) + + +@pytest.mark.slow +@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") +@pytest.mark.parametrize("path", get_test_cases(PASS_DIR)) +def test_code_runs(path): + path_without_extension, _ = os.path.splitext(path) + dirname, filename = path.split(os.sep)[-2:] + spec = importlib.util.spec_from_file_location(f"{dirname}.{filename}", path) + test_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(test_module) From 4689a9d7ca37950f8a8cbd63c9104750e740c7dd Mon Sep 17
00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Mar 2021 12:28:19 +0000 Subject: [PATCH 2/2] linting --- ci/code_checks.sh | 2 +- pandas/tests/typing/test_typing.py | 37 +++++++++++------------------- setup.cfg | 1 + 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 251f450840ea9..9467e285cca62 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -114,7 +114,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG invgrep -R --include=*.{py,pyx} '!r}' pandas RET=$(($RET + $?)) ; echo $MSG "DONE" - echo $MSG "DONE" + fi ### CODE ### diff --git a/pandas/tests/typing/test_typing.py b/pandas/tests/typing/test_typing.py index fe015c56bbe00..9bfa02f695ca4 100644 --- a/pandas/tests/typing/test_typing.py +++ b/pandas/tests/typing/test_typing.py @@ -145,25 +145,19 @@ def test_fail(path): pytest.fail(f"Error {repr(errors[lineno])} not found") -_FAIL_MSG1 = """Extra error at line {} - -Extra error: {!r} -""" - -_FAIL_MSG2 = """Error mismatch at line {} - -Expected error: {!r} -Observed error: {!r} -""" - - def _test_fail( path: str, error: str, expected_error: Optional[str], lineno: int ) -> None: if expected_error is None: - raise AssertionError(_FAIL_MSG1.format(lineno, error)) + raise AssertionError( + f"Extra error at line {lineno}\n\nExtra error: {repr(error)}" + ) elif error not in expected_error: - raise AssertionError(_FAIL_MSG2.format(lineno, expected_error, error)) + raise AssertionError( + f"Error mismatch at line {lineno}\n\n" + f"Expected error: {repr(expected_error)}\n" + f"Observed error: {repr(error)}" + ) def _construct_format_dict(): @@ -194,7 +188,7 @@ def _parse_reveals(file: IO[str]) -> List[str]: # otherwise there is the risk of accidentally grabbing dictionaries and sets key_set = set(re.findall(r"\{(.*?)\}", comments)) kwargs = { - k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {k!r}>") for k in key_set + k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {repr(k)}>") for k in key_set }
fmt_str = comments.format(**kwargs) @@ -226,16 +220,13 @@ def test_reveal(path): _test_reveal(path, marker, error_line, 1 + lineno) -_REVEAL_MSG = """Reveal mismatch at line {} - -Expected reveal: {!r} -Observed reveal: {!r} -""" - - def _test_reveal(path: str, reveal: str, expected_reveal: str, lineno: int) -> None: if reveal not in expected_reveal: - raise AssertionError(_REVEAL_MSG.format(lineno, expected_reveal, reveal)) + raise AssertionError( + f"Reveal mismatch at line {lineno}\n\n" + f"Expected reveal: {repr(expected_reveal)}\n" + f"Observed reveal: {repr(reveal)}" + ) @pytest.mark.slow diff --git a/setup.cfg b/setup.cfg index ca0673bd5fc34..f46e5659a52b9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -183,6 +183,7 @@ strict_equality = True warn_redundant_casts = True warn_unused_ignores = True show_error_codes = True +exclude = pandas/tests/typing/data [mypy-pandas.tests.*] check_untyped_defs = False