From e946c616c0f239ba3cfdb34d663f71e6fb0de161 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Wed, 3 Mar 2021 16:30:27 +0000
Subject: [PATCH 1/2] TST/TYP: start testing public api types

---
 pandas/tests/typing/__init__.py          |   0
 pandas/tests/typing/data/fail/frame.py   |   8 +
 pandas/tests/typing/data/mypy.ini        |   3 +
 pandas/tests/typing/data/pass/frame.py   |   0
 pandas/tests/typing/data/reveal/frame.py |  23 +++
 pandas/tests/typing/test_typing.py       | 249 +++++++++++++++++++++++
 6 files changed, 283 insertions(+)
 create mode 100644 pandas/tests/typing/__init__.py
 create mode 100644 pandas/tests/typing/data/fail/frame.py
 create mode 100644 pandas/tests/typing/data/mypy.ini
 create mode 100644 pandas/tests/typing/data/pass/frame.py
 create mode 100644 pandas/tests/typing/data/reveal/frame.py
 create mode 100644 pandas/tests/typing/test_typing.py

diff --git a/pandas/tests/typing/__init__.py b/pandas/tests/typing/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/typing/data/fail/frame.py b/pandas/tests/typing/data/fail/frame.py
new file mode 100644
index 0000000000000..7b84dca6abb65
--- /dev/null
+++ b/pandas/tests/typing/data/fail/frame.py
@@ -0,0 +1,8 @@
+# flake8: noqa
+
+import pandas as pd
+
+empty_df = pd.DataFrame()
+empty_ser = pd.Series()
+
+empty_df.dot()  # E: All overload variants of "dot" of "DataFrame" require at least one argument
diff --git a/pandas/tests/typing/data/mypy.ini b/pandas/tests/typing/data/mypy.ini
new file mode 100644
index 0000000000000..b6f0294955b20
--- /dev/null
+++ b/pandas/tests/typing/data/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+show_absolute_path = True
+ignore_missing_imports = True
diff --git a/pandas/tests/typing/data/pass/frame.py b/pandas/tests/typing/data/pass/frame.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/typing/data/reveal/frame.py b/pandas/tests/typing/data/reveal/frame.py
new file mode 100644
index 0000000000000..b707400930384
--- /dev/null
+++ b/pandas/tests/typing/data/reveal/frame.py
@@ -0,0 +1,23 @@
+# flake8: noqa
+
+import numpy as np
+
+import pandas as pd
+
+empty_df = pd.DataFrame()
+empty_ser = pd.Series()
+# TODO: np.array resolves to Any
+empty_arr = np.array([])
+# TODO: Index.__getitem__ resolves to Any
+empty_idx: pd.Index = pd.Index([1, 2, 3])[:0]
+
+
+reveal_type(empty_df.dot(empty_df))  # E: DataFrame
+reveal_type(empty_df.dot(empty_ser))  # E: Series
+reveal_type(empty_df.dot(empty_arr))  # E: Any
+reveal_type(empty_df.dot(empty_idx))  # E: DataFrame
+
+reveal_type(empty_df @ empty_df)  # E: Union[{DataFrame}, {Series}]
+reveal_type(empty_df @ empty_ser)  # E: Series
+reveal_type(empty_df @ empty_arr)  # E: Any
+reveal_type(empty_df @ empty_idx)  # E: Union[{DataFrame}, {Series}]
diff --git a/pandas/tests/typing/test_typing.py b/pandas/tests/typing/test_typing.py
new file mode 100644
index 0000000000000..fe015c56bbe00
--- /dev/null
+++ b/pandas/tests/typing/test_typing.py
@@ -0,0 +1,249 @@
+"""
+forked from https://github.com/numpy/numpy/blob/master/numpy/typing/tests/test_typing.py
+"""
+from collections import defaultdict
+import importlib.util
+import itertools
+import os
+import re
+import shutil
+from typing import (
+    IO,
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+import pytest
+
+try:
+    from mypy import api
+except ImportError:
+    NO_MYPY = True
+else:
+    NO_MYPY = False
+
+
+DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+PASS_DIR = os.path.join(DATA_DIR, "pass")
+FAIL_DIR = os.path.join(DATA_DIR, "fail")
+REVEAL_DIR = os.path.join(DATA_DIR, "reveal")
+MYPY_INI = os.path.join(DATA_DIR, "mypy.ini")
+CACHE_DIR = os.path.join(DATA_DIR, ".mypy_cache")
+
+#: A dictionary with file names as keys and lists of the mypy stdout as values.
+#: To-be populated by `run_mypy`.
+OUTPUT_MYPY: Dict[str, List[str]] = {}
+
+
+def _key_func(key: str) -> str:
+    """Split at the first occurrence of the ``:`` character.
+
+    Windows drive-letters (*e.g.* ``C:``) are ignored herein.
+    """
+    drive, tail = os.path.splitdrive(key)
+    return os.path.join(drive, tail.split(":", 1)[0])
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.fixture(scope="module", autouse=True)
+def run_mypy() -> None:
+    """
+    Clear the cache and run mypy before running any of the typing tests.
+
+    The mypy results are cached in `OUTPUT_MYPY` for further use.
+
+    The cache refresh is skipped only when the variable is set to ``0``, e.g.
+
+    PANDAS_TYPING_TEST_CLEAR_CACHE=0 pytest pandas/tests/typing
+    """
+    if os.path.isdir(CACHE_DIR) and (
+        os.environ.get("PANDAS_TYPING_TEST_CLEAR_CACHE", "1") != "0"
+    ):
+        shutil.rmtree(CACHE_DIR)
+
+    for directory in (PASS_DIR, REVEAL_DIR, FAIL_DIR):
+        # Run mypy on the directory
+        stdout, stderr, _ = api.run(
+            [
+                "--config-file",
+                MYPY_INI,
+                "--cache-dir",
+                CACHE_DIR,
+                directory,
+            ]
+        )
+        assert not stderr, directory
+        stdout = stdout.replace("*", "")
+
+        # Group the output lines by the file path they start with
+        iterator = itertools.groupby(stdout.split("\n"), key=_key_func)
+        OUTPUT_MYPY.update((k, list(v)) for k, v in iterator if k)
+
+
+def get_test_cases(directory):
+    for root, _, files in os.walk(directory):
+        for fname in files:
+            if os.path.splitext(fname)[-1] == ".py":
+                fullpath = os.path.join(root, fname)
+                # Use relative path for nice py.test name
+                relpath = os.path.relpath(fullpath, start=directory)
+
+                yield pytest.param(
+                    fullpath,
+                    # Manually specify a name for the test
+                    id=relpath,
+                )
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(PASS_DIR))
+def test_success(path):
+    # Alias `OUTPUT_MYPY` so that it appears in the local namespace
+    output_mypy = OUTPUT_MYPY
+    if path in output_mypy:
+        raise AssertionError("\n".join(v for v in output_mypy[path]))
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(FAIL_DIR))
+def test_fail(path):
+    __tracebackhide__ = True
+
+    with open(path) as fin:
+        lines = fin.readlines()
+
+    errors = defaultdict(lambda: "")
+
+    output_mypy = OUTPUT_MYPY
+    assert path in output_mypy
+    for error_line in output_mypy[path]:
+        match = re.match(
+            r"^.+\.py:(?P<lineno>\d+): (error|note): .+$",
+            error_line,
+        )
+        if match is None:
+            raise ValueError(f"Unexpected error line format: {error_line}")
+        lineno = int(match.group("lineno"))
+        errors[lineno] += error_line
+
+    for i, line in enumerate(lines):
+        lineno = i + 1
+        if line.startswith("#") or (" E:" not in line and lineno not in errors):
+            continue
+
+        target_line = lines[lineno - 1]
+        if "# E:" in target_line:
+            marker = target_line.split("# E:")[-1].strip()
+            expected_error = errors.get(lineno)
+            _test_fail(path, marker, expected_error, lineno)
+        else:
+            pytest.fail(f"Error {repr(errors[lineno])} not found")
+
+
+_FAIL_MSG1 = """Extra error at line {}
+
+Extra error: {!r}
+"""
+
+_FAIL_MSG2 = """Error mismatch at line {}
+
+Expected error: {!r}
+Observed error: {!r}
+"""
+
+
+def _test_fail(
+    path: str, error: str, expected_error: Optional[str], lineno: int
+) -> None:
+    if expected_error is None:
+        raise AssertionError(_FAIL_MSG1.format(lineno, error))
+    elif error not in expected_error:
+        raise AssertionError(_FAIL_MSG2.format(lineno, expected_error, error))
+
+
+def _construct_format_dict():
+    return {
+        "DataFrame": "pandas.core.frame.DataFrame",
+        "Series": "pandas.core.series.Series",
+    }
+
+
+#: A dictionary with all supported format keys (as keys)
+#: and matching values
+FORMAT_DICT: Dict[str, str] = _construct_format_dict()
+
+
+def _parse_reveals(file: IO[str]) -> List[str]:
+    """Extract and parse all ``"  # E: "`` comments from the passed file-like object.
+
+    All format keys will be substituted for their respective value from `FORMAT_DICT`,
+    *e.g.* ``"{DataFrame}"`` becomes ``"pandas.core.frame.DataFrame"``.
+    """
+    string = file.read().replace("*", "")
+
+    # Grab all `# E:`-based comments
+    comments_array = np.char.partition(string.split("\n"), sep="  # E: ")[:, 2]
+    comments = "/n".join(comments_array)
+
+    # Only search for the `{*}` pattern within comments,
+    # otherwise there is the risk of accidently grabbing dictionaries and sets
+    key_set = set(re.findall(r"\{(.*?)\}", comments))
+    kwargs = {
+        k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {k!r}>") for k in key_set
+    }
+    fmt_str = comments.format(**kwargs)
+
+    return fmt_str.split("/n")
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(REVEAL_DIR))
+def test_reveal(path):
+    __tracebackhide__ = True
+
+    with open(path) as fin:
+        lines = _parse_reveals(fin)
+
+    output_mypy = OUTPUT_MYPY
+    assert path in output_mypy
+    for error_line in output_mypy[path]:
+        match = re.match(
+            r"^.+\.py:(?P<lineno>\d+): note: .+$",
+            error_line,
+        )
+        if match is None:
+            raise ValueError(f"Unexpected reveal line format: {error_line}")
+        lineno = int(match.group("lineno")) - 1
+        assert "Revealed type is" in error_line
+
+        marker = lines[lineno]
+        _test_reveal(path, marker, error_line, 1 + lineno)
+
+
+_REVEAL_MSG = """Reveal mismatch at line {}
+
+Expected reveal: {!r}
+Observed reveal: {!r}
+"""
+
+
+def _test_reveal(path: str, reveal: str, expected_reveal: str, lineno: int) -> None:
+    if reveal not in expected_reveal:
+        raise AssertionError(_REVEAL_MSG.format(lineno, expected_reveal, reveal))
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed")
+@pytest.mark.parametrize("path", get_test_cases(PASS_DIR))
+def test_code_runs(path):
+    """Execute the file at ``path`` to check that it also runs without error."""
+    dirname, filename = os.path.splitext(path)[0].split(os.sep)[-2:]
+    spec = importlib.util.spec_from_file_location(f"{dirname}.{filename}", path)
+    test_module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(test_module)

From 4689a9d7ca37950f8a8cbd63c9104750e740c7dd Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Thu, 4 Mar 2021 12:28:19 +0000
Subject: [PATCH 2/2] linting

---
 ci/code_checks.sh                  |  2 +-
 pandas/tests/typing/test_typing.py | 37 +++++++++++-------------------
 setup.cfg                          |  1 +
 3 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 251f450840ea9..9467e285cca62 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -114,7 +114,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
     invgrep -R --include=*.{py,pyx} '!r}' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
-    echo $MSG "DONE"
+
 fi
 
 ### CODE ###
diff --git a/pandas/tests/typing/test_typing.py b/pandas/tests/typing/test_typing.py
index fe015c56bbe00..9bfa02f695ca4 100644
--- a/pandas/tests/typing/test_typing.py
+++ b/pandas/tests/typing/test_typing.py
@@ -145,25 +145,19 @@ def test_fail(path):
             pytest.fail(f"Error {repr(errors[lineno])} not found")
 
 
-_FAIL_MSG1 = """Extra error at line {}
-
-Extra error: {!r}
-"""
-
-_FAIL_MSG2 = """Error mismatch at line {}
-
-Expected error: {!r}
-Observed error: {!r}
-"""
-
-
 def _test_fail(
     path: str, error: str, expected_error: Optional[str], lineno: int
 ) -> None:
     if expected_error is None:
-        raise AssertionError(_FAIL_MSG1.format(lineno, error))
+        raise AssertionError(
+            f"Extra error at line {lineno}\n\nExtra error: {repr(error)}"
+        )
     elif error not in expected_error:
-        raise AssertionError(_FAIL_MSG2.format(lineno, expected_error, error))
+        raise AssertionError(
+            f"Error mismatch at line {lineno}\n\n"
+            f"Expected error: {repr(expected_error)}\n"
+            f"Observed error: {repr(error)}"
+        )
 
 
 def _construct_format_dict():
@@ -194,7 +188,7 @@ def _parse_reveals(file: IO[str]) -> List[str]:
     # otherwise there is the risk of accidently grabbing dictionaries and sets
     key_set = set(re.findall(r"\{(.*?)\}", comments))
     kwargs = {
-        k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {k!r}>") for k in key_set
+        k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {repr(k)}>") for k in key_set
     }
     fmt_str = comments.format(**kwargs)
 
@@ -226,16 +220,13 @@ def test_reveal(path):
         _test_reveal(path, marker, error_line, 1 + lineno)
 
 
-_REVEAL_MSG = """Reveal mismatch at line {}
-
-Expected reveal: {!r}
-Observed reveal: {!r}
-"""
-
-
 def _test_reveal(path: str, reveal: str, expected_reveal: str, lineno: int) -> None:
     if reveal not in expected_reveal:
-        raise AssertionError(_REVEAL_MSG.format(lineno, expected_reveal, reveal))
+        raise AssertionError(
+            f"Reveal mismatch at line {lineno}\n\n"
+            f"Expected reveal: {repr(expected_reveal)}\n"
+            f"Observed reveal: {repr(reveal)}"
+        )
 
 
 @pytest.mark.slow
diff --git a/setup.cfg b/setup.cfg
index ca0673bd5fc34..f46e5659a52b9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -183,6 +183,7 @@ strict_equality = True
 warn_redundant_casts = True
 warn_unused_ignores = True
 show_error_codes = True
+exclude = pandas/tests/typing/data
 
 [mypy-pandas.tests.*]
 check_untyped_defs = False