From 3e1fdac5cf283cb5348523d9e3d00a35b7f06c10 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Thu, 25 May 2023 09:38:21 -0400
Subject: [PATCH] Revamp test selection for the example tests (#23737)

* Revamp test selection for the example tests

* Rename old XLA test and fake modif in run_glue

* Fixes

* Fake Trainer modif

* Remove fake modifs
---
 .circleci/config.yml                          |  21 ++--
 .circleci/create_circleci_config.py           |  15 ++-
 ...a_examples.py => old_test_xla_examples.py} |   0
 tests/repo_utils/test_tests_fetcher.py        | 113 +++++++++++++++++-
 utils/tests_fetcher.py                        |  63 +++++++---
 5 files changed, 172 insertions(+), 40 deletions(-)
 rename examples/pytorch/{test_xla_examples.py => old_test_xla_examples.py} (100%)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 66678d0d4a0f5d..0a5060b3490043 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -43,6 +43,12 @@ jobs:
                 else
                     touch test_preparation/test_list.txt
                 fi
+            - run: |
+                  if [ -f examples_test_list.txt ]; then
+                      mv examples_test_list.txt test_preparation/examples_test_list.txt
+                  else
+                      touch test_preparation/examples_test_list.txt
+                  fi
             - run: |
                 if [ -f doctest_list.txt ]; then
                     cp doctest_list.txt test_preparation/doctest_list.txt
@@ -62,19 +68,6 @@ jobs:
                 else
                     touch test_preparation/filtered_test_list.txt
                 fi
-            - run: python utils/tests_fetcher.py --filters tests examples | tee examples_tests_fetched_summary.txt
-            - run: |
-                  if [ -f test_list.txt ]; then
-                      mv test_list.txt test_preparation/examples_test_list.txt
-                  else
-                      touch test_preparation/examples_test_list.txt
-                  fi
-            - run: |
-                  if [ -f filtered_test_list_cross_tests.txt ]; then
-                      mv filtered_test_list_cross_tests.txt test_preparation/filtered_test_list_cross_tests.txt
-                  else
-                      touch test_preparation/filtered_test_list_cross_tests.txt
-                  fi
             - store_artifacts:
                   path: test_preparation/test_list.txt
             - store_artifacts:
@@ -111,7 +104,7 @@ jobs:
             - run: |
                   mkdir test_preparation
                   echo -n "tests" > test_preparation/test_list.txt
-                  echo -n "tests" > test_preparation/examples_test_list.txt
+                  echo -n "all" > test_preparation/examples_test_list.txt
                   echo -n "tests/repo_utils" > test_preparation/test_repo_utils.txt
             - run: |
                   echo -n "tests" > test_list.txt
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index 4bc5ce17d08cf9..c9d56754a9407f 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -342,7 +342,6 @@ def job_name(self):
         "pip install .[sklearn,torch,sentencepiece,testing,torch-speech]",
         "pip install -r examples/pytorch/_tests_requirements.txt",
     ],
-    tests_to_run="./examples/pytorch/",
 )
 
 
@@ -355,7 +354,6 @@ def job_name(self):
         "pip install .[sklearn,tensorflow,sentencepiece,testing]",
         "pip install -r examples/tensorflow/_tests_requirements.txt",
     ],
-    tests_to_run="./examples/tensorflow/",
 )
 
 
@@ -367,7 +365,6 @@ def job_name(self):
         "pip install .[flax,testing,sentencepiece]",
         "pip install -r examples/flax/_tests_requirements.txt",
     ],
-    tests_to_run="./examples/flax/",
 )
 
 
@@ -551,7 +548,17 @@ def create_circleci_config(folder=None):
 
     example_file = os.path.join(folder, "examples_test_list.txt")
     if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
-        jobs.extend(EXAMPLES_TESTS)
+        with open(example_file, "r", encoding="utf-8") as f:
+            example_tests = f.read().split(" ")
+        for job in EXAMPLES_TESTS:
+            framework = job.name.replace("examples_", "").replace("torch", "pytorch")
+            if example_tests == "all":
+                job.tests_to_run = [f"examples/{framework}"]
+            else:
+                job.tests_to_run = [f for f in example_tests if f.startswith(f"examples/{framework}")]
+            
+            if len(job.tests_to_run) > 0:
+                jobs.append(job)
 
     doctest_file = os.path.join(folder, "doctest_list.txt")
     if os.path.exists(doctest_file):
diff --git a/examples/pytorch/test_xla_examples.py b/examples/pytorch/old_test_xla_examples.py
similarity index 100%
rename from examples/pytorch/test_xla_examples.py
rename to examples/pytorch/old_test_xla_examples.py
diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py
index e02a917700dd2f..6ab213b70cae64 100644
--- a/tests/repo_utils/test_tests_fetcher.py
+++ b/tests/repo_utils/test_tests_fetcher.py
@@ -42,6 +42,7 @@
     get_module_dependencies,
     get_tree_starting_at,
     infer_tests_to_run,
+    init_test_examples_dependencies,
     parse_commit_message,
     print_tree_deps_of,
 )
@@ -149,7 +150,19 @@ def create_tmp_repo(tmp_dir, models=None):
                 f"from transformers import {cls}Config, {cls}Model\nfrom ...test_modeling_common import ModelTesterMixin\n\ncode"
             )
 
-    repo.index.add(["src", "tests"])
+    example_dir = tmp_dir / "examples"
+    example_dir.mkdir(exist_ok=True)
+    for framework in ["flax", "pytorch", "tensorflow"]:
+        framework_dir = example_dir / framework
+        framework_dir.mkdir(exist_ok=True)
+        with open(framework_dir / f"test_{framework}_examples.py", "w") as f:
+            f.write("""test_args = "run_glue.py"\n""")
+        glue_dir = framework_dir / "text-classification"
+        glue_dir.mkdir(exist_ok=True)
+        with open(glue_dir / "run_glue.py", "w") as f:
+            f.write("from transformers import BertModel\n\ncode")
+
+    repo.index.add(["examples", "src", "tests"])
     repo.index.commit("Initial commit")
     repo.create_head("main")
     repo.head.reference = repo.refs.main
@@ -164,12 +177,14 @@ def patch_transformer_repo_path(new_folder):
     """
     old_repo_path = tests_fetcher.PATH_TO_REPO
     tests_fetcher.PATH_TO_REPO = Path(new_folder).resolve()
+    tests_fetcher.PATH_TO_EXAMPLES = tests_fetcher.PATH_TO_REPO / "examples"
     tests_fetcher.PATH_TO_TRANFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers"
     tests_fetcher.PATH_TO_TESTS = tests_fetcher.PATH_TO_REPO / "tests"
     try:
         yield
     finally:
         tests_fetcher.PATH_TO_REPO = old_repo_path
+        tests_fetcher.PATH_TO_EXAMPLES = tests_fetcher.PATH_TO_REPO / "examples"
         tests_fetcher.PATH_TO_TRANFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers"
         tests_fetcher.PATH_TO_TESTS = tests_fetcher.PATH_TO_REPO / "tests"
 
@@ -409,6 +424,17 @@ def test_get_module_dependencies(self):
             with patch_transformer_repo_path(tmp_folder):
                 assert get_module_dependencies(BERT_MODELING_FILE) == expected_bert_dependencies
 
+            # Test with an example
+            create_tmp_repo(tmp_folder)
+
+            expected_example_dependencies = ["src/transformers/models/bert/modeling_bert.py"]
+
+            with patch_transformer_repo_path(tmp_folder):
+                assert (
+                    get_module_dependencies("examples/pytorch/text-classification/run_glue.py")
+                    == expected_example_dependencies
+                )
+
     def test_create_reverse_dependency_tree(self):
         with tempfile.TemporaryDirectory() as tmp_folder:
             tmp_folder = Path(tmp_folder)
@@ -494,6 +520,33 @@ def test_print_tree_deps_of(self):
 
             assert cs.out.strip() in [expected_std_out, expected_std_out_2]
 
+    def test_init_test_examples_dependencies(self):
+        with tempfile.TemporaryDirectory() as tmp_folder:
+            tmp_folder = Path(tmp_folder)
+            create_tmp_repo(tmp_folder)
+
+            expected_example_deps = {
+                "examples/flax/test_flax_examples.py": ["examples/flax/text-classification/run_glue.py"],
+                "examples/pytorch/test_pytorch_examples.py": ["examples/pytorch/text-classification/run_glue.py"],
+                "examples/tensorflow/test_tensorflow_examples.py": [
+                    "examples/tensorflow/text-classification/run_glue.py"
+                ],
+            }
+
+            expected_examples = {
+                "examples/flax/test_flax_examples.py",
+                "examples/flax/text-classification/run_glue.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/pytorch/text-classification/run_glue.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+                "examples/tensorflow/text-classification/run_glue.py",
+            }
+
+            with patch_transformer_repo_path(tmp_folder):
+                example_deps, all_examples = init_test_examples_dependencies()
+                assert example_deps == expected_example_deps
+                assert {str(f.relative_to(tmp_folder)) for f in all_examples} == expected_examples
+
     def test_create_reverse_dependency_map(self):
         with tempfile.TemporaryDirectory() as tmp_folder:
             tmp_folder = Path(tmp_folder)
@@ -506,6 +559,12 @@ def test_create_reverse_dependency_map(self):
                 "src/transformers/__init__.py",
                 "src/transformers/models/bert/__init__.py",
                 "tests/models/bert/test_modeling_bert.py",
+                "examples/flax/test_flax_examples.py",
+                "examples/flax/text-classification/run_glue.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/pytorch/text-classification/run_glue.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+                "examples/tensorflow/text-classification/run_glue.py",
             }
             assert set(reverse_map["src/transformers/models/bert/modeling_bert.py"]) == expected_bert_deps
 
@@ -521,6 +580,12 @@ def test_create_reverse_dependency_map(self):
                 "src/transformers/modeling_utils.py",
                 "tests/test_modeling_common.py",
                 "tests/models/bert/test_modeling_bert.py",
+                "examples/flax/test_flax_examples.py",
+                "examples/flax/text-classification/run_glue.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/pytorch/text-classification/run_glue.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+                "examples/tensorflow/text-classification/run_glue.py",
             }
             assert set(reverse_map["src/transformers/__init__.py"]) == expected_init_deps
 
@@ -529,6 +594,12 @@ def test_create_reverse_dependency_map(self):
                 "src/transformers/models/bert/configuration_bert.py",
                 "src/transformers/models/bert/modeling_bert.py",
                 "tests/models/bert/test_modeling_bert.py",
+                "examples/flax/test_flax_examples.py",
+                "examples/flax/text-classification/run_glue.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/pytorch/text-classification/run_glue.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+                "examples/tensorflow/text-classification/run_glue.py",
             }
             assert set(reverse_map["src/transformers/models/bert/__init__.py"]) == expected_init_deps
 
@@ -543,6 +614,12 @@ def test_create_reverse_dependency_map(self):
                 "src/transformers/models/bert/configuration_bert.py",
                 "src/transformers/models/bert/modeling_bert.py",
                 "tests/models/bert/test_modeling_bert.py",
+                "examples/flax/test_flax_examples.py",
+                "examples/flax/text-classification/run_glue.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/pytorch/text-classification/run_glue.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+                "examples/tensorflow/text-classification/run_glue.py",
             }
             assert set(reverse_map["src/transformers/models/bert/__init__.py"]) == expected_init_deps
 
@@ -554,13 +631,26 @@ def test_create_module_to_test_map(self):
             with patch_transformer_repo_path(tmp_folder):
                 test_map = create_module_to_test_map(filter_models=True)
 
+            expected_bert_tests = {
+                "examples/flax/test_flax_examples.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+                "tests/models/bert/test_modeling_bert.py",
+            }
+
             for model in models:
-                assert test_map[f"src/transformers/models/{model}/modeling_{model}.py"] == [
-                    f"tests/models/{model}/test_modeling_{model}.py"
-                ]
+                if model != "bert":
+                    assert test_map[f"src/transformers/models/{model}/modeling_{model}.py"] == [
+                        f"tests/models/{model}/test_modeling_{model}.py"
+                    ]
+                else:
+                    assert set(test_map[f"src/transformers/models/{model}/modeling_{model}.py"]) == expected_bert_tests
 
             # Init got filtered
             expected_init_tests = {
+                "examples/flax/test_flax_examples.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
                 "tests/test_modeling_common.py",
                 "tests/models/bert/test_modeling_bert.py",
                 "tests/models/gpt2/test_modeling_gpt2.py",
@@ -575,12 +665,21 @@ def test_infer_tests_to_run(self):
 
             commit_changes("src/transformers/models/bert/modeling_bert.py", BERT_MODEL_FILE_NEW_CODE, repo)
 
+            example_tests = {
+                "examples/flax/test_flax_examples.py",
+                "examples/pytorch/test_pytorch_examples.py",
+                "examples/tensorflow/test_tensorflow_examples.py",
+            }
+
             with patch_transformer_repo_path(tmp_folder):
                 infer_tests_to_run(tmp_folder / "test-output.txt", diff_with_last_commit=True)
                 with open(tmp_folder / "test-output.txt", "r") as f:
                     tests_to_run = f.read()
+                with open(tmp_folder / "examples_test_list.txt", "r") as f:
+                    example_tests_to_run = f.read()
 
             assert tests_to_run == "tests/models/bert/test_modeling_bert.py"
+            assert set(example_tests_to_run.split(" ")) == example_tests
 
             # Fake a new model addition
             repo = create_tmp_repo(tmp_folder, models=models)
@@ -617,6 +716,8 @@ def test_infer_tests_to_run(self):
                 infer_tests_to_run(tmp_folder / "test-output.txt")
                 with open(tmp_folder / "test-output.txt", "r") as f:
                     tests_to_run = f.read()
+                with open(tmp_folder / "examples_test_list.txt", "r") as f:
+                    example_tests_to_run = f.read()
 
             expected_tests = {
                 "tests/models/bert/test_modeling_bert.py",
@@ -625,15 +726,19 @@ def test_infer_tests_to_run(self):
                 "tests/test_modeling_common.py",
             }
             assert set(tests_to_run.split(" ")) == expected_tests
+            assert set(example_tests_to_run.split(" ")) == example_tests
 
             with patch_transformer_repo_path(tmp_folder):
                 infer_tests_to_run(tmp_folder / "test-output.txt", filter_models=False)
                 with open(tmp_folder / "test-output.txt", "r") as f:
                     tests_to_run = f.read()
+                with open(tmp_folder / "examples_test_list.txt", "r") as f:
+                    example_tests_to_run = f.read()
 
             expected_tests = [f"tests/models/{name}/test_modeling_{name}.py" for name in models + ["t5"]]
             expected_tests = set(expected_tests + ["tests/test_modeling_common.py"])
             assert set(tests_to_run.split(" ")) == expected_tests
+            assert set(example_tests_to_run.split(" ")) == example_tests
 
     def test_infer_tests_to_run_with_test_modifs(self):
         with tempfile.TemporaryDirectory() as tmp_folder:
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 05009e9759fb91..d8373da5ef5b96 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -46,6 +46,7 @@
 
 
 PATH_TO_REPO = Path(__file__).parent.parent.resolve()
+PATH_TO_EXAMPLES = PATH_TO_REPO / "examples"
 PATH_TO_TRANFORMERS = PATH_TO_REPO / "src/transformers"
 PATH_TO_TESTS = PATH_TO_REPO / "tests"
 
@@ -512,15 +513,40 @@ def print_tree_deps_of(module, all_edges=None):
         print(line[0])
 
 
+def init_test_examples_dependencies():
+    """
+    The test examples do not import from the examples (which are just scripts, not modules) so we need som extra
+    care initializing the dependency map there.
+    """
+    test_example_deps = {}
+    all_examples = []
+    for framework in ["flax", "pytorch", "tensorflow"]:
+        test_files = list((PATH_TO_EXAMPLES / framework).glob("test_*.py"))
+        all_examples.extend(test_files)
+        examples = [
+            f for f in (PATH_TO_EXAMPLES / framework).glob("**/*.py") if f.parent != PATH_TO_EXAMPLES / framework
+        ]
+        all_examples.extend(examples)
+        for test_file in test_files:
+            with open(test_file, "r", encoding="utf-8") as f:
+                content = f.read()
+            test_example_deps[str(test_file.relative_to(PATH_TO_REPO))] = [
+                str(e.relative_to(PATH_TO_REPO)) for e in examples if e.name in content
+            ]
+    return test_example_deps, all_examples
+
+
 def create_reverse_dependency_map():
     """
     Create the dependency map from module/test filename to the list of modules/tests that depend on it (even
     recursively).
     """
     cache = {}
-    all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + list(PATH_TO_TESTS.glob("**/*.py"))
+    example_deps, examples = init_test_examples_dependencies()
+    all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + list(PATH_TO_TESTS.glob("**/*.py")) + examples
     all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules]
     direct_deps = {m: get_module_dependencies(m, cache=cache) for m in all_modules}
+    direct_deps.update(example_deps)
 
     # This recurses the dependencies
     something_changed = True
@@ -557,7 +583,15 @@ def create_module_to_test_map(reverse_map=None, filter_models=False):
     """
     if reverse_map is None:
         reverse_map = create_reverse_dependency_map()
-    test_map = {module: [f for f in deps if f.startswith("tests")] for module, deps in reverse_map.items()}
+
+    def is_test(fname):
+        if fname.startswith("tests"):
+            return True
+        if fname.startswith("examples") and fname.split(os.path.sep)[-1].startswith("test"):
+            return True
+        return False
+
+    test_map = {module: [f for f in deps if is_test(f)] for module, deps in reverse_map.items()}
 
     if not filter_models:
         return test_map
@@ -627,9 +661,7 @@ def create_json_map(test_files_to_run, json_output_file):
         json.dump(test_map, fp, ensure_ascii=False)
 
 
-def infer_tests_to_run(
-    output_file, diff_with_last_commit=False, filters=None, filter_models=True, json_output_file=None
-):
+def infer_tests_to_run(output_file, diff_with_last_commit=False, filter_models=True, json_output_file=None):
     modified_files = get_modified_python_files(diff_with_last_commit=diff_with_last_commit)
     print(f"\n### MODIFIED FILES ###\n{_print_list(modified_files)}")
 
@@ -663,11 +695,6 @@ def infer_tests_to_run(
         test_files_to_run = [f for f in test_files_to_run if not f.split(os.path.sep)[1] == "sagemaker"]
         # Make sure we did not end up with a test file that was removed
         test_files_to_run = [f for f in test_files_to_run if (PATH_TO_REPO / f).exists()]
-        if filters is not None:
-            filtered_files = []
-            for _filter in filters:
-                filtered_files.extend([f for f in test_files_to_run if f.startswith(_filter)])
-            test_files_to_run = filtered_files
 
         repo_utils_launch = any(f.split(os.path.sep)[1] == "repo_utils" for f in modified_files)
 
@@ -676,6 +703,8 @@ def infer_tests_to_run(
         with open(repo_util_file, "w", encoding="utf-8") as f:
             f.write("tests/repo_utils")
 
+    examples_tests_to_run = [f for f in test_files_to_run if f.startswith("examples")]
+    test_files_to_run = [f for f in test_files_to_run if not f.startswith("examples")]
     print(f"\n### TEST TO RUN ###\n{_print_list(test_files_to_run)}")
     if len(test_files_to_run) > 0:
         with open(output_file, "w", encoding="utf-8") as f:
@@ -690,6 +719,12 @@ def infer_tests_to_run(
 
         create_json_map(test_files_to_run, json_output_file)
 
+    print(f"\n### EXAMPLES TEST TO RUN ###\n{_print_list(examples_tests_to_run)}")
+    if len(examples_tests_to_run) > 0:
+        example_file = Path(output_file).parent / "examples_test_list.txt"
+        with open(example_file, "w", encoding="utf-8") as f:
+            f.write(" ".join(examples_tests_to_run))
+
     doctest_list = get_doctest_files()
 
     print(f"\n### DOCTEST TO RUN ###\n{_print_list(doctest_list)}")
@@ -763,13 +798,6 @@ def parse_commit_message(commit_message):
         action="store_true",
         help="To fetch the tests between the current commit and the last commit",
     )
-    parser.add_argument(
-        "--filters",
-        type=str,
-        nargs="*",
-        default=["tests"],
-        help="Only keep the test files matching one of those filters.",
-    )
     parser.add_argument(
         "--filter_tests",
         action="store_true",
@@ -814,7 +842,6 @@ def parse_commit_message(commit_message):
                 infer_tests_to_run(
                     args.output_file,
                     diff_with_last_commit=diff_with_last_commit,
-                    filters=args.filters,
                     json_output_file=args.json_output_file,
                     filter_models=not commit_flags["no_filter"],
                 )