From de39ccdd990a72b524a0751f7986ab6cf80dbbcf Mon Sep 17 00:00:00 2001
From: Boris Zbarsky
Date: Wed, 6 Sep 2023 13:00:51 -0400
Subject: [PATCH] Add CI testing for purposeful YAML failures.

This should catch cases where, for some reason, we are _not_ running the
YAML tests correctly and tests that should fail do not fail.
---
 .github/workflows/tests.yaml              | 31 +++++++++++++
 scripts/tests/chiptest/__init__.py        | 11 +++++
 scripts/tests/chiptest/test_definition.py |  1 +
 scripts/tests/run_test_suite.py           | 33 +++++++++++---
 .../TestPurposefulFailureEqualities.yaml   | 44 +++++++++++++++++++
 5 files changed, 114 insertions(+), 6 deletions(-)
 create mode 100644 src/app/tests/suites/TestPurposefulFailureEqualities.yaml

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index ce0881ae6c1cec..aa54505bbae9ab 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -214,6 +214,21 @@ jobs:
                         --bridge-app ./out/linux-x64-bridge-${BUILD_VARIANT}/chip-bridge-app \
                      "
 
+            - name: Run purposeful failure tests using the python parser sending commands to chip-tool
+              run: |
+                  ./scripts/run_in_build_env.sh \
+                     "./scripts/tests/run_test_suite.py \
+                        --runner chip_tool_python \
+                        --include-tags PURPOSEFUL_FAILURE \
+                        --chip-tool ./out/linux-x64-chip-tool${CHIP_TOOL_VARIANT}-${BUILD_VARIANT}/chip-tool \
+                        run \
+                        --iterations 1 \
+                        --expected-failures 1 \
+                        --keep-going \
+                        --test-timeout-seconds 120 \
+                        --all-clusters-app ./out/linux-x64-all-clusters-${BUILD_VARIANT}/chip-all-clusters-app \
+                     "
+
             - name: Run Tests using chip-repl (skip slow)
               if: github.event_name == 'pull_request'
               run: |
@@ -225,6 +240,7 @@ jobs:
                         --exclude-tags IN_DEVELOPMENT \
                         --exclude-tags EXTRA_SLOW \
                         --exclude-tags SLOW \
+                        --exclude-tags PURPOSEFUL_FAILURE \
                         run \
                         --iterations 1 \
                         --test-timeout-seconds 120 \
@@ -337,6 +353,21 @@ jobs:
                         --bridge-app ./out/darwin-x64-bridge-${BUILD_VARIANT}/chip-bridge-app \
                      "
 
+            - name: Run purposeful failure tests using the python parser sending commands to chip-tool
+              run: |
+                  ./scripts/run_in_build_env.sh \
+                     "./scripts/tests/run_test_suite.py \
+                        --runner chip_tool_python \
+                        --include-tags PURPOSEFUL_FAILURE \
+                        --chip-tool ./out/darwin-x64-chip-tool${CHIP_TOOL_VARIANT}-${BUILD_VARIANT}/chip-tool \
+                        run \
+                        --iterations 1 \
+                        --expected-failures 1 \
+                        --keep-going \
+                        --test-timeout-seconds 120 \
+                        --all-clusters-app ./out/darwin-x64-all-clusters-${BUILD_VARIANT}/chip-all-clusters-app \
+                     "
+
             - name: Uploading core files
               uses: actions/upload-artifact@v3
               if: ${{ failure() && !env.ACT }}
diff --git a/scripts/tests/chiptest/__init__.py b/scripts/tests/chiptest/__init__.py
index 6267ee037db863..eda65d58438abe 100644
--- a/scripts/tests/chiptest/__init__.py
+++ b/scripts/tests/chiptest/__init__.py
@@ -197,6 +197,13 @@ def _GetChipReplUnsupportedTests() -> Set[str]:
     }
 
 
+def _GetPurposefulFailureTests() -> Set[str]:
+    """Tests that fail in YAML on purpose."""
+    return {
+        "TestPurposefulFailureEqualities.yaml"
+    }
+
+
 def _AllYamlTests():
     yaml_test_suite_path = Path(_YAML_TEST_SUITE_PATH)
 
@@ -270,6 +277,7 @@ def _AllFoundYamlTests(treat_repl_unsupported_as_in_development: bool, use_short
     extra_slow_tests = _GetExtraSlowTests()
     in_development_tests = _GetInDevelopmentTests()
     chip_repl_unsupported_tests = _GetChipReplUnsupportedTests()
+    purposeful_failure_tests = _GetPurposefulFailureTests()
 
     for path in _AllYamlTests():
         if not _IsValidYamlTest(path.name):
@@ -291,6 +299,9 @@ def _AllFoundYamlTests(treat_repl_unsupported_as_in_development: bool, use_short
         if path.name in in_development_tests:
             tags.add(TestTag.IN_DEVELOPMENT)
 
+        if path.name in purposeful_failure_tests:
+            tags.add(TestTag.PURPOSEFUL_FAILURE)
+
         if treat_repl_unsupported_as_in_development and path.name in chip_repl_unsupported_tests:
             tags.add(TestTag.IN_DEVELOPMENT)
 
diff --git a/scripts/tests/chiptest/test_definition.py b/scripts/tests/chiptest/test_definition.py
index 68f2323a3302fc..694f8c7e75feb1 100644
--- a/scripts/tests/chiptest/test_definition.py
+++ b/scripts/tests/chiptest/test_definition.py
@@ -219,6 +219,7 @@ class TestTag(Enum):
     IN_DEVELOPMENT = auto()  # test may not pass or undergoes changes
     CHIP_TOOL_PYTHON_ONLY = auto()  # test uses YAML features only supported by the CHIP_TOOL_PYTHON runner.
     EXTRA_SLOW = auto()  # test uses Sleep and is generally _very_ slow (>= 60s is a typical threshold)
+    PURPOSEFUL_FAILURE = auto()  # test fails on purpose
 
     def to_s(self):
         for (k, v) in TestTag.__members__.items():
diff --git a/scripts/tests/run_test_suite.py b/scripts/tests/run_test_suite.py
index ae5e569b36dd59..17124a6d9b24c1 100755
--- a/scripts/tests/run_test_suite.py
+++ b/scripts/tests/run_test_suite.py
@@ -174,7 +174,8 @@ def main(context, dry_run, log_level, target, target_glob, target_skip_glob,
            TestTag.MANUAL,
            TestTag.IN_DEVELOPMENT,
            TestTag.FLAKY,
-            TestTag.EXTRA_SLOW
+            TestTag.EXTRA_SLOW,
+            TestTag.PURPOSEFUL_FAILURE,
        }
 
        if runtime != TestRunTime.CHIP_TOOL_PYTHON:
@@ -273,9 +274,19 @@ def cmd_list(context):
     default=None,
     type=int,
     help='If provided, fail if a test runs for longer than this time')
+@click.option(
+    '--expected-failures',
+    type=int,
+    default=0,
+    show_default=True,
+    help='Number of tests that are expected to fail in each iteration. Overall test will pass if the number of failures matches this. Nonzero values require --keep-going')
 @click.pass_context
 def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, ota_requestor_app,
-            tv_app, bridge_app, chip_repl_yaml_tester, chip_tool_with_python, pics_file, keep_going, test_timeout_seconds):
+            tv_app, bridge_app, chip_repl_yaml_tester, chip_tool_with_python, pics_file, keep_going, test_timeout_seconds, expected_failures):
+    if expected_failures != 0 and not keep_going:
+        logging.exception(f"'--expected-failures {expected_failures}' used without '--keep-going'")
+        sys.exit(2)
+
     runner = chiptest.runner.Runner()
 
     paths_finder = PathsFinder()
@@ -327,8 +338,14 @@ def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, o
     apps_register = AppsRegister()
     apps_register.init()
 
+    def cleanup():
+        apps_register.uninit()
+        if sys.platform == 'linux':
+            chiptest.linux.ShutdownNamespaceForTestExecution()
+
     for i in range(iterations):
         logging.info("Starting iteration %d" % (i+1))
+        observed_failures = 0
         for test in context.obj.tests:
             if context.obj.include_tags:
                 if not (test.tags & context.obj.include_tags):
@@ -357,13 +374,17 @@ def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, o
                 test_end = time.monotonic()
                 logging.exception('%-30s - FAILED in %0.2f seconds' %
                                   (test.name, (test_end - test_start)))
+                observed_failures += 1
                 if not keep_going:
-                    apps_register.uninit()
+                    cleanup()
                     sys.exit(2)
 
-    apps_register.uninit()
-    if sys.platform == 'linux':
-        chiptest.linux.ShutdownNamespaceForTestExecution()
+        if observed_failures != expected_failures:
+            logging.exception(f'Iteration {i}: expected failure count {expected_failures}, but got {observed_failures}')
+            cleanup()
+            sys.exit(2)
+
+    cleanup()
 
 
 # On linux, allow an execution shell to be prepared
diff --git a/src/app/tests/suites/TestPurposefulFailureEqualities.yaml b/src/app/tests/suites/TestPurposefulFailureEqualities.yaml
new file mode 100644
index 00000000000000..14e886437d678e
--- /dev/null
+++ b/src/app/tests/suites/TestPurposefulFailureEqualities.yaml
@@ -0,0 +1,44 @@
+# Copyright (c) 2023 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Test that purposefully fails in EqualityCommands
+
+config:
+    nodeId: 0x12344321
+    cluster: "EqualityCommands"
+    endpoint: 1
+
+tests:
+    - label: "Wait for the commissioned device to be retrieved"
+      cluster: "DelayCommands"
+      command: "WaitForCommissionee"
+      arguments:
+          values:
+              - name: "nodeId"
+                value: nodeId
+
+    - label:
+          "Compute the result of comparing 0 to 1 and claim that they are equal"
+      command: "UnsignedNumberEquals"
+      arguments:
+          values:
+              - name: "Value1"
+                value: 0
+              - name: "Value2"
+                value: 1
+      response:
+          - values:
+                - name: "Equals"
+                  # This is the wrong value on purpose, so this test will fail.
+                  value: true
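Usage note (not part of the patch): the new CI steps can be reproduced locally with essentially the same invocation the workflow runs. The sketch below is hedged: the ./out/<...> paths are hypothetical placeholders for whichever build outputs exist locally (the workflow substitutes ${BUILD_VARIANT} and ${CHIP_TOOL_VARIANT}), while every flag shown is taken from the patch above.

    # Local reproduction sketch; substitute your own build output directories.
    ./scripts/run_in_build_env.sh \
       "./scripts/tests/run_test_suite.py \
          --runner chip_tool_python \
          --include-tags PURPOSEFUL_FAILURE \
          --chip-tool ./out/<your-chip-tool-build>/chip-tool \
          run \
          --iterations 1 \
          --expected-failures 1 \
          --keep-going \
          --test-timeout-seconds 120 \
          --all-clusters-app ./out/<your-all-clusters-build>/chip-all-clusters-app \
       "

With --expected-failures 1 and --keep-going, cmd_run counts failing tests in each iteration and exits with status 2 unless exactly one test fails, which is what makes the purposely broken TestPurposefulFailureEqualities.yaml a canary for the YAML harness itself.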