ci(workflow): attempts to skip retry for the known failed tests (#3809)

This PR tries to implement WEB-588. Note that this still needs to be verified, so WEB-588 cannot be closed completely until verification is done. The PR uses the upstream next.js changes together with the latest test results stored in the turbo repo, and attempts to skip retries for tests that are already known to fail. It currently relies only on the latest main branch's results. Meanwhile, this PR also limits the concurrency of the test suite, since it _may_ be related to the random SIGTERMs that cause test flakiness / count drift across runs. Originally I thought WEB-544 was fixed, but it seems it is not, and it requires more investigation.
vercel · Feb 16, 2023 · 13a4d3e · 13a4d3e
1 parent 564a0f0
commit 13a4d3e
Show file tree

Hide file tree

Showing 4 changed files with 130 additions and 64 deletions.
diff --git a/.github/actions/next-integration-stat/index.js b/.github/actions/next-integration-stat/index.js
@@ -16471,11 +16471,22 @@
           currentTestFailedNames: [],
         }
       );
-      console.log("Current test summary", {
-        currentTestFailedCaseCount,
-        currentTestFailedSuiteCount,
-        currentTestFailedNames,
-      });
+      console.log(
+        "Current test summary",
+        JSON.stringify(
+          {
+            currentTestFailedSuiteCount,
+            currentTestPassedSuiteCount,
+            currentTestTotalSuiteCount,
+            currentTestFailedCaseCount,
+            currentTestPassedCaseCount,
+            currentTestTotalCaseCount,
+            currentTestFailedNames,
+          },
+          null,
+          2
+        )
+      );
       if (!baseResults) {
         console.log("There's no base to compare");
         return `### Test summary
@@ -16516,11 +16527,22 @@
           baseTestFailedNames: [],
         }
       );
-      console.log("Base test summary", {
-        baseTestFailedSuiteCount,
-        baseTestFailedCaseCount,
-        baseTestFailedNames,
-      });
+      console.log(
+        "Base test summary",
+        JSON.stringify(
+          {
+            baseTestFailedSuiteCount,
+            baseTestPassedSuiteCount,
+            baseTestTotalSuiteCount,
+            baseTestFailedCaseCount,
+            baseTestPassedCaseCount,
+            baseTestTotalCaseCount,
+            baseTestFailedNames,
+          },
+          null,
+          2
+        )
+      );
       let testSuiteDiff = ":zero:";
       const suiteCountDiff =
         baseTestFailedSuiteCount - currentTestFailedSuiteCount;
@@ -16720,27 +16742,31 @@
             }
             groupedFails[ancestorKey].push(fail);
           }
-          commentValues.push(`\`${failedTest}\``);
-          failedTestLists.push(failedTest);
-          for (const group of Object.keys(groupedFails).sort()) {
-            const fails = groupedFails[group];
-            commentValues.push(`\n`);
-            fails.forEach((fail) => {
-              commentValues.push(`- ${group} > ${fail.title}`);
-            });
-          }
-          const strippedResultMessage =
-            resultMessage.length >= 50000
-              ? resultMessage.substring(0, 50000) +
-                `...\n(Test result messages are too long, cannot post full message in comment. See the action logs for the full message.)`
-              : resultMessage;
-          if (resultMessage.length >= 50000) {
-            console.log(
-              "Test result messages are too long, comment will post stripped."
-            );
+          if (!failedTestLists.includes(failedTest)) {
+            commentValues.push(`\`${failedTest}\``);
+            failedTestLists.push(failedTest);
           }
           commentValues.push(`\n`);
+          // Currently there are too many test failures to post since it creates several comments.
+          // Only expands if explicitly requested in the option.
           if (shouldExpandResultMessages) {
+            for (const group of Object.keys(groupedFails).sort()) {
+              const fails = groupedFails[group];
+              commentValues.push(`\n`);
+              fails.forEach((fail) => {
+                commentValues.push(`- ${group} > ${fail.title}`);
+              });
+            }
+            const strippedResultMessage =
+              resultMessage.length >= 50000
+                ? resultMessage.substring(0, 50000) +
+                  `...\n(Test result messages are too long, cannot post full message in comment. See the action logs for the full message.)`
+                : resultMessage;
+            if (resultMessage.length >= 50000) {
+              console.log(
+                "Test result messages are too long, comment will post stripped."
+              );
+            }
             commentValues.push(`<details>`);
             commentValues.push(`<summary>Expand output</summary>`);
             commentValues.push(strippedResultMessage);

diff --git a/.github/actions/next-integration-stat/src/index.ts b/.github/actions/next-integration-stat/src/index.ts
@@ -568,11 +568,22 @@ function getTestSummary(
     }
   );
 
-  console.log("Current test summary", {
-    currentTestFailedCaseCount,
-    currentTestFailedSuiteCount,
-    currentTestFailedNames,
-  });
+  console.log(
+    "Current test summary",
+    JSON.stringify(
+      {
+        currentTestFailedSuiteCount,
+        currentTestPassedSuiteCount,
+        currentTestTotalSuiteCount,
+        currentTestFailedCaseCount,
+        currentTestPassedCaseCount,
+        currentTestTotalCaseCount,
+        currentTestFailedNames,
+      },
+      null,
+      2
+    )
+  );
 
   if (!baseResults) {
     console.log("There's no base to compare");
@@ -618,11 +629,22 @@ function getTestSummary(
     }
   );
 
-  console.log("Base test summary", {
-    baseTestFailedSuiteCount,
-    baseTestFailedCaseCount,
-    baseTestFailedNames,
-  });
+  console.log(
+    "Base test summary",
+    JSON.stringify(
+      {
+        baseTestFailedSuiteCount,
+        baseTestPassedSuiteCount,
+        baseTestTotalSuiteCount,
+        baseTestFailedCaseCount,
+        baseTestPassedCaseCount,
+        baseTestTotalCaseCount,
+        baseTestFailedNames,
+      },
+      null,
+      2
+    )
+  );
 
   let testSuiteDiff = ":zero:";
   const suiteCountDiff = baseTestFailedSuiteCount - currentTestFailedSuiteCount;
@@ -816,31 +838,34 @@ async function run() {
       groupedFails[ancestorKey].push(fail);
     }
 
-    commentValues.push(`\`${failedTest}\``);
-    failedTestLists.push(failedTest);
-
-    for (const group of Object.keys(groupedFails).sort()) {
-      const fails = groupedFails[group];
-      commentValues.push(`\n`);
-      fails.forEach((fail) => {
-        commentValues.push(`- ${group} > ${fail.title}`);
-      });
-    }
-
-    const strippedResultMessage =
-      resultMessage.length >= 50000
-        ? resultMessage.substring(0, 50000) +
-          `...\n(Test result messages are too long, cannot post full message in comment. See the action logs for the full message.)`
-        : resultMessage;
-    if (resultMessage.length >= 50000) {
-      console.log(
-        "Test result messages are too long, comment will post stripped."
-      );
+    if (!failedTestLists.includes(failedTest)) {
+      commentValues.push(`\`${failedTest}\``);
+      failedTestLists.push(failedTest);
     }
-
     commentValues.push(`\n`);
 
+    // Currently there are too many test failures to post since it creates several comments.
+    // Only expands if explicitly requested in the option.
     if (shouldExpandResultMessages) {
+      for (const group of Object.keys(groupedFails).sort()) {
+        const fails = groupedFails[group];
+        commentValues.push(`\n`);
+        fails.forEach((fail) => {
+          commentValues.push(`- ${group} > ${fail.title}`);
+        });
+      }
+
+      const strippedResultMessage =
+        resultMessage.length >= 50000
+          ? resultMessage.substring(0, 50000) +
+            `...\n(Test result messages are too long, cannot post full message in comment. See the action logs for the full message.)`
+          : resultMessage;
+      if (resultMessage.length >= 50000) {
+        console.log(
+          "Test result messages are too long, comment will post stripped."
+        );
+      }
+
       commentValues.push(`<details>`);
       commentValues.push(`<summary>Expand output</summary>`);
       commentValues.push(strippedResultMessage);

diff --git a/.github/workflows/nextjs-integration-test.yml b/.github/workflows/nextjs-integration-test.yml
@@ -42,6 +42,7 @@ jobs:
       NEXT_TELEMETRY_DISABLED: 1
       # Path to the next-dev binary located in **docker container** image.
       NEXT_DEV_BIN: /work/next-dev
+      FAILED_TEST_LIST_PATH: /work/integration-test-data/test-results/main/failed-test-path-list.json
       # Glob pattern to run specific tests with --turbo.
       NEXT_DEV_TEST_GLOB: "*"
       # pnpm version should match to what upstream next.js uses
@@ -58,8 +59,12 @@ jobs:
           path: ./*
           key: ${{ github.sha }}-${{ github.run_number }}
 
+      # This step is quite same as upstream next.js integration execution, but with some differences:
+      # Sets `__INTERNAL_CUSTOM_TURBOPACK_BINARY`, `__INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH` to setup custom turbopack.
+      # Sets `NEXT_TEST_SKIP_RETRY_MANIFEST`, `NEXT_TEST_CONTINUE_ON_ERROR` to continue on error but do not retry on the known failed tests.
+      # Do not set --timings flag
       - run: |
-          docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && ls && curl https://install-node.vercel.app/v${{ matrix.node }} | FORCE=1 bash && node -v && npm i -g pnpm@${PNPM_VERSION} && /work/next-dev --display-version && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 NEXT_TEST_MODE=dev xvfb-run node run-tests.js --type development -g ${{ matrix.group }}/4 >> /proc/1/fd/1"
+          docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && ls && curl https://install-node.vercel.app/v${{ matrix.node }} | FORCE=1 bash && node -v && npm i -g pnpm@${PNPM_VERSION} && /work/next-dev --display-version && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_SKIP_RETRY_MANIFEST=${FAILED_TEST_LIST_PATH} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 NEXT_TEST_MODE=dev xvfb-run node run-tests.js --type development -g ${{ matrix.group }}/4 >> /proc/1/fd/1"
         name: Run test/development
         # It is currently expected to fail some of next.js integration test, do not fail CI check.
         continue-on-error: true
@@ -80,6 +85,7 @@ jobs:
       NEXT_TELEMETRY_DISABLED: 1
       # Path to the next-dev binary located in **docker container** image.
       NEXT_DEV_BIN: /work/next-dev
+      FAILED_TEST_LIST_PATH: /work/integration-test-data/test-results/main/failed-test-path-list.json
       # Glob pattern to run specific tests with --turbo.
       NEXT_DEV_TEST_GLOB: "*"
       # pnpm version should match to what upstream next.js uses
@@ -97,7 +103,7 @@ jobs:
           key: ${{ github.sha }}-${{ github.run_number }}
 
       - run: |
-          docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && ls && curl https://install-node.vercel.app/v${{ matrix.node }} | FORCE=1 bash && node -v && npm i -g pnpm@${PNPM_VERSION} && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 NEXT_TEST_MODE=dev xvfb-run node run-tests.js --type e2e -g ${{ matrix.group }}/7 >> /proc/1/fd/1"
+          docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && ls && curl https://install-node.vercel.app/v${{ matrix.node }} | FORCE=1 bash && node -v && npm i -g pnpm@${PNPM_VERSION} && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_SKIP_RETRY_MANIFEST=${FAILED_TEST_LIST_PATH} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 NEXT_TEST_MODE=dev xvfb-run node run-tests.js --type e2e -g ${{ matrix.group }}/7 >> /proc/1/fd/1"
         name: Run test/e2e (dev)
         continue-on-error: true
         env:
@@ -118,6 +124,7 @@ jobs:
       NEXT_TELEMETRY_DISABLED: 1
       # Path to the next-dev binary located in **docker container** image.
       NEXT_DEV_BIN: /work/next-dev
+      FAILED_TEST_LIST_PATH: /work/integration-test-data/test-results/main/failed-test-path-list.json
       # Glob pattern to run specific tests with --turbo.
       NEXT_DEV_TEST_GLOB: "*"
       # pnpm version should match to what upstream next.js uses
@@ -133,7 +140,7 @@ jobs:
       # TODO: This test currently seems to load wasm/swc and does not load the next-dev binary.
       # Temporary disabled until figure out details.
       #- run: |
-      #    docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && curl -s https://install-node.vercel.app/v16 | FORCE=1 bash && npm i -g pnpm@${PNPM_VERSION} > /dev/null && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 NEXT_TEST_CNA=1 xvfb-run node run-tests.js test/integration/create-next-app/index.test.ts test/integration/create-next-app/templates.test.ts >> /proc/1/fd/1"
+      #    docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && curl -s https://install-node.vercel.app/v16 | FORCE=1 bash && npm i -g pnpm@${PNPM_VERSION} > /dev/null && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_SKIP_RETRY_MANIFEST=${FAILED_TEST_LIST_PATH} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 NEXT_TEST_CNA=1 xvfb-run node run-tests.js test/integration/create-next-app/index.test.ts test/integration/create-next-app/templates.test.ts >> /proc/1/fd/1"
       #  name: Run test/e2e (create-next-app)
       #  continue-on-error: true
       #  env:
@@ -154,6 +161,7 @@ jobs:
       NEXT_TELEMETRY_DISABLED: 1
       # Path to the next-dev binary located in **docker container** image.
       NEXT_DEV_BIN: /work/next-dev
+      FAILED_TEST_LIST_PATH: /work/integration-test-data/test-results/main/failed-test-path-list.json
       # Glob pattern to run specific tests with --turbo.
       NEXT_DEV_TEST_GLOB: "*"
       # pnpm version should match to what upstream next.js uses
@@ -198,7 +206,7 @@ jobs:
           key: ${{ github.sha }}-${{ github.run_number }}
 
       - run: |
-          docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && ls && curl https://install-node.vercel.app/v16 | FORCE=1 bash && node -v && npm i -g pnpm@${PNPM_VERSION} && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 xvfb-run node run-tests.js -g ${{ matrix.group }}/25 >> /proc/1/fd/1"
+          docker run --rm -v $(pwd):/work mcr.microsoft.com/playwright:v1.28.1-focal /bin/bash -c "cd /work && ls && curl https://install-node.vercel.app/v16 | FORCE=1 bash && node -v && npm i -g pnpm@${PNPM_VERSION} && __INTERNAL_CUSTOM_TURBOPACK_BINARY=${NEXT_DEV_BIN} __INTERNAL_NEXT_DEV_TEST_TURBO_GLOB_MATCH=${NEXT_DEV_TEST_GLOB} NEXT_TEST_SKIP_RETRY_MANIFEST=${FAILED_TEST_LIST_PATH} NEXT_TEST_CONTINUE_ON_ERROR=TRUE NEXT_TEST_JOB=1 xvfb-run node run-tests.js -g ${{ matrix.group }}/25 >> /proc/1/fd/1"
         name: Test Integration
         continue-on-error: true
         env:

diff --git a/.github/workflows/setup-nextjs-build.yml b/.github/workflows/setup-nextjs-build.yml
@@ -66,12 +66,19 @@ jobs:
       - name: tune linux network
         run: sudo ethtool -K eth0 tx off rx off
 
-      - name: Checkout
+      - name: Checkout Next.js
         uses: actions/checkout@v3
         with:
           repository: vercel/next.js
           ref: ${{ env.NEXTJS_VERSION }}
 
+      - name: Checkout failed test lists
+        uses: actions/checkout@v3
+        with:
+          repository: vercel/turbo
+          ref: nextjs-integration-test-data
+          path: integration-test-data
+
       - uses: actions/cache@v3
         id: restore-build
         with: