.github/workflows/benchmarks.yml

name: "OpenVM Benchmarks: Coordinate Runner & Reporting"

on:
  push:
    branches: ["main"]
  pull_request:
    types: [opened, synchronize, reopened, labeled]
    branches: ["**"]
    paths:
      - "crates/circuits/**"
      - "crates/vm/**"
      - "crates/toolchain/**"
      - "extensions/**"
      - "benchmarks/**"
      - ".github/workflows/benchmark-call.yml"
      - ".github/workflows/benchmarks.yml"
  workflow_dispatch:
    inputs:
      run-benchmark-e2e:
        description: "Run end-to-end benchmarks"
        required: false
        default: "false"

concurrency:
  group: benchmark-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

env:
  CARGO_TERM_COLOR: always
  OPENVM_FAST_TEST: "1"
  CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
  REPO: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
  CARGO_NET_GIT_FETCH_WITH_CLI: "true"
  S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics
  S3_MD_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/results

permissions:
  contents: write
  pull-requests: write

jobs:
  create-matrix:
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=8cpu-linux-x64
    outputs:
      matrix: ${{ steps.create-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ env.CURRENT_SHA }}
          repository: ${{ env.REPO }}

      - name: Create benchmark matrix from JSON
        id: create-matrix
        run: |
          if [ ! -f ./ci/benchmark-config.json ]; then
            echo "Error: ./ci/benchmark-config.json not found"
            exit 1
          fi
          cat ./ci/benchmark-config.json

          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            LABELS='${{ toJson(github.event.pull_request.labels) }}'
            RUN_E2E=$(echo "$LABELS" | jq 'any(.name == "run-benchmark-e2e")')
          else
            RUN_E2E=${{ github.event.inputs.run-benchmark-e2e || 'false' }}
          fi

          matrix=$(jq -c --argjson run_e2e $RUN_E2E '
            [
              .benchmarks[] |
              .name as $name |
              .id as $id |
              .e2e_bench as $e2e_bench |
              select($run_e2e or .e2e_bench != true) |
              .run_params[] |
              {
                name: $name,
                e2e_bench: $e2e_bench,
                id: $id,
                instance_type: .instance_type,
                memory_allocator: .memory_allocator,
                app_log_blowup: .app_log_blowup,
                agg_log_blowup: .agg_log_blowup,
                root_log_blowup: (.root_log_blowup // 0),
                internal_log_blowup: (.internal_log_blowup // 0),
                max_segment_length: (.max_segment_length // 1048476)
              }
            ]
          ' ./ci/benchmark-config.json)
          if [ $? -ne 0 ]; then
            echo "Error: Failed to parse ./ci/benchmark-config.json"
            exit 1
          fi
          echo "matrix=$matrix" >> $GITHUB_OUTPUT

  benchmark:
    needs: create-matrix
    strategy:
      matrix:
        benchmark_run: ${{fromJson(needs.create-matrix.outputs.matrix)}}
    uses: ./.github/workflows/benchmark-call.yml
    with:
      benchmark_name: ${{ matrix.benchmark_run.name }}
      benchmark_id: ${{ matrix.benchmark_run.id }}
      instance_type: ${{ matrix.benchmark_run.instance_type }}
      memory_allocator: ${{ matrix.benchmark_run.memory_allocator }}
      app_log_blowup: ${{ matrix.benchmark_run.app_log_blowup }}
      agg_log_blowup: ${{ matrix.benchmark_run.agg_log_blowup }}
      root_log_blowup: ${{ matrix.benchmark_run.root_log_blowup }}
      internal_log_blowup: ${{ matrix.benchmark_run.internal_log_blowup }}
      max_segment_length: ${{ matrix.benchmark_run.max_segment_length }}
      e2e_bench: ${{ matrix.benchmark_run.e2e_bench }}
    secrets: inherit

  summarize:
    needs: [create-matrix, benchmark]
    runs-on:
      - runs-on=${{ github.run_id }}
      - runner=8cpu-linux-arm64
    steps:
      ##########################################################################
      # Install S3 if necessary                                                #
      ##########################################################################
      - name: Install architecture specific tools
        run: |
          S5CMD_BIN="s5cmd_2.2.2_linux_arm64.deb"
          echo "Checking s5cmd"
          if type s5cmd &>/dev/null; then
              echo "s5cmd was installed."
          else
              TMP_DIR=/tmp/s5cmd
              rm -rf $TMP_DIR
              mkdir $TMP_DIR
              echo "s5cmd was not installed. Installing.."
              wget "https://github.com/peak/s5cmd/releases/download/v2.2.2/${S5CMD_BIN}" -P $TMP_DIR
              sudo dpkg -i "${TMP_DIR}/${S5CMD_BIN}"
          fi

      ##########################################################################
      # Download individual result .md files from S3 and combine them          #
      ##########################################################################
      - uses: actions/checkout@v4
        with:
          ref: ${{ env.CURRENT_SHA }}
          repository: ${{ env.REPO }}

      - name: Install openvm-prof
        working-directory: crates/prof
        run: cargo install --force --profile=dev --path .

      - name: Set github pages path for dispatch
        run: |
          BENCHMARK_RESULTS_PATH="benchmarks-dispatch/${{ github.head_ref || github.ref }}"
          echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV

      - name: Set github pages path for PR
        if: github.event_name == 'pull_request'
        run: |
          BENCHMARK_RESULTS_PATH="benchmarks-pr/${{ github.event.pull_request.number }}"
          echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV

      - name: Set github pages path for push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        run: |
          BENCHMARK_RESULTS_PATH="benchmarks"
          echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV

      - name: Download all metric json files from S3
        run: |
          json_files=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r '
            .[] |
            "\(.id)-${{ env.CURRENT_SHA }}.json"')
          json_file_list=$(echo -n "$json_files" | paste -sd "," -)
          echo $json_file_list

          prev_json_files=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r '
            .[] |
            "main-\(.id).json"')
          prev_json_file_list=$(echo -n "$prev_json_files" | paste -sd "," -)
          echo $prev_json_file_list

          (echo "$json_files"; echo "$prev_json_files") | while read json_file; do
            if [ -z "$json_file" ]; then
              continue
            fi
            echo "Downloading metrics for benchmark: $json_file"
            if ! s5cmd cp "${{ env.S3_METRICS_PATH }}/${json_file}" "${json_file}"; then
              echo "Warning: Failed to download ${json_file}, skipping..."
              continue
            fi
          done

          openvm-prof --json-paths "${json_file_list}" \
            --prev-json-paths "${prev_json_file_list}" \
            summary \
            --benchmark-results-link "https://github.com/${{ github.repository }}/blob/benchmark-results/${BENCHMARK_RESULTS_PATH}" \
            --summary-md-path summary.md

          COMMIT_URL=https://github.com/${{ github.repository }}/commit/${CURRENT_SHA}
          BENCHMARK_WORKFLOW_URL=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
          TMP_DIR=/tmp/benchmark-results/${{ env.CURRENT_SHA }}
          echo "TMP_DIR=${TMP_DIR}" >> $GITHUB_ENV
          mkdir -p ${TMP_DIR}

          # add metadata to markdown files
          source ci/scripts/utils.sh
          # Parse matrix data into associative arrays
          while IFS= read -r line; do
            # Convert .json to .md
            md_file="${line%.json}.md"

            id=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r --arg file "$line" '.[] |
              select(.id == ($file | split("-")[0])) |
              {
                max_segment_length: .max_segment_length,
                instance_type: .instance_type,
                memory_allocator: .memory_allocator
              }')

            if [ ! -z "$id" ]; then
              max_segment_length=$(echo "$id" | jq -r '.max_segment_length')
              instance_type=$(echo "$id" | jq -r '.instance_type')
              memory_allocator=$(echo "$id" | jq -r '.memory_allocator')

              # Call add_metadata for each file with its corresponding data
              add_metadata \
                "$md_file" \
                "$max_segment_length" \
                "$instance_type" \
                "$memory_allocator" \
                "$COMMIT_URL" \
                "$BENCHMARK_WORKFLOW_URL"
            fi
            cp "$md_file" "${TMP_DIR}/"
          done <<< "$json_files"

          echo "" >> summary.md
          echo "Commit: $COMMIT_URL" >> summary.md
          echo "" >> summary.md
          echo "[Benchmark Workflow]($BENCHMARK_WORKFLOW_URL)" >> summary.md

          cp summary.md ${TMP_DIR}/

      ##########################################################################
      # Update benchmark-results branch with summary upon a PR event           #
      ##########################################################################
      - uses: actions/checkout@v4
        if: github.event_name == 'pull_request' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
        with:
          ref: benchmark-results

      - name: Set up git
        if: github.event_name == 'pull_request' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      - name: Update github pages with new bench results
        if: (github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false) || (github.event_name == 'push' && github.ref == 'refs/heads/main')
        run: |
          mkdir -p ${BENCHMARK_RESULTS_PATH}
          cp ${TMP_DIR}/*.md ${BENCHMARK_RESULTS_PATH}/
          git add ${BENCHMARK_RESULTS_PATH}
          if [[ "${{ github.event_name }}" == "push" ]]; then
            cp ${TMP_DIR}/summary.md index.md
            git add index.md
          fi
          git commit --allow-empty -m "Update benchmark results at ${{ env.CURRENT_SHA }}"

          MAX_RETRIES=10
          RETRY_DELAY=5
          ATTEMPT=0
          SUCCESS=false

          while [ $ATTEMPT -lt $MAX_RETRIES ]; do
              echo "Attempt $((ATTEMPT + 1)) to push of $MAX_RETRIES..."
              git fetch origin benchmark-results
              git merge origin/benchmark-results --no-edit
              if git push origin benchmark-results; then
                  SUCCESS=true
                  break
              else
                  echo "Push failed. Retrying in $RETRY_DELAY seconds..."
                  sleep $RETRY_DELAY
                  ATTEMPT=$((ATTEMPT + 1))
              fi
          done
          if [ "$SUCCESS" = false ]; then
              echo "PUSH_FAILED"
              exit 1
          fi

      ##########################################################################
      # Update PR comment upon a pull request event                            #
      ##########################################################################
      - name: Collapse previous comment (if exists)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const comments = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number
            });
            for (const comment of comments.data) {
              if (comment.user.login == "github-actions[bot]" && comment.body.startsWith("<!--Benchmark Results-->")) {
                console.log("collapse comment ", comment.id);
                const resp = await github.graphql(`
                  mutation {
                    minimizeComment(input: {classifier: OUTDATED, subjectId: "${comment.node_id}"}) {
                      minimizedComment {
                        isMinimized
                      }
                    }
                  }
                `);
              }
            }

      - name: Add comment to pull request
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs')
            const newBenchmark = fs.readFileSync('${{ env.TMP_DIR }}/summary.md', { encoding: 'utf8', flag: 'r' })

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: `<!--Benchmark Results-->\n${newBenchmark}`
            });