feat: aggregate and summarize metrics with rust (#1154)
* chore: move metrics_unify to openvm-prof

* feat(prof): aggregate and summary metrics with rust

* feat: add diffs for aggregate/summary

* fix

* fix: handle prev formatting mismatch

* hacky add metadata

* chore: copy less files
jonathanpwang authored Jan 2, 2025
1 parent cb6c5a9 commit 1425c1d
Showing 17 changed files with 15,015 additions and 717 deletions.
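This commit moves the Python metric_unify tooling into a Rust CLI, openvm-prof (crates/prof): CI now downloads the per-benchmark metric JSON files plus their main-branch baselines and lets the Rust binary compute the aggregates, diffs, and markdown summary. As a rough sketch of how the updated benchmarks.yml drives it (the flags mirror the workflow step in the diff below; the file names, the <sha> placeholder, and the results link are illustrative, not taken verbatim from the workflow):

```bash
# Install the new CLI from the workspace (benchmarks.yml does this from crates/prof).
cargo install --force --profile=dev --path crates/prof

# Aggregate this run's metric JSONs, diff them against the main-branch baselines,
# and write the combined summary markdown. Both path flags take comma-separated lists.
openvm-prof --json-paths "fibonacci-<sha>.json,regex-<sha>.json" \
    --prev-json-paths "main-fibonacci.json,main-regex.json" \
    summary \
    --benchmark-results-link "https://github.com/OWNER/REPO/blob/benchmark-results/benchmarks-pr/<pr-number>/individual" \
    --summary-md-path summary.md
```

The workflow then expects one markdown report per input JSON (fibonacci-<sha>.md, and so on), annotates each with add_metadata, and pushes them together with summary.md to the benchmark-results branch.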
93 changes: 4 additions & 89 deletions .github/workflows/benchmark-call.yml
@@ -4,16 +4,9 @@ on:
workflow_dispatch:
inputs:
benchmark_name:
type: choice
type: string
required: true
description: The name of the benchmark to run
options:
- verify_fibair
- fibonacci
- revm_transfer
- regex
- base64_json
- fib_e2e
instance_type:
type: string
required: false
@@ -104,9 +97,7 @@ on:
description: Whether to run the e2e benchmark

env:
S3_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/results
S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics
PUBLIC_S3_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/flamegraphs
FEATURE_FLAGS: "bench-metrics,parallel,nightly-features"
CMD_ARGS: ""
INPUT_ARGS: ""
@@ -128,7 +119,6 @@ jobs:
##########################################################################
# Environment setup #
##########################################################################

- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha || github.sha }}
@@ -211,7 +201,7 @@ jobs:
python3 ${{ steps.set-working-dir.outputs.relative_path }}/ci/scripts/bench.py $BIN_NAME $CMD_ARGS $INPUT_ARGS
##########################################################################
# Generate result .md files and flamegraphs, store them in S3 #
# Store metric json file to S3 #
##########################################################################
- name: Upload metric json and compute diff with previous to generate markdown
run: |
@@ -221,9 +211,6 @@
s5cmd cp $METRIC_PATH ${{ env.S3_METRICS_PATH }}/${METRIC_NAME}-${current_sha}.json
source ci/scripts/utils.sh
generate_markdown $METRIC_PATH $METRIC_NAME ${{ env.S3_METRICS_PATH }} "."
# - name: Install inferno-flamegraph
# run: cargo install inferno

@@ -235,82 +222,10 @@
# echo "UPLOAD_FLAMEGRAPHS=1" >> $GITHUB_ENV
# fi

- name: Add benchmark metadata and upload markdown
id: add_metadata
run: |
source ci/scripts/utils.sh
add_metadata results.md ${{ inputs.max_segment_length }} ${{ inputs.instance_type }} ${{ inputs.memory_allocator }} ${{ github.repository }} ${{ github.run_id }}
s3_md_file="${METRIC_NAME}-${current_sha}.md"
s5cmd cp results.md "${{ env.S3_PATH }}/${s3_md_file}"
##########################################################################
# Update S3 with individual results upon a push event #
# Update s3 for latest main metrics upon a push event #
##########################################################################
- name: Update latest main result in s3
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: |
s5cmd cp "${{ env.S3_PATH }}/${METRIC_NAME}-${{ env.current_sha }}.md" "${{ env.S3_PATH }}/main-${METRIC_NAME}.md"
if [[ -f $METRIC_PATH ]]; then
s5cmd cp $METRIC_PATH "${{ env.S3_METRICS_PATH }}/main-${METRIC_NAME}.json"
fi
##########################################################################
# Update benchmark-results with individual results #
##########################################################################
- uses: actions/checkout@v4
with:
ref: benchmark-results

- name: Set up git
run: |
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git config --global user.name "github-actions[bot]"
- name: Set github pages path for dispatch
run: |
BENCHMARK_RESULTS_PATH="benchmarks-dispatch/${{ github.head_ref || github.ref }}"
echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV
- name: Set github pages path for PR
if: github.event_name == 'pull_request'
run: |
BENCHMARK_RESULTS_PATH="benchmarks-pr/${{ github.event.pull_request.number }}/individual"
echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV
- name: Set github pages path for push
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: |
BENCHMARK_RESULTS_PATH="benchmarks/individual"
echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV
- name: Update PR github pages with new bench results
if: github.event.pull_request.head.repo.fork == false # forks do not have write access
run: |
mkdir -p ${BENCHMARK_RESULTS_PATH}
s3_md_file="${METRIC_NAME}-${current_sha}.md"
s5cmd cp "${{ env.S3_PATH }}/${s3_md_file}" ${BENCHMARK_RESULTS_PATH}/${s3_md_file}
git add ${BENCHMARK_RESULTS_PATH}/${s3_md_file}
git commit --allow-empty -m "Update benchmark result at ${BENCHMARK_RESULTS_PATH}/${s3_md_file}"
MAX_RETRIES=10
RETRY_DELAY=5
ATTEMPT=0
SUCCESS=false
while [ $ATTEMPT -lt $MAX_RETRIES ]; do
echo "Attempt $((ATTEMPT + 1)) to push of $MAX_RETRIES..."
git fetch origin benchmark-results
git merge origin/benchmark-results --no-edit
if git push origin benchmark-results; then
SUCCESS=true
break
else
echo "Push failed. Retrying in $RETRY_DELAY seconds..."
sleep $RETRY_DELAY
ATTEMPT=$((ATTEMPT + 1))
fi
done
if [ "$SUCCESS" = false ]; then
echo "PUSH_FAILED"
exit 1
fi
s5cmd cp $METRIC_PATH "${{ env.S3_METRICS_PATH }}/main-${METRIC_NAME}.json"
149 changes: 104 additions & 45 deletions .github/workflows/benchmarks.yml
@@ -14,6 +14,7 @@ on:
- "benchmarks/**"
- ".github/workflows/benchmark-call.yml"
- ".github/workflows/benchmarks.yml"
workflow_dispatch:

concurrency:
group: benchmark-${{ github.event.pull_request.number || github.sha }}
@@ -25,6 +26,8 @@ env:
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
REPO: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics
S3_MD_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/results

permissions:
contents: write
@@ -138,6 +141,15 @@ jobs:
ref: ${{ env.CURRENT_SHA }}
repository: ${{ env.REPO }}

- name: Install openvm-prof
working-directory: crates/prof
run: cargo install --force --profile=dev --path .

- name: Set github pages path for dispatch
run: |
BENCHMARK_RESULTS_PATH="benchmarks-dispatch/${{ github.head_ref || github.ref }}"
echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV
- name: Set github pages path for PR
if: github.event_name == 'pull_request'
run: |
@@ -150,59 +162,84 @@ jobs:
BENCHMARK_RESULTS_PATH="benchmarks"
echo "BENCHMARK_RESULTS_PATH=${BENCHMARK_RESULTS_PATH}" >> $GITHUB_ENV
- name: Load all metadata files from S3
- name: Download all metric json files from S3
run: |
current_sha=$(git rev-parse HEAD)
md_files=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r '
json_files=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r '
.[] |
select(.e2e_bench != true) |
"\(.id)-"' |
sed "s/$/${current_sha}.md/" |
sort)
md_file_list=$(echo -n "$md_files" | paste -sd "," -)
"\(.id)-${{ env.CURRENT_SHA }}.json"')
json_file_list=$(echo -n "$json_files" | paste -sd "," -)
echo $json_file_list
e2e_md_files=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r '
prev_json_files=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r '
.[] |
select(.e2e_bench == true) |
"\(.id)-"' |
sed "s/$/${current_sha}.md/" |
sort)
e2e_md_file_list=$(echo -n "$e2e_md_files" | paste -sd "," -)
while read md_file; do
if [ -z "$md_file" ]; then
"main-\(.id).json"')
prev_json_file_list=$(echo -n "$prev_json_files" | paste -sd "," -)
echo $prev_json_file_list
(echo "$json_files"; echo "$prev_json_files") | while read json_file; do
if [ -z "$json_file" ]; then
continue
fi
echo "Downloading results for benchmark: $md_file"
s5cmd cp "s3://openvm-public-data-sandbox-us-east-1/benchmark/github/results/${md_file}" "${md_file}"
done <<< "$md_files"
E2E_FILE_LIST=""
if [[ -n "$e2e_md_files" ]]; then
while read e2e_md_file; do
echo "Downloading results for benchmark: $e2e_md_file"
s5cmd cp "s3://openvm-public-data-sandbox-us-east-1/benchmark/github/results/${e2e_md_file}" "${e2e_md_file}"
done <<< "$e2e_md_files"
E2E_FILE_LIST="${e2e_md_file_list}"
fi
echo "Downloading metrics for benchmark: $json_file"
if ! s5cmd cp "${{ env.S3_METRICS_PATH }}/${json_file}" "${json_file}"; then
echo "Warning: Failed to download ${json_file}, skipping..."
continue
fi
done
echo "Benchmarks: ${md_file_list}"
echo "E2E Benchmarks: ${E2E_FILE_LIST}"
openvm-prof --json-paths "${json_file_list}" \
--prev-json-paths "${prev_json_file_list}" \
summary \
--benchmark-results-link "https://github.com/${{ github.repository }}/blob/benchmark-results/${BENCHMARK_RESULTS_PATH}" \
--summary-md-path summary.md
python3 ci/scripts/metric_unify/summarize.py "${md_file_list}" \
--e2e-md-files "${E2E_FILE_LIST}" \
--aggregation-json ci/scripts/metric_unify/aggregation.json \
--benchmark-results-link "https://github.com/${{ github.repository }}/blob/benchmark-results/${BENCHMARK_RESULTS_PATH}"
COMMIT_URL=https://github.com/${{ github.repository }}/commit/${CURRENT_SHA}
BENCHMARK_WORKFLOW_URL=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
TMP_DIR=/tmp/benchmark-results/${{ env.CURRENT_SHA }}
echo "TMP_DIR=${TMP_DIR}" >> $GITHUB_ENV
mkdir -p ${TMP_DIR}
# add metadata to markdown files
source ci/scripts/utils.sh
# Parse matrix data into associative arrays
while IFS= read -r line; do
# Convert .json to .md
md_file="${line%.json}.md"
id=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r --arg file "$line" '.[] |
select(.id == ($file | split("-")[0])) |
{
max_segment_length: .max_segment_length,
instance_type: .instance_type,
memory_allocator: .memory_allocator
}')
if [ ! -z "$id" ]; then
max_segment_length=$(echo "$id" | jq -r '.max_segment_length')
instance_type=$(echo "$id" | jq -r '.instance_type')
memory_allocator=$(echo "$id" | jq -r '.memory_allocator')
# Call add_metadata for each file with its corresponding data
add_metadata \
"$md_file" \
"$max_segment_length" \
"$instance_type" \
"$memory_allocator" \
"$COMMIT_URL" \
"$BENCHMARK_WORKFLOW_URL"
fi
cp "$md_file" "${TMP_DIR}/"
done <<< "$json_files"
echo "" >> summary.md
echo "Commit: https://github.com/${{ github.repository }}/commit/${CURRENT_SHA}" >> summary.md
echo "Commit: $COMMIT_URL" >> summary.md
echo "" >> summary.md
echo "[Benchmark Workflow](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})" >> summary.md
mkdir -p /tmp/benchmark-results/
cp summary.md /tmp/benchmark-results/
echo "[Benchmark Workflow]($BENCHMARK_WORKFLOW_URL)" >> summary.md
cp summary.md ${TMP_DIR}/
##########################################################################
# Update benchmark-results with summary upon a PR event #
# Update benchmark-results branch with summary upon a PR event #
##########################################################################
- uses: actions/checkout@v4
if: github.event_name == 'pull_request' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
@@ -219,14 +256,36 @@ jobs:
if: (github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false) || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
mkdir -p ${BENCHMARK_RESULTS_PATH}
cp /tmp/benchmark-results/summary.md ${BENCHMARK_RESULTS_PATH}/summary.md
git add ${BENCHMARK_RESULTS_PATH}/summary.md
cp ${TMP_DIR}/*.md ${BENCHMARK_RESULTS_PATH}/
git add ${BENCHMARK_RESULTS_PATH}
if [[ "${{ github.event_name }}" == "push" ]]; then
cp /tmp/benchmark-results/summary.md index.md
git add index.md
fi
git commit --allow-empty -m "Update summarized benchmark result at ${BENCHMARK_RESULTS_PATH}/summary.md"
git push --force
git commit --allow-empty -m "Update benchmark results at ${{ env.CURRENT_SHA }}"
MAX_RETRIES=10
RETRY_DELAY=5
ATTEMPT=0
SUCCESS=false
while [ $ATTEMPT -lt $MAX_RETRIES ]; do
echo "Attempt $((ATTEMPT + 1)) to push of $MAX_RETRIES..."
git fetch origin benchmark-results
git merge origin/benchmark-results --no-edit
if git push origin benchmark-results; then
SUCCESS=true
break
else
echo "Push failed. Retrying in $RETRY_DELAY seconds..."
sleep $RETRY_DELAY
ATTEMPT=$((ATTEMPT + 1))
fi
done
if [ "$SUCCESS" = false ]; then
echo "PUSH_FAILED"
exit 1
fi
##########################################################################
# Update PR comment upon a pull request event #
@@ -262,7 +321,7 @@ jobs:
with:
script: |
const fs = require('fs')
const newBenchmark = fs.readFileSync('/tmp/benchmark-results/summary.md', { encoding: 'utf8', flag: 'r' })
const newBenchmark = fs.readFileSync('${{ env.TMP_DIR }}/summary.md', { encoding: 'utf8', flag: 'r' })
github.rest.issues.createComment({
issue_number: context.issue.number,
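A note on the add_metadata helper invoked in the benchmarks.yml step above: it is defined in ci/scripts/utils.sh, whose diff is not shown in this view. Purely to illustrate the call signature used by the workflow (markdown file, max segment length, instance type, memory allocator, commit URL, workflow URL), a minimal hypothetical stand-in might look like the following; the real helper may format or order things differently:

```bash
#!/usr/bin/env bash
# Hypothetical stand-in for add_metadata from ci/scripts/utils.sh (its diff is not shown above).
# Appends run metadata to an existing per-benchmark markdown report.
add_metadata() {
    local md_file=$1 max_segment_length=$2 instance_type=$3 memory_allocator=$4 commit_url=$5 workflow_url=$6
    {
        echo ""
        echo "| Metadata | Value |"
        echo "| --- | --- |"
        echo "| max_segment_length | ${max_segment_length} |"
        echo "| instance_type | ${instance_type} |"
        echo "| memory_allocator | ${memory_allocator} |"
        echo ""
        echo "Commit: ${commit_url}"
        echo ""
        echo "[Benchmark Workflow](${workflow_url})"
    } >> "${md_file}"
}

# Example call (values are made up; the workflow passes them from the benchmark matrix):
# add_metadata results.md 4194204 64cpu-linux-arm64 mimalloc \
#     "https://github.com/OWNER/REPO/commit/abc123" \
#     "https://github.com/OWNER/REPO/actions/runs/123456789"
```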