From 6a9f089794b72d6d56b22424c69a17e5b55100a9 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com>
Date: Sun, 5 Jan 2025 16:20:33 -0800
Subject: [PATCH] chore: use absolute metric output path and move metadata/flamegraph logic into utils.sh

---
 .github/workflows/benchmark-call.yml |  2 +-
 .github/workflows/benchmarks.yml     | 41 ++++++---------------
 ci/scripts/utils.sh                  | 55 +++++++++++++++++++---------
 3 files changed, 50 insertions(+), 48 deletions(-)

diff --git a/.github/workflows/benchmark-call.yml b/.github/workflows/benchmark-call.yml
index fe75090d7..ac584fd32 100644
--- a/.github/workflows/benchmark-call.yml
+++ b/.github/workflows/benchmark-call.yml
@@ -202,7 +202,7 @@ jobs:
           APP_ARG="--app_log_blowup ${{ inputs.app_log_blowup }}"
           AGG_ARG="--leaf_log_blowup ${{ inputs.leaf_log_blowup }}"
           MAX_SEGMENT_LENGTH="--max_segment_length ${{ inputs.max_segment_length }}"
-          OUTPUT_PATH="--output_path $METRIC_PATH"
+          OUTPUT_PATH="--output_path $(pwd)/$METRIC_PATH"
           echo "INPUT_ARGS=${FEATURES} ${APP_ARG} ${AGG_ARG} ${MAX_SEGMENT_LENGTH} ${OUTPUT_PATH} ${INPUT_ARGS}" >> $GITHUB_ENV
 
       ##########################################################################
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 947eafc88..593224e30 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -231,39 +231,20 @@ jobs:
           # add metadata to markdown files
           source ci/scripts/utils.sh
           # Parse matrix data into associative arrays
-          while IFS= read -r line; do
+          while IFS= read -r metric_path; do
+            FLAMEGRAPHS=${{ github.event.inputs.flamegraphs || 'false' }}
             # Convert .json to .md
-            md_file="${line%.json}.md"
+            md_path="${metric_path%.json}.md"
 
-            id=$(echo '${{ needs.create-matrix.outputs.matrix }}' | jq -r --arg file "$line" '.[] |
-              select(.id == ($file | split("-")[0])) |
-              {
-                max_segment_length: .max_segment_length,
-                instance_type: .instance_type,
-                memory_allocator: .memory_allocator
-              }')
+            matrix='${{ needs.create-matrix.outputs.matrix }}' # single-quoted: the JSON is pasted in verbatim before bash runs
+            add_metadata_and_flamegraphs \
+              "$metric_path" \
+              "$md_path" \
+              "$matrix" \
+              "$COMMIT_URL" \
+              "$BENCHMARK_WORKFLOW_URL"
 
-            export FLAMEGRAPHS=${{ github.event.inputs.flamegraphs || 'false' }}
-            if [[ "$FLAMEGRAPHS" == 'true' ]]; then
-              python3 ci/scripts/metric_unify/flamegraph.py $line
-              s5cmd cp '.bench_metrics/flamegraphs/*.svg' "${S3_FLAMEGRAPHS_PATH}/${CURRENT_SHA}/"
-            fi
-
-            if [ ! -z "$id" ]; then
-              max_segment_length=$(echo "$id" | jq -r '.max_segment_length')
-              instance_type=$(echo "$id" | jq -r '.instance_type')
-              memory_allocator=$(echo "$id" | jq -r '.memory_allocator')
-
-              # Call add_metadata for each file with its corresponding data
-              add_metadata \
-                "$md_file" \
-                "$max_segment_length" \
-                "$instance_type" \
-                "$memory_allocator" \
-                "$COMMIT_URL" \
-                "$BENCHMARK_WORKFLOW_URL"
-            fi
-            cp "$md_file" "${TMP_DIR}/"
+            cp "$md_path" "${TMP_DIR}/"
           done <<< "$json_files"
 
           echo "" >> summary.md
diff --git a/ci/scripts/utils.sh b/ci/scripts/utils.sh
index 19f5a674d..6ef7c644b 100644
--- a/ci/scripts/utils.sh
+++ b/ci/scripts/utils.sh
@@ -1,22 +1,41 @@
-generate_markdown() {
+add_metadata_and_flamegraphs() {
     local metric_path="$1"
-    local metric_name="$2"
-    local s3_metrics_path="$3"
-    local openvm_root="$4"
+    local md_path="$2" # markdown file handed to add_metadata
+    local matrix="$3" # JSON array; entries carry .id plus the fields selected below
+    local commit_url="$4" # forwarded to add_metadata
+    local benchmark_workflow_url="$5" # forwarded to add_metadata
+    # Reads globals $FLAMEGRAPHS, $S3_FLAMEGRAPHS_PATH, $CURRENT_SHA; looks up this file's entry in $matrix by id.
 
-    if [[ -f $metric_path ]]; then
-        prev_path="${s3_metrics_path}/main-${metric_name}.json"
-        count=`s5cmd ls $prev_path | wc -l`
+    local id="${metric_path%%-*}" inputs repo_root max_segment_length instance_type memory_allocator
+    echo "$id" # id = part of metric_path before the first '-'
 
-        if [[ $count -gt 0 ]]; then
-            s5cmd cp $prev_path prev.json
-            python3 ${openvm_root}/ci/scripts/metric_unify/main.py $metric_path --prev prev.json --aggregation-json ${openvm_root}/ci/scripts/metric_unify/aggregation.json > results.md
-        else
-            echo "No previous benchmark on main branch found"
-            python3 ${openvm_root}/ci/scripts/metric_unify/main.py $metric_path --aggregation-json ${openvm_root}/ci/scripts/metric_unify/aggregation.json > results.md
-        fi
-    else
-        echo "No benchmark metrics found at ${metric_path}"
+    inputs=$(echo "$matrix" | jq -r --arg id "$id" '.[] |
+      select(.id == $id) |
+      {
+        max_segment_length: .max_segment_length,
+        instance_type: .instance_type,
+        memory_allocator: .memory_allocator
+      }')
+
+    if [[ "$FLAMEGRAPHS" == 'true' ]]; then # build SVGs, then upload; the quoted wildcard is left for s5cmd to expand
+      repo_root=$(git rev-parse --show-toplevel)
+      python3 "${repo_root}/ci/scripts/metric_unify/flamegraph.py" "$metric_path"
+      s5cmd cp "${repo_root}/.bench_metrics/flamegraphs/*.svg" "${S3_FLAMEGRAPHS_PATH}/${CURRENT_SHA}/"
+    fi
+
+    if [[ -n "$inputs" ]]; then
+      max_segment_length=$(echo "$inputs" | jq -r '.max_segment_length')
+      instance_type=$(echo "$inputs" | jq -r '.instance_type')
+      memory_allocator=$(echo "$inputs" | jq -r '.memory_allocator')
+
+      # Forward this file's matrix settings to add_metadata (skipped when no matrix entry matched the id)
+      add_metadata \
+        "$md_path" \
+        "$max_segment_length" \
+        "$instance_type" \
+        "$memory_allocator" \
+        "$commit_url" \
+        "$benchmark_workflow_url"
     fi
 }
 
@@ -27,13 +46,15 @@ add_metadata() {
     local memory_allocator="$4"
     local commit_url="$5"
     local benchmark_workflow_url="$6"
+    # vars: $FLAMEGRAPHS, $S3_FLAMEGRAPHS_PATH, $CURRENT_SHA
 
     echo "" >> $result_path
     if [[ "$FLAMEGRAPHS" == 'true' ]]; then
         echo "<details>" >> $result_path
         echo "<summary>Flamegraphs</summary>" >> $result_path
         echo "" >> $result_path
-        for file in .bench_metrics/flamegraphs/*.svg; do
+        repo_root=$(git rev-parse --show-toplevel)
+        for file in $repo_root/.bench_metrics/flamegraphs/*.svg; do
         filename=$(basename "$file")
             flamegraph_url=${S3_FLAMEGRAPHS_PATH}/${CURRENT_SHA}/${filename}
             echo "[![]($flamegraph_url)]($flamegraph_url)" >> $result_path