perf: EXC-1818: Add scripts to run all embedder and EE benchmarks (#3054)

This PR adds scripts to run all embedder and execution environment benchmarks.

The script to run all benchmarks is `run-all-benchmarks.sh`, which repeats benchmarks 10 times. The number of repetitions can be adjusted with the `REPEAT` environment variable:

```
REPEAT=3 run-all-benchmarks.sh
```

The first time the script runs, it quickly executes the benchmarks to identify any broken ones. At the end, the script summarizes the changes. Example run:

```
==> Summarizing Embedders Heap results:
    MIN_FILE     := EMBEDDERS_HEAP.min
    BASELINE_DIR := ./rs/execution_environment/benches/baseline
= b36fae7..44e6fe4: System API Inspect Message total time: 672 ms (improved by -65.2%)
+ inspect/wasm64/ic0_accept_message()* time regressed by 770.6%
- inspect/wasm32/ic0_msg_method_name_copy()/1B time improved by -90%
- inspect/wasm32/ic0_accept_message()* time improved by -19%
```
dfinity · Dec 12, 2024 · 02b2518 · 02b2518
1 parent 226870d
commit 02b2518
Show file tree

Hide file tree

Showing 3 changed files with 199 additions and 0 deletions.
diff --git a/rs/execution_environment/benches/run-all-benchmarks.sh b/rs/execution_environment/benches/run-all-benchmarks.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+set -ue
+##
+## Top-level script to run all execution and embedder benchmarks.
+## Usage:
+##     ./rs/execution_environment/benches/run-all-benchmarks.sh | tee summary.txt
+##
+## The best (minimum) results are located in the `*.min` files in the current directory.
+## These should be manually copied to `rs/execution_environment/benches/baseline/`.
+## A summary of the results is printed to the standard output.
+##
+
+printf "%-12s := %s\n" \
+    "REPEAT" "${REPEAT:=9}" >&2
+
+RUN_BENCHMARK="${0%/*}/run-benchmark.sh"
+[ -x "${RUN_BENCHMARK}" ] || (echo "Error accessing script: ${RUN_BENCHMARK}" >&2 && exit 1)
+SUMMARIZE_RESULTS="${0%/*}/summarize-results.sh"
+[ -x "${SUMMARIZE_RESULTS}" ] || (echo "Error accessing script: ${SUMMARIZE_RESULTS}" >&2 && exit 1)
+
+run() {
+    local i="${1}"
+    local name="${2}"
+    local bench="${3}"
+    # File with best (min) results.
+    local min_file="${4}"
+    local filter="${5:-}"
+
+    # Counter file tracks the number of benchmark executions so far.
+    counter_file="${min_file%.*}.counter"
+    counter=$(cat "${counter_file}" 2>/dev/null || echo "-1")
+    # Quickly execute the benchmarks initially to identify any broken ones.
+    [ "${counter}" -eq "-1" ] && quick="yes" || quick="no"
+    [ -f "${min_file}" ] || counter="-1"
+    # Execute benchmark if needed.
+    if [ "${counter}" -lt "${i}" ]; then
+        echo "==> Running ${name} benchmarks ($((counter + 1)) of ${REPEAT})" >&2
+        QUICK="${quick}" BENCH="${bench}" MIN_FILE="${min_file}" FILTER="${filter}" \
+            "${RUN_BENCHMARK}"
+        echo "$((counter + 1))" >"${counter_file}"
+    fi
+    # Summarize results if the benchmark was executed or if it's the final iteration.
+    if [ "${counter}" -lt "${i}" -o "${i}" = "${REPEAT}" ]; then
+        echo "==> Summarizing ${name} results:" >&2
+        NAME="${name}" MIN_FILE="${min_file}" "${SUMMARIZE_RESULTS}"
+    fi
+}
+
+for i in $(seq 0 "${REPEAT}"); do
+    run "${i}" "Embedders Compilation" \
+        "//rs/embedders:compilation_bench" "EMBEDDERS_COMPILATION.min"
+    run "${i}" "Embedders Heap" \
+        "//rs/embedders:heap_bench" "EMBEDDERS_HEAP.min"
+    run "${i}" "Embedders Stable Memory" \
+        "//rs/embedders:stable_memory_bench" "EMBEDDERS_STABLE_MEMORY.min"
+    run "${i}" "System API Inspect Message" \
+        "//rs/execution_environment:execute_inspect_message_bench" "SYSTEM_API_INSPECT_MESSAGE.min"
+    run "${i}" "System API Query" \
+        "//rs/execution_environment:execute_query_bench" "SYSTEM_API_QUERY.min"
+    run "${i}" "System API Update" \
+        "//rs/execution_environment:execute_update_bench" "SYSTEM_API_UPDATE.min"
+    run "${i}" "Wasm Instructions" \
+        "//rs/execution_environment:wasm_instructions_bench" "WASM_INSTRUCTIONS.min"
+done
diff --git a/rs/execution_environment/benches/run-benchmark.sh b/rs/execution_environment/benches/run-benchmark.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+set -ue
+##
+## Helper script to run the specified `BENCH`
+## and store the best (minimum) results in the `MIN_FILE`.
+##
+
+DEPENDENCIES="awk bash bazel rg sed tail tee"
+which ${DEPENDENCIES} >/dev/null || (echo "Error checking dependencies: ${DEPENDENCIES}" >&2 && exit 1)
+
+QUICK="${QUICK:-no}"
+[ "${QUICK}" = "no" ] || BENCH_ARGS="--quick --output-format=bencher"
+
+printf "    %-12s := %s\n" \
+    "BENCH" "${BENCH:?Usage: BENCH='//rs/embedders:heap_bench' ${0}}" \
+    "BENCH_ARGS" "${BENCH_ARGS:=--warm-up-time=1 --measurement-time=1 --output-format=bencher}" \
+    "FILTER" "${FILTER:=}" \
+    "MIN_FILE" "${MIN_FILE:=${0##*/}.min}" \
+    "LOG_FILE" "${LOG_FILE:=${MIN_FILE%.*}.log}" >&2
+
+TMP_FILE="${TMP_FILE:-${MIN_FILE%.*}.tmp}"
+
+# Run the benchmark and capture its output in the `LOG_FILE`.
+bash -c "set -o pipefail; \
+    bazel run '${BENCH}' -- ${FILTER} ${BENCH_ARGS} \
+        2>&1 | tee '${LOG_FILE}' | rg '^(test .* )?bench:' --line-buffered \
+        | sed -uEe 's/^test (.+) ... bench: +/> bench: \1 /' -Ee 's/^bench: +/> quick: /'" \
+    || (
+        echo "Error running the benchmark:"
+        tail -10 "${LOG_FILE}" | sed 's/^/! /'
+        echo "For more details see: ${LOG_FILE}"
+        exit 1
+    ) >&2
+
+if ! [ -s "${MIN_FILE}" ]; then
+    echo "    Storing results in ${MIN_FILE}" >&2
+    cat "${LOG_FILE}" | rg "^test .* bench:" >"${MIN_FILE}" \
+        || echo "    No results found in ${LOG_FILE} (quick run?)" >&2
+else
+    echo "    Merging ${LOG_FILE} into ${MIN_FILE}" >&2
+    rm -f "${TMP_FILE}"
+    cat "${LOG_FILE}" | rg "^test .* bench:" | while read new_bench; do
+        name="${new_bench#test }"
+        name="${name% ... bench:*}"
+        new_result_ns="${new_bench#* ... bench: }"
+        new_result_ns="${new_result_ns% ns/iter*}"
+
+        min_bench=$(rg -F "test ${name} ... bench:" "${MIN_FILE}" || true)
+        min_result_ns="${min_bench#* ... bench: }"
+        min_result_ns="${min_result_ns% ns/iter*}"
+
+        if [ -z "${min_result_ns}" ] || [ "${new_result_ns}" -lt "${min_result_ns}" ]; then
+            echo "^ improved: ${name} time: $((new_result_ns / 1000)) µs"
+            min_bench="${new_bench}"
+        fi
+        echo "${min_bench}" >>"${TMP_FILE}"
+    done
+    echo "    Updating results in ${MIN_FILE}" >&2
+    mv -f "${TMP_FILE}" "${MIN_FILE}" 2>/dev/null || echo "    No results to update (quick run?)" >&2
+fi
+rm -f "${LOG_FILE}"
diff --git a/rs/execution_environment/benches/summarize-results.sh b/rs/execution_environment/benches/summarize-results.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+set -ue
+##
+## Helper script to summarize the results in the `MIN_FILE`
+## comparing them to the `BASELINE_DIR` results.
+##
+
+# Tools this script shells out to; stop early if any of them cannot be found.
+DEPENDENCIES="awk rg sed"
+# DEPENDENCIES is deliberately unquoted: every tool must be a separate argument.
+if ! which ${DEPENDENCIES} >/dev/null; then
+    echo "Error checking dependencies: ${DEPENDENCIES}" >&2
+    exit 1
+fi
+
+# Resolve the effective settings and report them on stderr.
+: "${MIN_FILE:=${0##*/}.min}"
+: "${BASELINE_DIR:=${0%/*}/baseline}"
+printf "    %-12s := %s\n" \
+    "MIN_FILE" "${MIN_FILE}" \
+    "BASELINE_DIR" "${BASELINE_DIR}" >&2
+
+NAME="${NAME:-${MIN_FILE%.*}}"
+TMP_FILE="${TMP_FILE:-${MIN_FILE%.*}.tmp}"
+
+# Nothing to do without results or without a baseline to compare them to.
+if ! [ -s "${MIN_FILE}" ]; then
+    echo "    No results to summarize in ${MIN_FILE} (quick run?)" >&2
+    exit 0
+fi
+BASELINE_FILE="${BASELINE_DIR}/${MIN_FILE##*/}"
+if ! [ -s "${BASELINE_FILE}" ]; then
+    echo "    No baseline found in ${BASELINE_FILE}" >&2
+    exit 0
+fi
+
+echo_diff() {
+    diff=$(((${2} - ${1}) * 100 * 10 / ${1}))
+    awk "BEGIN { print (${diff})^2 <= (2 * 10)^2 ? 0 : ${diff} / 10 }"
+}
+
+# Compare the `MIN_FILE` to `BASELINE_FILE`.
+# For every benchmark present in both files, add its times to the totals and
+# append a "<diff in percent> <name>" line to `TMP_FILE`.
+total_baseline_ns="0"
+total_new_ns="0"
+rm -f "${TMP_FILE}"
+# Example content:
+#   test update/wasm64/baseline/empty loop ... bench:     2720243 ns/iter (+/- 48904)
+# `read -r` keeps backslashes in benchmark names intact, so the fixed-string
+# lookup below searches for the name exactly as it was stored.
+while read -r min_bench; do
+    # Benchmark name and its new time in nanoseconds.
+    name="${min_bench#test }"
+    name="${name% ... bench:*}"
+    new_result_ns="${min_bench#* ... bench: }"
+    new_result_ns="${new_result_ns% ns/iter*}"
+
+    # Baseline time for the same benchmark; empty if the baseline lacks it.
+    baseline_bench=$(rg -F "test ${name} ... bench:" "${BASELINE_FILE}" || true)
+    baseline_result_ns="${baseline_bench#* ... bench: }"
+    baseline_result_ns="${baseline_result_ns% ns/iter*}"
+
+    if [ -n "${new_result_ns}" ] && [ -n "${baseline_result_ns}" ]; then
+        total_baseline_ns=$((total_baseline_ns + baseline_result_ns))
+        total_new_ns=$((total_new_ns + new_result_ns))
+        echo "$(echo_diff "${baseline_result_ns}" "${new_result_ns}") ${name}" >>"${TMP_FILE}"
+    fi
+done <"${MIN_FILE}"
+
+# Produce a summary.
+# Abbreviated commits: the latest one reachable from HEAD that touched the
+# baseline file, and HEAD itself.
+baseline_commit=$(git rev-list --abbrev-commit -1 HEAD -- "${BASELINE_FILE}")
+min_commit=$(git rev-list --abbrev-commit -1 HEAD)
+total_diff=$(echo_diff "${total_baseline_ns}" "${total_new_ns}")
+# Pass the values as arguments, not inside the format string, so that a `%` or
+# a backslash in NAME is printed literally.
+printf "= %s..%s: %s total time: %s ms " \
+    "${baseline_commit}" "${min_commit}" "${NAME}" "$((total_new_ns / 1000 / 1000))"
+case "${total_diff}" in
+    0) echo "(no change)" ;;
+    -*) echo "(improved by ${total_diff}%)" ;;
+    *) echo "(regressed by ${total_diff}%)" ;;
+esac
+
+# Produce top regressed/improved details.
+# `TMP_FILE` holds "<diff in percent> <name>" lines; list up to five of the
+# largest positive and up to five of the largest negative changes.
+if [ "${total_diff}" != "0" ]; then
+    sort -rn "${TMP_FILE}" | rg '^[1-9]' | head -5 | while read -r diff name; do
+        echo "+ ${name} time regressed by ${diff}%"
+    done
+    sort -n "${TMP_FILE}" | rg '^-' | head -5 | while read -r diff name; do
+        echo "- ${name} time improved by ${diff}%"
+    done
+fi
+# NOTE(review): the cleanup below is disabled, so `TMP_FILE` is left behind.
+# rm -f "${TMP_FILE}"