Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: EXC-1818: Add scripts to run all embedder and EE benchmarks #3054

Merged
merged 7 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions rs/embedders/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test")
load("//bazel:defs.bzl", "rust_bench", "rust_ic_bench", "rust_ic_test_suite_with_extra_srcs")
load("//bazel:fuzz_testing.bzl", "DEFAULT_RUSTC_FLAGS_FOR_FUZZING")
load("//rs/tests:common.bzl", "MACRO_DEPENDENCIES", "UNIVERSAL_CANISTER_ENV", "UNIVERSAL_CANISTER_RUNTIME_DEPS")

package(default_visibility = ["//visibility:public"])

Expand Down Expand Up @@ -193,12 +194,20 @@ rust_library(
] + DEPENDENCIES + DEV_DEPENDENCIES,
)

UNIVERSAL_CANISTER_TEST_DEPS = UNIVERSAL_CANISTER_RUNTIME_DEPS + [
"//rs/universal_canister/impl:universal_canister.module",
]

UNIVERSAL_CANISTER_TEST_ENV = UNIVERSAL_CANISTER_ENV | {
"UNIVERSAL_CANISTER_SERIALIZED_MODULE_PATH": "$(rootpath //rs/universal_canister/impl:universal_canister.module)",
}

rust_ic_bench(
name = "stable_memory_bench",
testonly = True,
srcs = ["benches/stable_memory.rs"],
data = DATA,
env = ENV,
data = DATA + UNIVERSAL_CANISTER_TEST_DEPS,
env = ENV | UNIVERSAL_CANISTER_TEST_ENV,
deps = [
# Keep sorted.
":embedders_bench",
Expand All @@ -211,8 +220,8 @@ rust_ic_bench(
testonly = True,
srcs = ["benches/heap.rs"],
compile_data = glob(["benches/test-data/*"]),
data = DATA,
env = ENV,
data = DATA + UNIVERSAL_CANISTER_TEST_DEPS,
env = ENV | UNIVERSAL_CANISTER_TEST_ENV,
deps = [
# Keep sorted.
":embedders_bench",
Expand Down
32 changes: 22 additions & 10 deletions rs/embedders/benches/compilation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn generate_binaries() -> Vec<(String, NumInstructions, BinaryEncodedWasm)> {
let mut result = vec![
(
"simple".to_string(),
NumInstructions::from(180_000),
NumInstructions::from(522_000),
BinaryEncodedWasm::new(
wat::parse_str(
r#"
Expand All @@ -31,7 +31,7 @@ fn generate_binaries() -> Vec<(String, NumInstructions, BinaryEncodedWasm)> {
),
(
"empty".to_string(),
NumInstructions::from(90_000),
NumInstructions::from(432_000),
BinaryEncodedWasm::new(
wat::parse_str(
r#"
Expand All @@ -50,7 +50,7 @@ fn generate_binaries() -> Vec<(String, NumInstructions, BinaryEncodedWasm)> {
many_adds.push_str("))");
result.push((
"many_adds".to_string(),
NumInstructions::from(1_200_162_000),
NumInstructions::from(1_200_504_000),
BinaryEncodedWasm::new(wat::parse_str(many_adds).expect("Failed to convert wat to wasm")),
));

Expand All @@ -61,7 +61,7 @@ fn generate_binaries() -> Vec<(String, NumInstructions, BinaryEncodedWasm)> {
many_funcs.push(')');
result.push((
"many_funcs".to_string(),
NumInstructions::from(3_300_090_000),
NumInstructions::from(3_300_432_000),
BinaryEncodedWasm::new(wat::parse_str(many_funcs).expect("Failed to convert wat to wasm")),
));

Expand All @@ -71,7 +71,7 @@ fn generate_binaries() -> Vec<(String, NumInstructions, BinaryEncodedWasm)> {

result.push((
"real_world_wasm".to_string(),
NumInstructions::from(12_187_254_000),
NumInstructions::from(12_569_958_000),
real_world_wasm,
));

Expand All @@ -92,13 +92,17 @@ fn wasm_compilation(c: &mut Criterion) {
let embedder = WasmtimeEmbedder::new(config.clone(), no_op_logger());

group.bench_with_input(
BenchmarkId::from_parameter(name),
BenchmarkId::from_parameter(name.clone()),
&(embedder, comp_cost, wasm),
|b, (embedder, comp_cost, wasm)| {
b.iter_with_large_drop(|| {
let (c, r) = compile(embedder, wasm);
let r = r.expect("Failed to compile canister wasm");
assert_eq!(*comp_cost, r.1.compilation_cost);
assert_eq!(
*comp_cost, r.1.compilation_cost,
"update the reference compilation cost for '{name}' to {}",
r.1.compilation_cost
);
(c, r)
})
},
Expand All @@ -124,7 +128,11 @@ fn wasm_deserialization(c: &mut Criterion) {
let (_, serialized_module) = compile(&embedder, &wasm)
.1
.expect("Failed to compile canister wasm");
assert_eq!(comp_cost, serialized_module.compilation_cost);
assert_eq!(
comp_cost, serialized_module.compilation_cost,
"update the reference compilation cost for '{name}' to {}",
serialized_module.compilation_cost
);
let serialized_module_bytes = serialized_module.bytes;

group.bench_with_input(
Expand Down Expand Up @@ -158,14 +166,18 @@ fn wasm_validation_instrumentation(c: &mut Criterion) {
let embedder = WasmtimeEmbedder::new(config.clone(), no_op_logger());

group.bench_with_input(
BenchmarkId::from_parameter(name),
BenchmarkId::from_parameter(&name),
&(embedder, comp_cost, wasm),
|b, (embedder, comp_cost, wasm)| {
b.iter_with_large_drop(|| {
let (_, instrumentation_output) =
validate_and_instrument_for_testing(embedder, wasm)
.expect("Failed to validate and instrument canister wasm");
assert_eq!(*comp_cost, instrumentation_output.compilation_cost);
assert_eq!(
*comp_cost, instrumentation_output.compilation_cost,
"update the reference compilation cost for '{name}' to {}",
instrumentation_output.compilation_cost
);
})
},
);
Expand Down
24 changes: 17 additions & 7 deletions rs/embedders/benches/embedders_bench/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,30 @@ fn initialize_execution_test(
const LARGE_INSTRUCTION_LIMIT: u64 = 1_000_000_000_000;

// Get the memory type of the wasm module using ic_wasm_transform.
let module = Module::parse(wasm, true).unwrap();
let mut is_wasm64 = false;
if let Some(mem) = module.memories.first() {
if mem.memory64 {
is_wasm64 = true
let is_wasm64 = {
// 1f 8b is GZIP magic number, 08 is DEFLATE algorithm.
if wasm.starts_with(b"\x1f\x8b\x08") {
// Gzipped Wasm is wasm32.
false
} else {
let module = Module::parse(wasm, true).unwrap();
if let Some(mem) = module.memories.first() {
mem.memory64
} else {
// Wasm with no memory is wasm32.
false
}
}
}
};

let mut current = cell.borrow_mut();
if current.is_some() {
return;
}

let mut test = ExecutionTestBuilder::new()
.with_install_code_instruction_limit(LARGE_INSTRUCTION_LIMIT)
.with_install_code_slice_instruction_limit(LARGE_INSTRUCTION_LIMIT)
.with_instruction_limit(LARGE_INSTRUCTION_LIMIT)
.with_instruction_limit_without_dts(LARGE_INSTRUCTION_LIMIT)
.with_slice_instruction_limit(LARGE_INSTRUCTION_LIMIT);
Expand Down Expand Up @@ -70,7 +80,7 @@ fn initialize_execution_test(
PostSetupAction::None => {}
}

// Execute a message to synce the new memory so that time isn't included in
// Execute a message to sync the new memory so that time isn't included in
// benchmarks.
test.ingress(canister_id, "update_empty", Encode!(&()).unwrap())
.unwrap();
Expand Down
6 changes: 3 additions & 3 deletions rs/execution_environment/benches/lib/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ fn run_benchmark<G, I, W, R>(
G: AsRef<str>,
I: AsRef<str>,
W: AsRef<str>,
R: Fn(&ExecutionEnvironment, u64, BenchmarkArgs),
R: Fn(&str, &ExecutionEnvironment, u64, BenchmarkArgs),
{
let mut group = c.benchmark_group(group.as_ref());
let mut bench_args = None;
Expand All @@ -231,7 +231,7 @@ fn run_benchmark<G, I, W, R>(
bench_args.as_ref().unwrap().clone()
},
|args| {
routine(exec_env, expected_ops, args);
routine(id.as_ref(), exec_env, expected_ops, args);
},
BatchSize::SmallInput,
);
Expand All @@ -257,7 +257,7 @@ fn check_sandbox_defined() -> bool {
pub fn run_benchmarks<G, R>(c: &mut Criterion, group: G, benchmarks: &[Benchmark], routine: R)
where
G: AsRef<str>,
R: Fn(&ExecutionEnvironment, u64, BenchmarkArgs) + Copy,
R: Fn(&str, &ExecutionEnvironment, u64, BenchmarkArgs) + Copy,
{
if !check_sandbox_defined() {
return;
Expand Down
60 changes: 60 additions & 0 deletions rs/execution_environment/benches/run-all-benchmarks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
set -ue
##
## Top-level script to run all execution and embedder benchmarks.
## Each benchmark is executed up to `REPEAT` times; the best (min) results
## are accumulated by `run-benchmark.sh` and summarized by
## `summarize-results.sh` against the checked-in baseline.
## Usage:
##   ./rs/execution_environment/benches/run-all-benchmarks.sh
##

printf "%-12s := %s\n" \
    "REPEAT" "${REPEAT:=9}"

RUN_BENCHMARK="${0%/*}/run-benchmark.sh"
# NOTE: use `{ …; exit 1; }` rather than `( … )` — an `exit` inside a
# subshell terminates only the subshell, not this script.
[ -x "${RUN_BENCHMARK}" ] || { echo "Error accessing script: ${RUN_BENCHMARK}" >&2; exit 1; }
SUMMARIZE_RESULTS="${0%/*}/summarize-results.sh"
[ -x "${SUMMARIZE_RESULTS}" ] || { echo "Error accessing script: ${SUMMARIZE_RESULTS}" >&2; exit 1; }

## Run one iteration of a benchmark and summarize its results.
## Arguments:
##   $1 - iteration number (0..REPEAT)
##   $2 - human-readable benchmark name
##   $3 - bazel target of the benchmark
##   $4 - file with the best (min) results
##   $5 - optional benchmark filter (passed through to the runner)
run() {
    local i="${1}"
    local name="${2}"
    local bench="${3}"
    # File with best (min) results.
    local min_file="${4}"
    local filter="${5:-}"
    local counter_file counter quick

    # Counter file tracks the number of benchmark executions so far.
    counter_file="${min_file%.*}.counter"
    counter=$(cat "${counter_file}" 2>/dev/null || echo "-1")
    # Quickly execute the benchmarks initially to identify any broken ones.
    [ "${counter}" -eq "-1" ] && quick="yes" || quick="no"
    # A missing results file invalidates any previous counter.
    [ -f "${min_file}" ] || counter="-1"
    # Execute benchmark if needed.
    if [ "${counter}" -lt "${i}" ]; then
        echo "==> Running ${name} benchmarks ($((counter + 1)) of ${REPEAT})"
        QUICK="${quick}" BENCH="${bench}" MIN_FILE="${min_file}" FILTER="${filter}" \
            "${RUN_BENCHMARK}"
        echo "$((counter + 1))" >"${counter_file}"
    fi
    # Summarize results if the benchmark was executed or if it's the final
    # iteration. POSIX marks `[ … -o … ]` as obsolescent, so use two tests.
    if [ "${counter}" -lt "${i}" ] || [ "${i}" = "${REPEAT}" ]; then
        echo "==> Summarizing ${name} results:"
        NAME="${name}" MIN_FILE="${min_file}" "${SUMMARIZE_RESULTS}"
    fi
}

for i in $(seq 0 "${REPEAT}"); do
    run "${i}" "Embedders Compilation" \
        "//rs/embedders:compilation_bench" "EMBEDDERS_COMPILATION.min"
    run "${i}" "Embedders Heap" \
        "//rs/embedders:heap_bench" "EMBEDDERS_HEAP.min"
    run "${i}" "Embedders Stable Memory" \
        "//rs/embedders:stable_memory_bench" "EMBEDDERS_STABLE_MEMORY.min"
    run "${i}" "System API Inspect Message" \
        "//rs/execution_environment:execute_inspect_message_bench" "SYSTEM_API_INSPECT_MESSAGE.min"
    run "${i}" "System API Query" \
        "//rs/execution_environment:execute_query_bench" "SYSTEM_API_QUERY.min"
    run "${i}" "System API Update" \
        "//rs/execution_environment:execute_update_bench" "SYSTEM_API_UPDATE.min"
    run "${i}" "Wasm Instructions" \
        "//rs/execution_environment:wasm_instructions_bench" "WASM_INSTRUCTIONS.min"
done
58 changes: 58 additions & 0 deletions rs/execution_environment/benches/run-benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bash
set -ue
##
## Helper script to run the specified `BENCH`
## and store the best (min) results in the `MIN_FILE`.
##

DEPENDENCIES="awk bash bazel rg sed tail tee"
# NOTE: `{ …; exit 1; }` rather than `( … )` — `exit` in a subshell would
# not terminate this script (it only worked before because of `set -e`).
which ${DEPENDENCIES} >/dev/null || { echo "Error checking dependencies: ${DEPENDENCIES}" >&2; exit 1; }

QUICK="${QUICK:-no}"
# Quick mode runs criterion with `--quick` to fail fast on broken benches.
[ "${QUICK}" = "no" ] || BENCH_ARGS="--quick --output-format=bencher"

printf " %-12s := %s\n" \
    "BENCH" "${BENCH:?Usage: BENCH='//rs/embedders:heap_bench' ${0}}" \
    "BENCH_ARGS" "${BENCH_ARGS:=--warm-up-time=1 --measurement-time=1 --output-format=bencher}" \
    "FILTER" "${FILTER:=}" \
    "MIN_FILE" "${MIN_FILE:=${0##*/}.min}" \
    "LOG_FILE" "${LOG_FILE:=${MIN_FILE%.*}.log}"

TMP_FILE="${TMP_FILE:-${MIN_FILE%.*}.tmp}"

# Run the benchmark, capturing the full log while echoing only the
# `bench:` result lines to the console.
bash -c "set -o pipefail; \
    bazel run '${BENCH}' -- ${FILTER} ${BENCH_ARGS} \
    2>&1 | tee '${LOG_FILE}' | rg '^(test .* )?bench:' --line-buffered \
    | sed -uEe 's/^test (.+) ... bench: +/ > bench: \1 /' -Ee 's/^bench: +/ > quick: /'" \
    || {
        echo "Error running the benchmark:"
        tail -10 "${LOG_FILE}" | sed 's/^/ ! /'
        echo "For more details see: ${LOG_FILE}"
        exit 1
    }

if ! [ -s "${MIN_FILE}" ]; then
    # First run: seed the min file with all result lines.
    echo " Storing results in ${MIN_FILE}"
    rg "^test .* bench:" "${LOG_FILE}" >"${MIN_FILE}" || echo " No results (quick run?)"
else
    # Subsequent runs: keep the per-benchmark minimum of old and new results.
    echo " Merging ${LOG_FILE} into ${MIN_FILE}"
    rm -f "${TMP_FILE}"
    rg "^test .* bench:" "${LOG_FILE}" | while read -r new_bench; do
        name=$(echo "${new_bench}" | sed -E 's/^test (.+) ... bench:.*/\1/')
        new_result=$(echo "${new_bench}" | sed -E 's/.*bench: +([0-9]+) ns.*/\1/')

        min_bench=$(rg -F " ${name} " "${MIN_FILE}" || true)
        matches=$(echo "${min_bench}" | wc -l)
        # More than one match means the fixed-string name lookup is ambiguous.
        [ "${matches}" -le 1 ] || { echo "Error matching ${name} times in ${MIN_FILE}" >&2; exit 1; }
        min_result=$(echo "${min_bench}" | sed -E 's/.*bench: +([0-9]+) ns.*/\1/')

        if [ -z "${min_result}" ] || [ "${new_result}" -lt "${min_result}" ]; then
            echo " - improved ${name} time: $((new_result / 1000)) µs"
            min_bench="${new_bench}"
        fi
        echo "${min_bench}" >>"${TMP_FILE}"
    done
    echo " Updating results in ${MIN_FILE}"
    mv -f "${TMP_FILE}" "${MIN_FILE}" 2>/dev/null || echo " No results (quick run?)"
fi
rm -f "${LOG_FILE}"
53 changes: 53 additions & 0 deletions rs/execution_environment/benches/summarize-results.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
set -ue
##
## Helper script to summarize the results in the `MIN_FILE`
## comparing them to the `BASELINE_DIR` results.
##

DEPENDENCIES="awk rg sed"
# NOTE: `{ …; exit 1; }` rather than `( … )` — `exit` in a subshell would
# not terminate this script (it only worked before because of `set -e`).
which ${DEPENDENCIES} >/dev/null || { echo "Error checking dependencies: ${DEPENDENCIES}" >&2; exit 1; }

printf " %-12s := %s\n" \
    "MIN_FILE" "${MIN_FILE:=${0##*/}.min}" \
    "BASELINE_DIR" "${BASELINE_DIR:=${0%/*}/baseline}"

NAME="${NAME:-${MIN_FILE%.*}}"

if [ ! -s "${MIN_FILE}" ]; then
    echo " No results to summarize in ${MIN_FILE} (quick run?)" && exit 0
fi
[ -d "${BASELINE_DIR}" ] || { echo "Error accessing directory: ${BASELINE_DIR}" >&2; exit 1; }

BASELINE_FILE="${BASELINE_DIR}/${MIN_FILE##*/}"
if [ ! -s "${BASELINE_FILE}" ]; then
    echo "No baseline found: ${BASELINE_FILE}" && exit 0
fi

# Sum up the new and baseline times of every benchmark present in both files.
total_baseline="0"
total_new="0"
while read -r min_bench; do
    name=$(echo "${min_bench}" | sed -E 's/^test (.+) ... bench:.*/\1/')
    new_result=$(echo "${min_bench}" | sed -E 's/.*bench: +([0-9]+) ns.*/\1/')

    baseline_bench=$(rg -F " ${name} " "${BASELINE_FILE}" || true)
    matches=$(echo "${baseline_bench}" | wc -l)
    # More than one match means the fixed-string name lookup is ambiguous.
    [ "${matches}" -le 1 ] || { echo "Error matching ${name} times in ${BASELINE_FILE}" >&2; exit 1; }
    baseline_result=$(echo "${baseline_bench}" | sed -E 's/.*bench: +([0-9]+) ns.*/\1/')

    if [ -n "${new_result}" ] && [ -n "${baseline_result}" ]; then
        total_baseline=$((total_baseline + baseline_result))
        total_new=$((total_new + new_result))
    fi
done <"${MIN_FILE}"

# Guard against an empty intersection of benchmark names: a zero baseline
# total would make the percentage computation below divide by zero.
if [ "${total_baseline}" -eq 0 ]; then
    echo "No common benchmarks found in ${BASELINE_FILE}" && exit 0
fi

baseline_commit=$(git rev-list --abbrev-commit -1 HEAD "${BASELINE_FILE}" | head -c 9)
min_commit=$(git rev-list --abbrev-commit -1 HEAD | head -c 9)
# Relative difference in tenths of a percent (keeps one decimal digit
# through the integer arithmetic).
diff=$(((total_new - total_baseline) * 100 * 10 / total_baseline))
# Treat changes within ±2% as noise; otherwise report with one decimal digit.
diff=$(echo "${diff}" | awk '{ print $0^2 <= (2 * 10)^2 ? 0 : $0 / 10 }')
total_new_ms=$((total_new / 1000 / 1000))
printf " = ${baseline_commit}..${min_commit}: ${NAME}: total time: ${total_new_ms} ms "
case "${diff}" in
    0) echo "(no change)" ;;
    -*) echo "(improved by ${diff}%)" ;;
    *) echo "(regressed by ${diff}%)" ;;
esac
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ print_old_report_field() {
local field="${3}"
## Apply name transformations to match between local (new) and remote (old) benchmarks
## ic0.call()/1B -> ic0.*call\(\).*1B
match=$(echo "${name}" | sed -Ee 's#([^()0-9A-Za-z_]+)#.*#g' -Ee 's#[()]#\\&#g' -Ee 's#_#.#g')
match=$(echo "${name}" \
| sed -Ee 's#/wasm32##' -Ee 's#([^()0-9A-Za-z_]+)#.*#g' -Ee 's#[()]#\\&#g' -Ee 's#_#.#g')
set -o pipefail
cat "${OLD_REPORT}" | rg "${match}" | sed -Ee 's# +# #g' \
| awk -F '|' "NR == ${line} {printf \$$((${field} + 1))} NR == 3 {exit 1}" \
Expand Down
Loading
Loading