From ea2f9a72674ae7fd3e810a12026bfc26c693e1c1 Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 6 Nov 2023 13:04:34 -0600 Subject: [PATCH] feat: gperftools (#3096) Did this a bit ago and didn't find it super useful, but for just reporting memory usage might be useful enough to get in Example run ``` Load Average: 4.45, 7.50, 6.81 Dumping heap profile to ./ultra_honk_bench.0001.heap (100 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0002.heap (205 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0003.heap (305 MB currently in use) ------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------ construct_proof_ultra/sha256/10/repeats:1 292 ms 232 ms 3 Dumping heap profile to ./ultra_honk_bench.0004.heap (410 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0005.heap (516 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0006.heap (620 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0007.heap (724 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0008.heap (881 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0009.heap (981 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0010.heap (1082 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0011.heap (4536 MB allocated cumulatively, 908 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0012.heap (1186 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0013.heap (1289 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0014.heap (1393 MB currently in use) construct_proof_ultra/keccak/10/repeats:1 682 ms 462 ms 2 Dumping heap profile to ./ultra_honk_bench.0015.heap (7705 MB allocated cumulatively, 1117 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0016.heap (1501 MB currently 
in use) Dumping heap profile to ./ultra_honk_bench.0017.heap (1613 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0018.heap (1725 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0019.heap (1837 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0020.heap (1941 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0021.heap (2069 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0022.heap (2170 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0023.heap (2271 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0024.heap (2375 MB currently in use) Dumping heap profile to ./ultra_honk_bench.0025.heap (2479 MB currently in use) construct_proof_ultra/ecdsa_verification/10/repeats:1 1161 ms 765 ms 1 Dumping heap profile to ./ultra_honk_bench.0026.heap (11195 MB allocated cumulatively, 1275 MB currently in use) construct_proof_ultra/merkle_membership/10/repeats:1 218 ms 180 ms 4 Dumping heap profile to ./ultra_honk_bench.0027.heap (Exiting, 1218 MB in use) File: ultra_honk_bench Type: inuse_space Showing nodes accounting for 1424773.03kB, 99.87% of 1426698.52kB total Dropped 117 nodes (cum <= 7133.49kB) flat flat% sum% cum cum% 805340.70kB 56.45% 56.45% 805340.70kB 56.45% std::vector::_M_realloc_insert 234258.12kB 16.42% 72.87% 234258.12kB 16.42% barretenberg::scalar_multiplication::pippenger_runtime_state::pippenger_runtime_state 172033.31kB 12.06% 84.93% 172033.31kB 12.06% barretenberg::Polynomial::operator= 172033.28kB 12.06% 96.98% 172033.28kB 12.06% barretenberg::Polynomial::Polynomial 32896.03kB 2.31% 99.29% 32896.03kB 2.31% barretenberg::scalar_multiplication::point_table_alloc 8192kB 0.57% 99.86% 24576kB 1.72% proof_system::CircuitBuilderBase::add_variable 14.66kB 0.001% 99.86% 11857.81kB 0.83% proof_system::UltraCircuitBuilder_::create_new_range_constraint 2.44kB 0.00017% 99.86% 611271.87kB 42.85% proof_system::honk::UltraComposer_::create_instance 1.66kB 0.00012% 
99.86% 262193.34kB 18.38% proof_system::honk::ProverInstance_::compute_proving_key 0.39kB 2.7e-05% 99.87% 1424833.58kB 99.87% benchmark::internal::BenchmarkRunner::DoNIterations 0.20kB 1.4e-05% 99.87% 267154.41kB 18.73% proof_system::honk::UltraComposer_::compute_commitment_key 0.16kB 1.1e-05% 99.87% 11843kB 0.83% proof_system::UltraCircuitBuilder_::create_range_list 0.05kB 3.3e-06% 99.87% 32896.08kB 2.31% barretenberg::srs::factories::FileCrsFactory::get_prover_crs 0.03kB 2.2e-06% 99.87% 1424837.85kB 99.87% benchmark::RunSpecifiedBenchmarks 0 0% 99.87% 1426698.52kB 100% __libc_start_main 0 0% 99.87% 1426698.52kB 100% _start 0 0% 99.87% 96284.88kB 6.75% barretenberg::constexpr_for 0 0% 99.87% 32896.03kB 2.31% barretenberg::srs::factories::FileProverCrs::FileProverCrs 0 0% 99.87% 1424833.18kB 99.87% bench_utils::construct_proof_with_specified_num_iterations 0 0% 99.87% 625033.88kB 43.81% bench_utils::generate_keccak_test_circuit 0 0% 99.87% 188524.32kB 13.21% bench_utils::generate_sha256_test_circuit 0 0% 99.87% 1424833.07kB 99.87% bench_utils::get_prover 0 0% 99.87% 1424833.19kB 99.87% benchmark::internal::(anonymous namespace)::RunInThread 0 0% 99.87% 1424833.19kB 99.87% benchmark::internal::BenchmarkInstance::Run 0 0% 99.87% 1424833.58kB 99.87% benchmark::internal::BenchmarkRunner::DoOneRepetition 0 0% 99.87% 1424837.85kB 99.87% main 0 0% 99.87% 21068kB 1.48% proof_system::UltraCircuitBuilder_::create_big_mul_gate 0 0% 99.87% 11842.84kB 0.83% proof_system::UltraCircuitBuilder_::create_dummy_constraints 0 0% 99.87% 780018.88kB 54.67% proof_system::UltraCircuitBuilder_::create_gates_from_plookup_accumulators 0 0% 99.87% 12052.66kB 0.84% proof_system::UltraCircuitBuilder_::decompose_into_default_range 0 0% 99.87% 16384.12kB 1.15% proof_system::compute_first_and_last_lagrange_polynomials 0 0% 99.87% 90112.69kB 6.32% proof_system::construct_selector_polynomials 0 0% 99.87% 32768.34kB 2.30% proof_system::construct_wire_polynomials_base 0 0% 99.87% 344114.02kB 24.12% 
proof_system::honk::ProverInstance_::ProverInstance_ 0 0% 99.87% 81920.69kB 5.74% proof_system::honk::ProverInstance_::compute_witness 0 0% 99.87% 122880.94kB 8.61% proof_system::honk::flavor::ProvingKey_::ProvingKey_ 0 0% 99.87% 21068kB 1.48% proof_system::plonk::stdlib::field_t::add_two 0 0% 99.87% 12052.66kB 0.84% proof_system::plonk::stdlib::field_t::create_range_constraint 0 0% 99.87% 362656kB 25.42% proof_system::plonk::stdlib::keccak::chi 0 0% 99.87% 624759.88kB 43.79% proof_system::plonk::stdlib::keccak::hash 0 0% 99.87% 141312kB 9.90% proof_system::plonk::stdlib::keccak::iota 0 0% 99.87% 237596.88kB 16.65% proof_system::plonk::stdlib::keccak::normalize_and_rotate 0 0% 99.87% 623561.03kB 43.71% proof_system::plonk::stdlib::keccak::sponge_absorb 0 0% 99.87% 23308.16kB 1.63% proof_system::plonk::stdlib::keccak::theta 0 0% 99.87% 542532.19kB 38.03% proof_system::plonk::stdlib::plookup_read::get_lookup_accumulators 0 0% 99.87% 83341.19kB 5.84% proof_system::plonk::stdlib::plookup_read::read_from_1_to_2_table 0 0% 99.87% 188524.32kB 13.21% proof_system::plonk::stdlib::sha256 0 0% 99.87% 32384kB 2.27% proof_system::plonk::stdlib::sha256_plookup::choose 0 0% 99.87% 50025kB 3.51% proof_system::plonk::stdlib::sha256_plookup::extend_witness 0 0% 99.87% 94208kB 6.60% proof_system::plonk::stdlib::sha256_plookup::majority 0 0% 99.87% 188524.32kB 13.21% proof_system::plonk::stdlib::sha256_plookup::sha256 0 0% 99.87% 188504.19kB 13.21% proof_system::plonk::stdlib::sha256_plookup::sha256_block 0 0% 99.87% 267154.20kB 18.73% std::_Sp_counted_ptr_inplace::_Sp_counted_ptr_inplace ``` --------- Co-authored-by: ludamad --- barretenberg/cpp/.gitignore | 4 +- barretenberg/cpp/CMakePresets.json | 22 +++++++-- .../cpp/scripts/collect_heap_information.sh | 47 +++++++++++++++++++ 3 files changed, 67 insertions(+), 6 deletions(-) create mode 100755 barretenberg/cpp/scripts/collect_heap_information.sh diff --git a/barretenberg/cpp/.gitignore b/barretenberg/cpp/.gitignore index 
9a5226b72fa..811aef3620e 100644
--- a/barretenberg/cpp/.gitignore
+++ b/barretenberg/cpp/.gitignore
@@ -8,4 +8,6 @@ CMakeUserPresets.json
 .vscode/settings.json
 # to be unignored when we agree on clang-tidy rules
 .clangd
-acir_tests
\ No newline at end of file
+acir_tests
+# we may download go in scripts/collect_heap_information.sh
+go*.tar.gz
diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json
index 7927b0ba494..0174d8b873b 100644
--- a/barretenberg/cpp/CMakePresets.json
+++ b/barretenberg/cpp/CMakePresets.json
@@ -147,6 +147,18 @@
         "CMAKE_BUILD_TYPE": "Debug"
       }
     },
+    {
+      "name": "gperftools",
+      "displayName": "Debugging build with gperftools on Clang-16",
+      "description": "Build with gperf",
+      "inherits": "clang16",
+      "binaryDir": "build-gperftools",
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "RelWithDebInfo",
+        "CMAKE_EXE_LINKER_FLAGS": "-ltcmalloc",
+        "CMAKE_CXX_FLAGS": "-fno-omit-frame-pointer"
+      }
+    },
     {
       "name": "wasm",
       "displayName": "Build for WASM",
@@ -283,6 +295,11 @@
       "inherits": "clang16",
       "configurePreset": "fuzzing"
     },
+    {
+      "name": "gperftools",
+      "inherits": "clang16",
+      "configurePreset": "gperftools"
+    },
     {
       "name": "smt-verification",
       "inherits": "clang16",
@@ -335,11 +352,6 @@
       "name": "xray-1thread",
       "configurePreset": "xray-1thread",
       "inherits": "default"
-    },
-    {
-      "name": "xray",
-      "configurePreset": "xray",
-      "inherits": "default"
     }
   ],
   "testPresets": [
diff --git a/barretenberg/cpp/scripts/collect_heap_information.sh b/barretenberg/cpp/scripts/collect_heap_information.sh
new file mode 100755
index 00000000000..1d25c5a791c
--- /dev/null
+++ b/barretenberg/cpp/scripts/collect_heap_information.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Build the gperftools preset, run a benchmark with heap profiling enabled,
+# and print a pprof text report for the middle heap dump of the run.
+#
+# Usage: collect_heap_information.sh [ONLY_PROCESS] [EXECUTABLE]
+#   ONLY_PROCESS  if non-empty, skip the profiling run and only process
+#                 heap dumps that are already present in the build dir
+#   EXECUTABLE    binary under build-gperftools/bin to profile
+#                 (default: ultra_honk_rounds_bench)
+set -eu
+
+PRESET=gperftools
+ONLY_PROCESS=${1:-}
+EXECUTABLE=${2:-ultra_honk_rounds_bench}
+
+# Move above script dir.
+cd "$(dirname "$0")/.."
+
+# Configure and build with heap profiling preset.
+cmake --preset "$PRESET"
+cmake --build --preset "$PRESET"
+
+cd "build-$PRESET"
+
+if [ -z "$ONLY_PROCESS" ]; then
+  # Clear old heap profile data. Dumps are named '<prefix>.NNNN.heap'.
+  rm -f ./"$EXECUTABLE".*.heap
+
+  # Run application with heap profiling to a file with prefix '$EXECUTABLE'.
+  HEAPPROFILE="./$EXECUTABLE" "./bin/$EXECUTABLE"
+fi
+
+# Download and install Go
+if [ ! -d ~/go ]; then
+  ARCHIVE=go1.21.3.linux-amd64.tar.gz
+  echo "Downloading and installing Go..."
+  wget "https://go.dev/dl/$ARCHIVE"
+  tar -C ~/ -xvf "$ARCHIVE"
+  rm "$ARCHIVE"
+  export PATH=$PATH:~/go/bin
+fi
+
+# Install pprof
+if [ ! -f ~/go/bin/pprof ]; then
+  echo "Installing pprof..."
+  ~/go/bin/go install github.com/google/pprof@latest
+fi
+
+# Collect the heap files
+files=(./"$EXECUTABLE".*.heap)
+# An unmatched glob stays literal, so check that the first entry really exists.
+if [ ! -e "${files[0]}" ]; then
+  echo "No heap profiles matching ./$EXECUTABLE.*.heap were found." >&2
+  exit 1
+fi
+# Find the middle index based on the count
+middle_index=$(( (${#files[@]} + 1) / 2 - 1))
+# Process the heap profile with pprof
+~/go/bin/pprof --text "./bin/$EXECUTABLE" "${files[$middle_index]}"