subsystem-bench: add regression tests for availability read and write (…

…#3311) ### What's been done - `subsystem-bench` has been split into two parts: a cli benchmark runner and a library. - The cli runner is quite simple. It just allows us to run `.yaml` based test sequences. Now it should only be used to run benchmarks during development. - The library is used in the cli runner and in regression tests. Some code is changed to make the library independent of the runner. - Added first regression tests for availability read and write that replicate existing test sequences. ### How we run regression tests - Regression tests are simply rust integration tests without the harnesses. - They should only be compiled under the `subsystem-benchmarks` feature to prevent them from running with other tests. - This doesn't work when running tests with `nextest` in CI, so additional filters have been added to the `nextest` runs. - Each benchmark run takes a different time in the beginning, so we "warm up" the tests until their CPU usage differs by less than 1% between consecutive runs. - After the warm-up, we run the benchmarks a few more times and compare the average with the expected values within a given precision. ### What is still wrong? - I haven't managed to set up approval voting tests. The spread of their results is too large and can't be narrowed down in a reasonable amount of time in the warm-up phase. - The tests start an unconfigurable prometheus endpoint inside, which causes errors because they use the same 9999 port. I disable it with a flag, but I think it's better to extract the endpoint launching outside the test, as we already do with `valgrind` and `pyroscope`. But we still use `prometheus` inside the tests. ### Future work * #3528 * #3529 * #3530 * #3531 --------- Co-authored-by: Alexander Samusev <[email protected]>
paritytech · Mar 1, 2024 · f0e589d · f0e589d
1 parent 6f81a4a
commit f0e589d
Show file tree

Hide file tree

Showing 35 changed files with 711 additions and 411 deletions.
diff --git a/.gitlab/pipeline/test.yml b/.gitlab/pipeline/test.yml
@@ -25,6 +25,7 @@ test-linux-stable:
     # "upgrade_version_checks_should_work" is currently failing
     - |
       time cargo nextest run \
+        --filter-expr 'not deps(/polkadot-subsystem-bench/)' \
         --workspace \
         --locked \
         --release \
@@ -69,7 +70,7 @@ test-linux-stable-runtime-benchmarks:
     # but still want to have debug assertions.
     RUSTFLAGS: "-Cdebug-assertions=y -Dwarnings"
   script:
-    - time cargo nextest run --workspace --features runtime-benchmarks benchmark --locked --cargo-profile testnet
+    - time cargo nextest run --filter-expr 'not deps(/polkadot-subsystem-bench/)' --workspace --features runtime-benchmarks benchmark --locked --cargo-profile testnet
 
 # can be used to run all tests
 # test-linux-stable-all:

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/polkadot/node/network/availability-distribution/Cargo.toml b/polkadot/node/network/availability-distribution/Cargo.toml
@@ -36,3 +36,14 @@ sc-network = { path = "../../../../substrate/client/network" }
 futures-timer = "3.0.2"
 assert_matches = "1.4.0"
 polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" }
+polkadot-subsystem-bench = { path = "../../subsystem-bench" }
+
+
+[[test]]
+name = "availability-distribution-regression-bench"
+path = "tests/availability-distribution-regression-bench.rs"
+harness = false
+required-features = ["subsystem-benchmarks"]
+
+[features]
+subsystem-benchmarks = []
diff --git a/...ode/network/availability-distribution/tests/availability-distribution-regression-bench.rs b/...ode/network/availability-distribution/tests/availability-distribution-regression-bench.rs
@@ -0,0 +1,113 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! availability-write regression tests
+//!
+//! TODO: Explain the test case after configuration adjusted to Kusama
+//!
+//! Subsystems involved:
+//! - availability-distribution
+//! - bitfield-distribution
+//! - availability-store
+
+use polkadot_subsystem_bench::{
+	availability::{benchmark_availability_write, prepare_test, TestDataAvailability, TestState},
+	configuration::{PeerLatency, TestConfiguration},
+	usage::BenchmarkUsage,
+};
+
+const BENCH_COUNT: usize = 3;
+const WARM_UP_COUNT: usize = 20;
+const WARM_UP_PRECISION: f64 = 0.01;
+
+/// Entry point of the availability-distribution regression benchmark.
+///
+/// Builds the test configuration, warms the benchmark up, averages a few measured runs and
+/// collects the messages produced by the network and CPU usage checks.
+///
+/// Returns `Err` when the warm-up fails or when any check produced a message; the messages
+/// themselves are printed to stderr.
+fn main() -> Result<(), String> {
+	// TODO: Adjust the test configurations to Kusama values
+	let mut config = TestConfiguration::default();
+	config.n_validators = 1000;
+	config.n_cores = 200;
+	config.max_validators_per_core = 5;
+	config.num_blocks = 3;
+	config.connectivity = 75;
+	config.min_pov_size = 5120;
+	config.max_pov_size = 5120;
+	config.peer_bandwidth = 52428800;
+	config.bandwidth = 52428800;
+	config.latency = Some(PeerLatency { mean_latency_ms: 30, std_dev: 2.0 });
+	// Called last so that it sees the final values of all the fields set above.
+	config.generate_pov_sizes();
+
+	warm_up(config.clone())?;
+	let usage = benchmark(config.clone());
+
+	// NOTE(review): each entry looks like (resource name, reference value, precision) —
+	// confirm against `BenchmarkUsage::check_network_usage` / `check_cpu_usage`.
+	let mut regressions = Vec::new();
+	regressions.extend(usage.check_network_usage(&[
+		("Received from peers", 4330.0, 0.05),
+		("Sent to peers", 15900.0, 0.05),
+	]));
+	regressions.extend(usage.check_cpu_usage(&[
+		("availability-distribution", 0.025, 0.05),
+		("bitfield-distribution", 0.085, 0.05),
+		("availability-store", 0.180, 0.05),
+	]));
+
+	if !regressions.is_empty() {
+		eprintln!("{}", regressions.join("\n"));
+		return Err("Regressions found".to_string())
+	}
+
+	Ok(())
+}
+
+/// Repeats the benchmark until two consecutive runs report similar CPU usage.
+///
+/// At most `WARM_UP_COUNT` runs are performed. Starting with the second run, the CPU usage
+/// diff against the previous run is taken for each involved subsystem; once all of them are
+/// below `WARM_UP_PRECISION` the function returns `Ok(())`. If that never happens, an error
+/// is returned.
+///
+/// Panics with "Must exist" if `cpu_usage_diff` yields no value for one of the subsystems.
+fn warm_up(config: TestConfiguration) -> Result<(), String> {
+	println!("Warming up...");
+	let subsystems = ["availability-distribution", "bitfield-distribution", "availability-store"];
+	let mut last: Option<BenchmarkUsage> = None;
+	for _ in 0..WARM_UP_COUNT {
+		let current = run(config.clone());
+		if let Some(previous) = last.as_ref() {
+			// All diffs are computed up front, so a missing subsystem is always reported.
+			let diffs = subsystems
+				.map(|name| current.cpu_usage_diff(previous, name).expect("Must exist"));
+			if diffs.iter().all(|diff| *diff < WARM_UP_PRECISION) {
+				return Ok(())
+			}
+		}
+		last = Some(current);
+	}
+
+	Err("Can't warm up".to_string())
+}
+
+/// Runs the benchmark `BENCH_COUNT` times and returns the result of
+/// `BenchmarkUsage::average` over those runs, printing it to stdout as well.
+fn benchmark(config: TestConfiguration) -> BenchmarkUsage {
+	println!("Benchmarking...");
+	let mut runs: Vec<BenchmarkUsage> = Vec::with_capacity(BENCH_COUNT);
+	for _ in 0..BENCH_COUNT {
+		runs.push(run(config.clone()));
+	}
+	let averaged = BenchmarkUsage::average(&runs);
+	println!("{}", averaged);
+	averaged
+}
+
+/// Performs a single availability-write benchmark run with a fresh test state and
+/// environment, blocking on the environment's runtime until the run completes.
+fn run(config: TestConfiguration) -> BenchmarkUsage {
+	let mut test_state = TestState::new(&config);
+	// NOTE(review): the trailing `false` presumably turns off the built-in prometheus
+	// endpoint mentioned in the commit message — confirm against `prepare_test`.
+	let (mut environment, _protocol_config) =
+		prepare_test(config.clone(), &mut test_state, TestDataAvailability::Write, false);
+	let runtime = environment.runtime();
+	let measurement =
+		benchmark_availability_write("data_availability_write", &mut environment, test_state);
+	runtime.block_on(measurement)
+}
diff --git a/polkadot/node/network/availability-recovery/Cargo.toml b/polkadot/node/network/availability-recovery/Cargo.toml
@@ -41,6 +41,13 @@ sc-network = { path = "../../../../substrate/client/network" }
 
 polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }
 polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" }
+polkadot-subsystem-bench = { path = "../../subsystem-bench" }
+
+[[test]]
+name = "availability-recovery-regression-bench"
+path = "tests/availability-recovery-regression-bench.rs"
+harness = false
+required-features = ["subsystem-benchmarks"]
 
 [features]
 subsystem-benchmarks = []
diff --git a/polkadot/node/network/availability-recovery/tests/availability-recovery-regression-bench.rs b/polkadot/node/network/availability-recovery/tests/availability-recovery-regression-bench.rs
@@ -0,0 +1,103 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! availability-read regression tests
+//!
+//! TODO: Explain the test case after configuration adjusted to Kusama
+//!
+//! Subsystems involved:
+//! - availability-recovery
+
+use polkadot_subsystem_bench::{
+	availability::{
+		benchmark_availability_read, prepare_test, DataAvailabilityReadOptions,
+		TestDataAvailability, TestState,
+	},
+	configuration::{PeerLatency, TestConfiguration},
+	usage::BenchmarkUsage,
+};
+
+const BENCH_COUNT: usize = 3;
+const WARM_UP_COUNT: usize = 10;
+const WARM_UP_PRECISION: f64 = 0.01;
+
+/// Entry point of the availability-recovery regression benchmark.
+///
+/// Builds the read options and the test configuration, warms the benchmark up, averages a
+/// few measured runs and collects the messages produced by the network and CPU usage checks.
+///
+/// Returns `Err` when the warm-up fails or when any check produced a message; the messages
+/// themselves are printed to stderr.
+fn main() -> Result<(), String> {
+	// TODO: Adjust the test configurations to Kusama values
+	let options = DataAvailabilityReadOptions { fetch_from_backers: true };
+	let mut config = TestConfiguration::default();
+	config.n_validators = 300;
+	config.n_cores = 20;
+	config.num_blocks = 3;
+	config.connectivity = 90;
+	config.min_pov_size = 5120;
+	config.max_pov_size = 5120;
+	config.peer_bandwidth = 52428800;
+	config.bandwidth = 52428800;
+	config.latency = Some(PeerLatency { mean_latency_ms: 100, std_dev: 1.0 });
+	// Called last so that it sees the final values of all the fields set above.
+	config.generate_pov_sizes();
+
+	warm_up(config.clone(), options.clone())?;
+	let usage = benchmark(config.clone(), options.clone());
+
+	// NOTE(review): each entry looks like (resource name, reference value, precision) —
+	// confirm against `BenchmarkUsage::check_network_usage` / `check_cpu_usage`.
+	let mut regressions = Vec::new();
+	regressions.extend(usage.check_network_usage(&[
+		("Received from peers", 102400.000, 0.05),
+		("Sent to peers", 0.335, 0.05),
+	]));
+	regressions.extend(usage.check_cpu_usage(&[("availability-recovery", 3.850, 0.05)]));
+
+	if !regressions.is_empty() {
+		eprintln!("{}", regressions.join("\n"));
+		return Err("Regressions found".to_string())
+	}
+
+	Ok(())
+}
+
+/// Repeats the benchmark until two consecutive runs report similar CPU usage.
+///
+/// At most `WARM_UP_COUNT` runs are performed. Starting with the second run, the
+/// "availability-recovery" CPU usage diff against the previous run is taken; once it is
+/// below `WARM_UP_PRECISION` the function returns `Ok(())`. If that never happens, an error
+/// is returned.
+///
+/// Panics with "Must exist" if `cpu_usage_diff` yields no value for the subsystem.
+fn warm_up(config: TestConfiguration, options: DataAvailabilityReadOptions) -> Result<(), String> {
+	println!("Warming up...");
+	let mut last: Option<BenchmarkUsage> = None;
+	for _ in 0..WARM_UP_COUNT {
+		let current = run(config.clone(), options.clone());
+		// The very first run has nothing to be compared with.
+		let stabilized = last.as_ref().map_or(false, |previous| {
+			current.cpu_usage_diff(previous, "availability-recovery").expect("Must exist") <
+				WARM_UP_PRECISION
+		});
+		if stabilized {
+			return Ok(())
+		}
+		last = Some(current);
+	}
+
+	Err("Can't warm up".to_string())
+}
+
+/// Runs the benchmark `BENCH_COUNT` times and returns the result of
+/// `BenchmarkUsage::average` over those runs, printing it to stdout as well.
+fn benchmark(config: TestConfiguration, options: DataAvailabilityReadOptions) -> BenchmarkUsage {
+	println!("Benchmarking...");
+	let mut runs: Vec<BenchmarkUsage> = Vec::with_capacity(BENCH_COUNT);
+	for _ in 0..BENCH_COUNT {
+		runs.push(run(config.clone(), options.clone()));
+	}
+	let averaged = BenchmarkUsage::average(&runs);
+	println!("{}", averaged);
+	averaged
+}
+
+/// Performs a single availability-read benchmark run with a fresh test state and
+/// environment, blocking on the environment's runtime until the run completes.
+fn run(config: TestConfiguration, options: DataAvailabilityReadOptions) -> BenchmarkUsage {
+	let mut test_state = TestState::new(&config);
+	// NOTE(review): the trailing `false` presumably turns off the built-in prometheus
+	// endpoint mentioned in the commit message — confirm against `prepare_test`.
+	let (mut environment, _protocol_config) =
+		prepare_test(config.clone(), &mut test_state, TestDataAvailability::Read(options), false);
+	let runtime = environment.runtime();
+	let measurement =
+		benchmark_availability_read("data_availability_read", &mut environment, test_state);
+	runtime.block_on(measurement)
+}
diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml
@@ -8,9 +8,13 @@ license.workspace = true
 readme = "README.md"
 publish = false
 
+[lib]
+name = "polkadot_subsystem_bench"
+path = "src/lib/lib.rs"
+
 [[bin]]
 name = "subsystem-bench"
-path = "src/subsystem-bench.rs"
+path = "src/cli/subsystem-bench.rs"
 
 # Prevent rustdoc error. Already documented from top-level Cargo.toml.
 doc = false