diff --git a/e2e/tools/validator/scripts/stressor.sh b/e2e/tools/validator/scripts/stressor.sh index c16465429a..2058caaf2b 100755 --- a/e2e/tools/validator/scripts/stressor.sh +++ b/e2e/tools/validator/scripts/stressor.sh @@ -13,32 +13,80 @@ run() { echo " ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾" } +# stepwise load curve: each step is 20 seconds +declare -a load_curve_stepwise=( + 0:20 + 20:20 + 40:20 + 60:20 + 80:20 + 100:20 + 80:20 + 60:20 + 40:20 + 20:20 + 0:20 +) + +# default load curve: varying durations +declare -a load_curve_default=( + 0:5 + 10:20 + 25:20 + 50:20 + 75:20 + 100:30 + 75:20 + 50:20 + 25:20 + 10:20 + 0:5 +) + main() { + local total_time=0 + local repeats=5 + local curve_type="default" + + while getopts "t:r:c:" opt; do + case $opt in + t) total_time=$OPTARG ;; + c) curve_type=$OPTARG ;; + *) echo "Usage: $0 [-t total_time_in_seconds] [-c curve_type(default|stepwise)]" >&2; exit 1 ;; + esac + done + + # Select load curve based on curve_type + local -a load_curve + case $curve_type in + "default") load_curve=("${load_curve_default[@]}") ;; + "stepwise") load_curve=("${load_curve_stepwise[@]}") ;; + *) echo "Invalid curve type. Use 'default' or 'stepwise'" >&2; exit 1 ;; + esac local cpus cpus=$(nproc) - # load and time - local -a load_curve=( - 0:5 - 10:20 - 25:20 - 50:20 - 75:20 - 100:30 - 75:20 - 50:20 - 25:20 - 10:20 - 0:5 - ) - - # sleep 5 so that first run and the second run look the same + # calculate the total duration of one cycle of the load curve + local total_cycle_time=0 + for x in "${load_curve[@]}"; do + local time="${x##*:}" + total_cycle_time=$((total_cycle_time + time)) + done + + # calculate the repeats if total_time is provided + if [ "$total_time" -gt 0 ]; then + repeats=$((total_time / total_cycle_time)) + fi + + echo "Total time: $total_time seconds, Repeats: $repeats, Curve type: $curve_type" + + # sleep 5 so that first run and the second run look the same echo "Warmup .." run stress-ng --cpu "$cpus" --cpu-method ackermann --cpu-load 0 --timeout 5 - for i in $(seq 1 5); do - echo "Running: $i/5" + for i in $(seq 1 "$repeats"); do + echo "Running: $i/$repeats" for x in "${load_curve[@]}"; do local load="${x%%:*}" local time="${x##*:}s" diff --git a/e2e/tools/validator/src/validator/config/__init__.py b/e2e/tools/validator/src/validator/config/__init__.py index f4f49bc606..31fed7b14b 100644 --- a/e2e/tools/validator/src/validator/config/__init__.py +++ b/e2e/tools/validator/src/validator/config/__init__.py @@ -40,11 +40,17 @@ class Prometheus(NamedTuple): job: PrometheusJob +class Stressor(NamedTuple): + total_runtime_seconds: int + curve_type: str + + class Validator(NamedTuple): log_level: str remote: Remote metal: Metal prometheus: Prometheus + stressor: Stressor validations_file: str def __repr__(self): @@ -105,6 +111,15 @@ def load(config_file: str) -> Validator: job=job, ) + stressor_config = config["stressor"] + if not stressor_config: + stressor = Stressor(total_runtime_seconds=1200, curve_type="default") + else: + stressor = Stressor( + total_runtime_seconds=stressor_config.get("total_runtime_seconds", 1200), + curve_type=stressor_config.get("curve_type", "default"), + ) + validations_file = config.get("validations_file", "validations.yaml") log_level = config.get("log_level", "warn") @@ -112,6 +127,7 @@ def load(config_file: str) -> Validator: remote=remote, metal=metal, prometheus=prometheus, + stressor=stressor, validations_file=validations_file, log_level=log_level, ) diff --git a/e2e/tools/validator/src/validator/stresser/__init__.py b/e2e/tools/validator/src/validator/stresser/__init__.py index a3fc747892..6ff1d17258 100644 --- a/e2e/tools/validator/src/validator/stresser/__init__.py +++ b/e2e/tools/validator/src/validator/stresser/__init__.py @@ -27,6 +27,8 @@ def __init__(self, config: config.Remote): self.user = config.user self.port = config.port self.password = config.password + self.total_runtime_seconds = config.total_runtime_seconds + self.curve_type = config.curve_type self.ssh_client = paramiko.SSHClient() self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) @@ -69,11 +71,13 @@ def run_script(self, script_path: str) -> ScriptResult: # ruff: noqa: S108 (Suppressed hard-coded path because we want to intentionally copy stress.sh inside `/tmp` dir) target_script = "/tmp/stress.sh" + cli_options = f"-t {self.total_runtime_seconds} -c {self.curve_type}" + command = f"{target_script} {cli_options}" self.copy(script_path, target_script) # ruff: noqa: DTZ005 (Suppressed non-time-zone aware object creation as it is not necessary for this use case) start_time = datetime.now() - _, stdout, stderr = self.ssh_client.exec_command(target_script) + _, stdout, stderr = self.ssh_client.exec_command(command) # ruff: noqa: T201 (Suppressed as printing is intentional and necessary in this context) print("stdout output:") diff --git a/e2e/tools/validator/tests/validator/config/test_config.py b/e2e/tools/validator/tests/validator/config/test_config.py index eb0f6ee5e6..03eaf4b55b 100644 --- a/e2e/tools/validator/tests/validator/config/test_config.py +++ b/e2e/tools/validator/tests/validator/config/test_config.py @@ -58,6 +58,24 @@ def test_minimal_config_file(minimal_config_file): assert prometheus.job.vm == "vm" +@pytest.fixture +def stressor_config_file(config_file): + return config_file( + """ +stressor: + total_runtime_seconds: 1200 + curve_type: default + """ + ) + + +def test_stressor_config(stressor_config_file): + config = load(stressor_config_file) + stressor = config.stressor + assert stressor.total_runtime_seconds == 1200 + assert stressor.curve_type == "default" + + @pytest.fixture def config_file_use_password(config_file): return config_file( diff --git a/e2e/tools/validator/validations.yaml b/e2e/tools/validator/validations.yaml index d0878072fa..d94616dd75 100644 --- a/e2e/tools/validator/validations.yaml +++ b/e2e/tools/validator/validations.yaml @@ -2,6 +2,9 @@ config: mapping: actual: metal predicted: vm + stressor: + total_runtime_seconds: 1200 + curve_type: default validations: - name: node-rapl - kepler-package