Skip to content

Commit

Permalink
chore: make validator stressor runtime and load curve configurable
Browse files Browse the repository at this point in the history
Signed-off-by: Huamin Chen <[email protected]>
  • Loading branch information
rootfs committed Nov 12, 2024
1 parent 82dc44a commit 21d7736
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 19 deletions.
84 changes: 66 additions & 18 deletions e2e/tools/validator/scripts/stressor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,80 @@ run() {
echo " ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾"
}

# Stepwise load curve.
# Every entry has the form <load>:<seconds>. The load climbs from 0 to 100
# in increments of 20 and then descends symmetrically; each step is held
# for 20 seconds, so one full cycle lasts 220 seconds.
declare -a load_curve_stepwise=(
  0:20 20:20 40:20 60:20 80:20   # ramp up
  100:20                         # peak
  80:20 60:20 40:20 20:20 0:20   # ramp down
)

# Default load curve.
# Every entry has the form <load>:<seconds>. Unlike the stepwise curve the
# hold times vary: the idle steps at both ends last 5 seconds, the peak
# lasts 30 seconds and every other step lasts 20 seconds, so one full cycle
# lasts 200 seconds.
declare -a load_curve_default=(
  0:5                        # short idle lead-in
  10:20 25:20 50:20 75:20    # ramp up
  100:30                     # peak, held longer than the other steps
  75:20 50:20 25:20 10:20    # ramp down
  0:5                        # short idle tail
)

main() {
local total_time=0
local repeats=5
local curve_type="default"

while getopts "t:r:c:" opt; do
case $opt in
t) total_time=$OPTARG ;;
c) curve_type=$OPTARG ;;
*) echo "Usage: $0 [-t total_time_in_seconds] [-c curve_type(default|stepwise)]" >&2; exit 1 ;;
esac
done

# Select load curve based on curve_type
local -a load_curve
case $curve_type in
"default") load_curve=("${load_curve_default[@]}") ;;
"stepwise") load_curve=("${load_curve_stepwise[@]}") ;;
*) echo "Invalid curve type. Use 'default' or 'stepwise'" >&2; exit 1 ;;
esac

local cpus
cpus=$(nproc)

# load and time
local -a load_curve=(
0:5
10:20
25:20
50:20
75:20
100:30
75:20
50:20
25:20
10:20
0:5
)

# sleep 5 so that first run and the second run look the same
# calculate the total duration of one cycle of the load curve
local total_cycle_time=0
for x in "${load_curve[@]}"; do
local time="${x##*:}"
total_cycle_time=$((total_cycle_time + time))
done

# calculate the repeats if total_time is provided
if [ "$total_time" -gt 0 ]; then
repeats=$((total_time / total_cycle_time))
fi

echo "Total time: $total_time seconds, Repeats: $repeats, Curve type: $curve_type"

# sleep 5 so that first run and the second run look the same
echo "Warmup .."
run stress-ng --cpu "$cpus" --cpu-method ackermann --cpu-load 0 --timeout 5

for i in $(seq 1 5); do
echo "Running: $i/5"
for i in $(seq 1 "$repeats"); do
echo "Running: $i/$repeats"
for x in "${load_curve[@]}"; do
local load="${x%%:*}"
local time="${x##*:}s"
Expand Down
16 changes: 16 additions & 0 deletions e2e/tools/validator/src/validator/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ class Prometheus(NamedTuple):
job: PrometheusJob


class Stressor(NamedTuple):
    """Settings for the stress workload, read from the ``stressor`` config section.

    ``load`` fills in 1200 seconds and the ``"default"`` curve when the
    section or one of its keys is absent.
    """

    # Total time the stress workload should run, in seconds.
    total_runtime_seconds: int
    # Name of the load curve to use; the stressor script accepts
    # "default" and "stepwise".
    curve_type: str


class Validator(NamedTuple):
log_level: str
remote: Remote
metal: Metal
prometheus: Prometheus
stressor: Stressor
validations_file: str

def __repr__(self):
Expand Down Expand Up @@ -105,13 +111,23 @@ def load(config_file: str) -> Validator:
job=job,
)

stressor_config = config["stressor"]
if not stressor_config:
stressor = Stressor(total_runtime_seconds=1200, curve_type="default")
else:
stressor = Stressor(
total_runtime_seconds=stressor_config.get("total_runtime_seconds", 1200),
curve_type=stressor_config.get("curve_type", "default"),
)

validations_file = config.get("validations_file", "validations.yaml")
log_level = config.get("log_level", "warn")

return Validator(
remote=remote,
metal=metal,
prometheus=prometheus,
stressor=stressor,
validations_file=validations_file,
log_level=log_level,
)
6 changes: 5 additions & 1 deletion e2e/tools/validator/src/validator/stresser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def __init__(self, config: config.Remote):
self.user = config.user
self.port = config.port
self.password = config.password
self.total_runtime_seconds = config.total_runtime_seconds
self.curve_type = config.curve_type

self.ssh_client = paramiko.SSHClient()
self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
Expand Down Expand Up @@ -69,11 +71,13 @@ def run_script(self, script_path: str) -> ScriptResult:

# ruff: noqa: S108 (Suppressed hard-coded path because we want to intentionally copy stress.sh inside `/tmp` dir)
target_script = "/tmp/stress.sh"
cli_options = f"-t {self.total_runtime_seconds} -c {self.curve_type}"
command = f"{target_script} {cli_options}"
self.copy(script_path, target_script)

# ruff: noqa: DTZ005 (Suppressed non-time-zone aware object creation as it is not necessary for this use case)
start_time = datetime.now()
_, stdout, stderr = self.ssh_client.exec_command(target_script)
_, stdout, stderr = self.ssh_client.exec_command(command)

# ruff: noqa: T201 (Suppressed as printing is intentional and necessary in this context)
print("stdout output:")
Expand Down
18 changes: 18 additions & 0 deletions e2e/tools/validator/tests/validator/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,24 @@ def test_minimal_config_file(minimal_config_file):
assert prometheus.job.vm == "vm"


@pytest.fixture
def stressor_config_file(config_file):
    """Provide a config file whose ``stressor`` section overrides both settings.

    The values deliberately differ from the loader's fallbacks
    (1200 seconds / "default") so that a loader which ignores the section
    cannot pass the test by accident. The two keys are nested under
    ``stressor`` because the loader reads them from that mapping.
    """
    return config_file(
        """
stressor:
  total_runtime_seconds: 600
  curve_type: stepwise
"""
    )


def test_stressor_config(stressor_config_file):
    """The ``stressor`` section is parsed into ``Validator.stressor``."""
    config = load(stressor_config_file)
    stressor = config.stressor
    # Non-default values prove the section was parsed, not defaulted.
    assert stressor.total_runtime_seconds == 600
    assert stressor.curve_type == "stepwise"


@pytest.fixture
def config_file_use_password(config_file):
return config_file(
Expand Down
3 changes: 3 additions & 0 deletions e2e/tools/validator/validations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ config:
mapping:
actual: metal
predicted: vm
stressor:
total_runtime_seconds: 1200
curve_type: default

validations:
- name: node-rapl - kepler-package
Expand Down

0 comments on commit 21d7736

Please sign in to comment.