Skip to content

Commit

Permalink
PoC benchmark to track speed of k8s HPA reaction
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 705963358
  • Loading branch information
p3rf Team authored and copybara-github committed Dec 13, 2024
1 parent c2379cd commit 8f44d21
Show file tree
Hide file tree
Showing 11 changed files with 519 additions and 1 deletion.
2 changes: 1 addition & 1 deletion perfkitbenchmarker/benchmark_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def __init__(
self.uuid = '%s-%s' % (FLAGS.run_uri, uuid.uuid4())
self.always_call_cleanup = pkb_flags.ALWAYS_CALL_CLEANUP.value
self.dpb_service: dpb_service.BaseDpbService = None
self.container_cluster = None
self.container_cluster: container_service.BaseContainerCluster = None
self.key = None
self.relational_db = None
self.non_relational_db = None
Expand Down
17 changes: 17 additions & 0 deletions perfkitbenchmarker/container_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,23 @@ def WaitForResource(
run_cmd.append('--all')
RunKubectlCommand(run_cmd, timeout=timeout + 10)

@staticmethod
def WaitForSucceeded(
    resource_name: str,
    namespace: str | None = None,
    timeout: int = vm_util.DEFAULT_TIMEOUT,
):
  """Blocks until a resource reports .status.phase == 'Succeeded'.

  Args:
    resource_name: The resource to wait on, as passed to `kubectl wait`.
    namespace: Optional namespace of the resource; omitted from the command
      when falsy.
    timeout: Seconds `kubectl wait` is allowed to wait.
  """
  namespace_args = [f'--namespace={namespace}'] if namespace else []
  kubectl_args = [
      'wait',
      '--for=jsonpath={.status.phase}=Succeeded',
      f'--timeout={timeout}s',
      resource_name,
      *namespace_args,
  ]
  # The command-level timeout is 10s longer than the one handed to kubectl.
  RunKubectlCommand(kubectl_args, timeout=timeout + 10)

@staticmethod
def WaitForRollout(
resource_name: str, timeout: int = vm_util.DEFAULT_TIMEOUT
Expand Down
88 changes: 88 additions & 0 deletions perfkitbenchmarker/data/container/kubernetes_hpa/fib.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Manifest for the "fib" system under test: a namespace, the Deployment that
# serves the Fibonacci workload, the HorizontalPodAutoscaler being measured,
# and a LoadBalancer Service exposing the pods on port 5000.
#
# Template variables:
#   fib_image: container image for the fib Deployment.
#   runtime_class_name: optional runtimeClassName for the pods; omitted from
#     the pod spec when unset.
apiVersion: v1
kind: Namespace
metadata:
  name: fib
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fib
  namespace: fib
spec:
  selector:
    matchLabels:
      app: "fib"
  template:
    metadata:
      labels:
        app: "fib"
    spec:
      {%- if runtime_class_name %}
      runtimeClassName: {{ runtime_class_name }}
      {%- endif %}
      containers:
      - name: "fib"
        image: {{ fib_image }}
        imagePullPolicy: "Always"
        resources:
          requests:
            cpu: "1000m"
            memory: "128Mi"
          limits:
            cpu: "2000m"
            memory: "128Mi"
        ports:
        - containerPort: 5000
          name: "web"
          protocol: "TCP"
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: "fib"
  namespace: "fib"
spec:
  scaleTargetRef:
    apiVersion: "apps/v1"
    kind: "Deployment"
    name: "fib"
  minReplicas: 5
  maxReplicas: 250
  metrics:
  # Scale on average CPU utilization across the fib pods.
  - type: "Resource"
    resource:
      name: "cpu"
      target:
        type: "Utilization"
        averageUtilization: 70
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 60
      policies:
      - periodSeconds: 15
        type: "Percent"
        value: 100
      selectPolicy: "Min"
    scaleUp:
      # No stabilization window on the way up.
      stabilizationWindowSeconds: 0
      policies:
      - periodSeconds: 15
        type: "Percent"
        value: 100
      - periodSeconds: 15
        type: "Pods"
        value: 1000
      selectPolicy: "Max"
---
apiVersion: v1
kind: Service
metadata:
  name: "fib"
  namespace: "fib"
spec:
  selector:
    app: "fib"
  type: LoadBalancer
  externalTrafficPolicy: Cluster
  ports:
  - name: "tcp-port"
    protocol: "TCP"
    port: 5000
    targetPort: 5000
11 changes: 11 additions & 0 deletions perfkitbenchmarker/data/docker/fibonacci/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Some combinations of python 3.13/C++17 cause build failures in pandas:
# https://github.com/cython/cython/issues/5790
# Avoid it by just picking 3.12.
FROM --platform=linux/amd64 python:3.12 AS build

WORKDIR /
# Copy and install the requirements before the rest of the sources so the
# dependency layer is only rebuilt when requirements.txt changes.
COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
EXPOSE 5000
COPY . .
ENTRYPOINT [ "./entrypoint.sh" ]
2 changes: 2 additions & 0 deletions perfkitbenchmarker/data/docker/fibonacci/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Serve the Flask app (perf_server:app) with gunicorn: 4 worker processes with
# 2 threads each, listening on all interfaces on port 5000.
# `exec` replaces this shell with gunicorn so that gunicorn receives signals
# (e.g. SIGTERM on container stop) directly instead of via an intermediate sh.
exec gunicorn perf_server:app -w 4 --threads 2 --bind 0.0.0.0:5000
38 changes: 38 additions & 0 deletions perfkitbenchmarker/data/docker/fibonacci/perf_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Toy flask app to inefficiently calculate Fibonacci numbers."""

import socket
import time
from flask import Flask

# WSGI application object; the container entrypoint serves it through gunicorn
# as `perf_server:app`.
app = Flask(__name__)
# Looked up once at import time and reported as `pod_id` in every response.
hostname = socket.gethostname()


def calculate_fibonacci(n):
  """Computes the nth Fibonacci number by naive double recursion.

  The exponential-time recursion is deliberate: the function exists to burn
  CPU, so it must not be memoized or made iterative.

  Args:
    n: Index of the Fibonacci number to compute. Values <= 1 are returned
      unchanged.

  Returns:
    The nth Fibonacci number (or n itself when n <= 1).
  """
  return (
      n
      if n <= 1
      else calculate_fibonacci(n - 1) + calculate_fibonacci(n - 2)
  )


@app.route('/calculate')
def do_calculation():
  """Handles /calculate: computes fib(30) and reports how long it took.

  Returns:
    A single-element list containing a dict with:
      result: the computed Fibonacci number.
      calculation_time: elapsed seconds spent in the calculation.
      timestamp: wall-clock time (seconds since the epoch) at which the
        calculation started.
      pod_id: hostname of the machine serving the request.
  """
  # Wall clock is only used to say *when* the request was served.
  timestamp = time.time()
  # Durations use perf_counter: it is monotonic and high resolution, so a
  # wall-clock adjustment mid-request cannot skew (or negate) the measurement.
  started = time.perf_counter()
  result = calculate_fibonacci(30)  # Adjust the Fibonacci number for load
  elapsed = time.perf_counter() - started

  return [{
      'result': result,
      'calculation_time': elapsed,
      'timestamp': timestamp,
      'pod_id': hostname,
  }]


if __name__ == '__main__':
  # Direct-execution entry point (the container image starts the app through
  # gunicorn instead). Debug mode must stay off: the interactive debugger
  # allows arbitrary code execution and this server listens on all interfaces.
  app.run(debug=False, host='0.0.0.0', port=5000)

Check failure

Code scanning / CodeQL

Flask app is run in debug mode High

A Flask app appears to be run in debug mode. This may allow an attacker to run arbitrary code through the debugger.
47 changes: 47 additions & 0 deletions perfkitbenchmarker/data/locust/rampup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Locust file to simulate a "stepped" rampup of load."""

import locust


class Rampup(locust.HttpUser):
  """Locust user that requests /calculate at a constant 1 QPS."""

  # Send 1QPS (per user)
  wait_time = locust.constant_throughput(1)

  @locust.task
  def rampup(self):
    """Issues a single GET /calculate on a non-persistent connection."""
    # "Connection: close" makes every request open a fresh connection;
    # otherwise users won't get load balanced to new pods.
    self.client.get("/calculate", headers={"Connection": "close"})


class StagesShape(locust.LoadTestShape):
  """Load shape that steps the user count through a fixed schedule.

  Each stage applies until the run time reaches its "endtime" (seconds since
  the start of the run). Stages are listed in increasing "endtime" order.
  """

  # pyformat: disable
  # pylint: disable=bad-whitespace
  _stages = [
      {"endtime": 60,  "users": 1},    # 1 rps for 1m
      {"endtime": 360, "users": 20},   # 20 rps for 5m
      {"endtime": 420, "users": 40},   # 40 rps for 1m
      {"endtime": 480, "users": 60},   # 60 rps for 1m
      {"endtime": 540, "users": 90},   # 90 rps for 1m
      {"endtime": 660, "users": 120},  # 120 rps for 2m
      {"endtime": 780, "users": 150},  # 150 rps for 2m
      {"endtime": 900, "users": 1},    # 1 rps for 2m
      #                 --------------
      #                 Total: 15m
  ]
  # pyformat: enable

  def tick(self):
    """Returns (user_count, spawn_rate) for the current stage.

    Returns:
      A (user_count, spawn_rate) tuple for the first stage whose "endtime" has
      not been reached yet, or None once the run time is past the last stage
      (presumably the signal for Locust to stop the test; see the Locust docs).
    """
    elapsed = self.get_run_time()
    # spawn all new users roughly immediately (over 1s)
    spawn_rate = 100

    user_count = next(
        (
            stage["users"]
            for stage in self._stages
            if elapsed < stage["endtime"]
        ),
        None,
    )
    if user_count is None:
      return None
    return (user_count, spawn_rate)
15 changes: 15 additions & 0 deletions perfkitbenchmarker/data/locust/simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Locust file to flood the SUT."""

from locust import HttpUser
from locust import task


class Simple(HttpUser):
  """Locust user that requests /calculate with no wait time configured."""

  @task
  def simple(self):
    """Issues a single GET /calculate on a non-persistent connection."""
    # "Connection: close" makes every request open a fresh connection;
    # otherwise users won't get load balanced to new pods.
    self.client.get("/calculate", headers={"Connection": "close"})
141 changes: 141 additions & 0 deletions perfkitbenchmarker/linux_benchmarks/kubernetes_hpa_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs a locust based hpa benchmark on a k8s cluster."""

import functools
from typing import Any, Dict, List

from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import benchmark_spec as bm_spec
from perfkitbenchmarker import configs
from perfkitbenchmarker import container_service
from perfkitbenchmarker.linux_packages import locust
from perfkitbenchmarker.sample import Sample

FLAGS = flags.FLAGS

# Optional runtimeClassName for the workload pods. _PrepareCluster forwards the
# value to the fib manifest template as `runtime_class_name`.
flags.DEFINE_string(
'kubernetes_hpa_runtime_class_name',
None,
'A custom runtimeClassName to apply to the pods.',
)

BENCHMARK_NAME = 'kubernetes_hpa'
# Default benchmark configuration (YAML), loaded by GetConfig together with the
# user-supplied config. It declares:
#   - vm_groups.default: one VM; benchmark_spec.vms[0] is where locust is
#     installed and run from.
#   - container_specs.kubernetes_fib: the image handed to the fib manifest.
#   - container_cluster: the Kubernetes cluster, with a `fibpool` nodepool of
#     three nodes and per-cloud machine types.
BENCHMARK_CONFIG = """
kubernetes_hpa:
description: Benchmarks how quickly hpa reacts to load
vm_groups:
default:
vm_spec: *default_dual_core
vm_count: 1
container_specs:
kubernetes_fib:
image: fibonacci
container_registry: {}
container_cluster:
cloud: GCP
type: Kubernetes
vm_count: 1
vm_spec: *default_dual_core
nodepools:
fibpool:
vm_count: 3
vm_spec:
GCP:
machine_type: n2-standard-4
AWS:
machine_type: m6i.xlarge
Azure:
machine_type: Standard_D4s_v5
"""


def GetConfig(user_config: Dict[str, Any]) -> Dict[str, Any]:
  """Loads the configuration for the kubernetes_hpa benchmark.

  Args:
    user_config: User supplied configuration (flags and config file).

  Returns:
    The benchmark configuration produced by configs.LoadConfig from
    BENCHMARK_CONFIG and user_config.
  """
  return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)


def _PrepareCluster(benchmark_spec: bm_spec.BenchmarkSpec):
  """Deploys the fib workload to the cluster and waits for it to be available.

  Args:
    benchmark_spec: The benchmark specification; supplies the container cluster
      and the `kubernetes_fib` container spec whose image is deployed.
  """
  k8s_cluster: container_service.KubernetesCluster = (
      benchmark_spec.container_cluster
  )
  # Variables made available to the manifest template.
  template_vars = {
      'fib_image': benchmark_spec.container_specs['kubernetes_fib'].image,
      'runtime_class_name': FLAGS.kubernetes_hpa_runtime_class_name,
  }
  k8s_cluster.ApplyManifest(
      'container/kubernetes_hpa/fib.yaml.j2', **template_vars
  )

  # Don't return until the Deployment reports the `available` condition.
  k8s_cluster.WaitForResource('deploy/fib', 'available', namespace='fib')


def _PrepareLocust(benchmark_spec: bm_spec.BenchmarkSpec):
  """Installs locust on the first VM and stages the RAMPUP locustfile.

  Args:
    benchmark_spec: The benchmark specification; its first VM is used as the
      load generator.
  """
  load_generator = benchmark_spec.vms[0]
  locust.Install(load_generator)
  locust.Prep(load_generator, locust.Locustfile.RAMPUP)


def Prepare(benchmark_spec: bm_spec.BenchmarkSpec):
  """Installs the fib workload (and its hpa) on the cluster and locust on a VM.

  The cluster setup and the locust setup are handed to
  background_tasks.RunThreaded together.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.
  """

  def _Invoke(prepare_fn):
    return prepare_fn()

  prepare_steps = [
      functools.partial(step, benchmark_spec)
      for step in (_PrepareCluster, _PrepareLocust)
  ]
  background_tasks.RunThreaded(_Invoke, prepare_steps)


def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> List[Sample]:
  """Runs the locust load test against the fib service.

  Looks up the load balancer IP of the `fib` service in the `fib` namespace and
  drives load at port 5000 of that address from the first VM.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    The samples produced by the locust run.

  Raises:
    RuntimeError: If the fib service reports no load balancer IP.
  """

  # Get the SUT address. The jsonpath expression is passed without surrounding
  # quotes: anything outside the braces is echoed back verbatim by kubectl
  # (the argument list is presumably not run through a shell - TODO confirm).
  stdout, _, _ = container_service.RunKubectlCommand([
      'get',
      '-n',
      'fib',
      'svc/fib',
      '-o',
      'jsonpath={.status.loadBalancer.ingress[0].ip}',
  ])
  # Defensively drop any stray quotes so they can never end up in the URL.
  ip_address = stdout.strip().strip('\'"')
  if not ip_address:
    # Fail loudly rather than pointing locust at the malformed 'http://:5000'.
    raise RuntimeError(
        'Service fib/fib has no load balancer IP; cannot determine the '
        'address of the system under test.'
    )
  addr = f'http://{ip_address}:5000'

  # Run locust against the SUT
  vm = benchmark_spec.vms[0]
  samples = locust.Run(vm, addr)

  return list(samples)


def Cleanup(benchmark_spec):
  """Cleanup hook; this benchmark performs no benchmark-specific cleanup.

  Args:
    benchmark_spec: The benchmark specification. Unused.
  """
  _ = benchmark_spec  # Intentionally unused.
Loading

0 comments on commit 8f44d21

Please sign in to comment.