diff --git a/test/performance/.env.example b/test/performance/.env.example new file mode 100644 index 000000000000..1bd197098cd2 --- /dev/null +++ b/test/performance/.env.example @@ -0,0 +1,19 @@ +# Clusterloader home directory (checkout https://github.com/kubernetes/perf-tests) +export CL2_HOME_DIR="/Users/johny/perf-tests/clusterloader2" + +# Run the performance test with Kueue (this requires Kueue to be pre-deployed to the cluster) +# or without Kueue +export USE_KUEUE=false + +# Test iterations: +# number-of-small-jobs number-of-medium-jobs number-of-large-jobs job-replica-running-time test-timeout cluster-queue-CPU-quota cluster-queue-memory-quota +export EXPERIMENTS=( + "10 2 0 2s 3m 100 100Gi" + "20 2 0 2s 5m 100 100Gi" +) + +# Kubeconfig file location +export KUBECONFIG="$HOME/.kube/config" + +# Kubernetes kind +export PROVIDER="gke" diff --git a/test/performance/.gitignore b/test/performance/.gitignore new file mode 100644 index 000000000000..c1499e75fc48 --- /dev/null +++ b/test/performance/.gitignore @@ -0,0 +1,4 @@ +*report*/ +prerequisites/cluster-queue.yaml +tmp_manifests/ +.env diff --git a/test/performance/README.md b/test/performance/README.md new file mode 100644 index 000000000000..5c9ad9876b70 --- /dev/null +++ b/test/performance/README.md @@ -0,0 +1,67 @@ +# Kueue Performance Testing + +## Measurements + +### Job startup latency + +How fast do jobs transition from `created` to `started` state? +Measured as the time elapsed between `job.CreationTimestamp.Time` and `job.Status.StartTime.Time`. + +High Job startup latency in Kueue is expected when the total quota is not enough to schedule all jobs immediately, because the jobs need to queue. + +### Job startup throughput + +The best workload admission rate per second within 1 minute intervals. 
+The rate is measured every 5 seconds (see more details in [PromQL examples](https://prometheus.io/docs/prometheus/latest/querying/examples/#subquery)): + +`max_over_time(sum(rate(kueue_admitted_workloads_total{cluster_queue="{{$clusterQueue}}"}[1m]))[{{$testTimeout}}:5s])` + +This measurement is not accurate if the cluster quota is big enough to schedule all workloads of the test immediately, because Kueue immediately admits all the workloads and the `kueue_admitted_workloads_total` never increases. In this case, the PromQL query returns 0. +## How to run the test? + +### Prerequisites + +1. Deploy [Kueue](https://github.com/kubernetes-sigs/kueue/blob/main/docs/setup/install.md) +2. Make sure you have `kubectl`, [jq](https://stedolan.github.io/jq/download/), [golang version](https://github.com/mikefarah/yq) of `yq` and `go` +3. Check out `Clusterloader2` framework: https://github.com/kubernetes/perf-tests and build `clusterloader` binary: + + * change to `clusterloader2` directory + * run `go build -o clusterloader './cmd/'` + +### Run the test + +1. Copy an environment file example to `.env` file: + + * `cp .env.example .env` + +2. Edit the environment variables + +| Variable | Description | +| ----------- | ----------- | +| CL2_HOME_DIR | Clusterloader home directory (checkout https://github.com/kubernetes/perf-tests) | +| USE_KUEUE | Run the performance test with Kueue (this requires Kueue to be pre-deployed to the cluster) or without Kueue | +| EXPERIMENTS | Configuration of test iterations (see configuration example in the file) | +| KUBECONFIG | Kubeconfig file location | +| PROVIDER | Kubernetes kind (tested on `gke` only) | + +3. 
Run the `run-test.sh` file + +### Test results + +Every test execution creates a `report_<timestamp>` directory (`kueue_report_<timestamp>` when `USE_KUEUE=true`) in the directory the script is run from, with a `summary.csv` file, where the following metrics are available: + +* P50 Job Create to start latency (ms) +* P90 Job Create to start latency (ms) +* P50 Job Start to complete latency (ms) +* P90 Job Start to complete latency (ms) +* Max Job Throughput (max jobs/s) +* Total Jobs +* Total Pods +* Duration (s) + +Additionally, the following metrics are added to the results only for reference. Kueue doesn't influence them directly. + +* Avg Pod Waiting time (s) +* P90 Pod Waiting time (s) +* Avg Pod Completion time (s) +* P90 Pod Completion time (s) diff --git a/test/performance/config.yaml b/test/performance/config.yaml new file mode 100644 index 000000000000..1e4d361c6342 --- /dev/null +++ b/test/performance/config.yaml @@ -0,0 +1,181 @@ +{{$MODE := DefaultParam .MODE "Indexed"}} +{{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}} + +{{$smallJobs := DefaultParam .CL2_SMALL_JOBS 10}} +{{$mediumJobs := DefaultParam .CL2_MEDIUM_JOBS 2}} +{{$largeJobs := DefaultParam .CL2_LARGE_JOBS 0}} + +{{$namespaces := DefaultParam .CL2_NAMESPACES 1}} + +{{$smallJobsPerNamespace := DivideInt $smallJobs $namespaces}} +{{$mediumJobsPerNamespace := DivideInt $mediumJobs $namespaces}} +{{$largeJobsPerNamespace := DivideInt $largeJobs $namespaces}} + +{{$smallJobSize := 5}} +{{$mediumJobSize := 20}} +{{$largeJobSize := 100}} + +{{$jobRunningTime := DefaultParam .CL2_JOB_RUNNING_TIME "30s"}} + +{{$clusterQueue := "default-cluster-queue"}} +{{$localQueue := "local-queue"}} + +{{$testTimeout := DefaultParam .CL2_TEST_TIMEOUT "5m"}} + +{{$namespacePrefix := "queue-test"}} + +{{$useKueue := DefaultParam .CL2_USE_KUEUE false}} + +name: batch + +namespace: + number: {{$namespaces}} + prefix: {{$namespacePrefix}} + +tuningSets: +- name: UniformQPS + qpsLoad: + qps: {{$LOAD_TEST_THROUGHPUT}} + +steps: +- name: Start measurements + measurements: + - 
Identifier: Timer + Method: Timer + Params: + action: start + label: job_performance + - Identifier: WaitForFinishedJobs + Method: WaitForFinishedJobs + Params: + action: start + labelSelector: group = test-job + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: start + labelSelector: group = test-job + - Identifier: GenericPrometheusQuery + Method: GenericPrometheusQuery + Params: + action: start + metricName: Job (Kueue) API performance + metricVersion: v1 + unit: s + queries: + - name: total_jobs_scheduled + query: count(kube_job_info{namespace=~"{{$namespacePrefix}}.*"}) + - name: total_pods_scheduled + query: count(kube_pod_info{namespace=~"{{$namespacePrefix}}.*"}) + - name: avg_pod_running_time + query: (avg(kube_pod_completion_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"})) + - name: perc_90_pod_completion_time + query: quantile(0.90, kube_pod_completion_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"}) + - name: avg_pod_waiting_time + query: (avg(kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_created{namespace=~"{{$namespacePrefix}}.*"})) + - name: perc_90_pod_waiting_time + query: quantile(0.90, kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_created{namespace=~"{{$namespacePrefix}}.*"}) + - name: max_job_throughput + query: max_over_time(sum(rate(kueue_admitted_workloads_total{cluster_queue="{{$clusterQueue}}"}[1m]))[{{$testTimeout}}:5s]) +- name: Sleep + measurements: + - Identifier: sleep + Method: Sleep + Params: + duration: 10s +{{if $useKueue}} +- name: Create local queue + phases: + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: 1 + tuningSet: UniformQPS + objectBundle: + - basename: {{$localQueue}} + objectTemplatePath: "local-queue.yaml" + templateFillMap: + ClusterQueue: {{$clusterQueue}} +{{end}} +- name: Create {{$MODE}} jobs + phases: + 
- namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$smallJobsPerNamespace}} + tuningSet: UniformQPS + objectBundle: + - basename: small + objectTemplatePath: "job.yaml" + templateFillMap: + UseKueue: {{$useKueue}} + Replicas: {{$smallJobSize}} + Mode: {{$MODE}} + Sleep: {{$jobRunningTime}} + LocalQueue: "{{$localQueue}}-0" + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$mediumJobsPerNamespace}} + tuningSet: UniformQPS + objectBundle: + - basename: medium + objectTemplatePath: "job.yaml" + templateFillMap: + UseKueue: {{$useKueue}} + Replicas: {{$mediumJobSize}} + Mode: {{$MODE}} + Sleep: {{$jobRunningTime}} + LocalQueue: "{{$localQueue}}-0" + - namespaceRange: + min: 1 + max: {{$namespaces}} + replicasPerNamespace: {{$largeJobsPerNamespace}} + tuningSet: UniformQPS + objectBundle: + - basename: large + objectTemplatePath: "job.yaml" + templateFillMap: + UseKueue: {{$useKueue}} + Replicas: {{$largeJobSize}} + Mode: {{$MODE}} + Sleep: {{$jobRunningTime}} + LocalQueue: "{{$localQueue}}-0" +- name: Wait for {{$MODE}} jobs to finish + measurements: + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: gather + timeout: {{$testTimeout}} + - Identifier: WaitForFinishedJobs + Method: WaitForFinishedJobs + Params: + action: gather + timeout: {{$testTimeout}} +- name: Stop Timer + measurements: + - Identifier: Timer + Method: Timer + Params: + action: stop + label: job_performance +- name: Gather Timer + measurements: + - Identifier: Timer + Method: Timer + Params: + action: gather +- name: Sleep + measurements: + - Identifier: sleep + Method: Sleep + Params: + duration: 30s +- name: Gather Prometheus measurements + measurements: + - Identifier: GenericPrometheusQuery + Method: GenericPrometheusQuery + Params: + action: gather + enableViolations: true diff --git a/test/performance/job.yaml b/test/performance/job.yaml new file mode 100644 index 000000000000..ce82f3be0b41 --- /dev/null +++ 
b/test/performance/job.yaml @@ -0,0 +1,30 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{.Name}} + labels: + group: test-job + {{if .UseKueue}} + annotations: + kueue.x-k8s.io/queue-name: {{.LocalQueue}} + {{end}} +spec: + suspend: {{.UseKueue}} + parallelism: {{.Replicas}} + completions: {{.Replicas}} + completionMode: {{.Mode}} + template: + metadata: + labels: + group: test-pod + spec: + containers: + - name: {{.Name}} + image: gcr.io/k8s-staging-perf-tests/sleep:v0.0.3 + args: + - {{.Sleep}} + resources: + requests: + cpu: "200m" + memory: "100Mi" + restartPolicy: Never diff --git a/test/performance/local-queue.yaml b/test/performance/local-queue.yaml new file mode 100644 index 000000000000..88422fcb181d --- /dev/null +++ b/test/performance/local-queue.yaml @@ -0,0 +1,6 @@ +apiVersion: kueue.x-k8s.io/v1alpha2 +kind: LocalQueue +metadata: + name: {{.Name}} +spec: + clusterQueue: {{.ClusterQueue}} diff --git a/test/performance/prerequisites/cluster-queue.template b/test/performance/prerequisites/cluster-queue.template new file mode 100644 index 000000000000..515b263ac6d0 --- /dev/null +++ b/test/performance/prerequisites/cluster-queue.template @@ -0,0 +1,17 @@ +apiVersion: kueue.x-k8s.io/v1alpha2 +kind: ClusterQueue +metadata: + name: default-cluster-queue +spec: + namespaceSelector: {} + resources: + - name: "cpu" + flavors: + - name: default + quota: + min: 100 + - name: "memory" + flavors: + - name: default + quota: + min: 50Gi diff --git a/test/performance/prerequisites/resource-flavor.yaml b/test/performance/prerequisites/resource-flavor.yaml new file mode 100644 index 000000000000..8937e21cb036 --- /dev/null +++ b/test/performance/prerequisites/resource-flavor.yaml @@ -0,0 +1,4 @@ +apiVersion: kueue.x-k8s.io/v1alpha2 +kind: ResourceFlavor +metadata: + name: default diff --git a/test/performance/run-test.sh b/test/performance/run-test.sh new file mode 100755 index 000000000000..b1f9628b2878 --- /dev/null +++ b/test/performance/run-test.sh @@ -0,0 
#!/bin/bash
#
# Runs the Kueue performance experiments with Clusterloader2.
#
# For every entry of EXPERIMENTS the script optionally (USE_KUEUE=true) applies
# a ClusterQueue with the requested quota, runs clusterloader against
# config.yaml and appends one row of extracted metrics to
# <report dir>/summary.csv.
#
# Run it from the directory that contains this script: .env, config.yaml,
# prerequisites/ and ../../config/prometheus are resolved relative to the
# current working directory.
#
# Environment (may be provided through an optional .env file):
#   CL2_HOME_DIR     clusterloader2 directory that contains the built binary
#   CL2_BINARY_NAME  name of the clusterloader binary (default: clusterloader)
#   USE_KUEUE        "true" to submit the jobs through Kueue (default: true)
#   EXPERIMENTS      array of space-separated tuples:
#                    <small jobs> <medium jobs> <large jobs> <job running time>
#                    <test timeout> <cluster queue CPU quota>
#                    <cluster queue memory quota>
#   KUBECONFIG       kubeconfig file location (default: $HOME/.kube/config)
#   PROVIDER         value passed to clusterloader --provider (default: gke)
#
# Tools invoked: kubectl, yq, jq and the clusterloader binary.

# shellcheck disable=SC1091
if [[ -f .env ]]; then
  source .env
fi

: "${CL2_HOME_DIR:=/Users/johny/perf-tests/clusterloader2}"
: "${CL2_BINARY_NAME:=clusterloader}"
: "${USE_KUEUE:=true}"
: "${PROVIDER:=gke}"
: "${KUBECONFIG:=$HOME/.kube/config}"
export KUBECONFIG

DEFAULT_EXPERIMENTS=(
  "10 2 0 2s 3m 100 100Gi"
)

# "${EXPERIMENTS[@]:=...}" cannot assign a default to a whole array, so the
# fallback is applied explicitly when EXPERIMENTS is unset or empty.
if [[ -z "${EXPERIMENTS[*]}" ]]; then
  EXPERIMENTS=("${DEFAULT_EXPERIMENTS[@]}")
fi

# Fail before touching the cluster when the clusterloader binary is missing.
cl2_binary="$CL2_HOME_DIR/$CL2_BINARY_NAME"
if [[ ! -x "$cl2_binary" ]]; then
  printf 'clusterloader binary not found or not executable: %s\n' "$cl2_binary" >&2
  exit 1
fi

export EXEC_DEPLOYMENT_YAML="$CL2_HOME_DIR/pkg/execservice/manifest/exec_deployment.yaml"

# Work on a private copy of the Prometheus manifests so that the Kueue specific
# ones can be added without modifying the clusterloader checkout.
cp -r "$CL2_HOME_DIR/pkg/prometheus/manifests/" tmp_manifests
trap 'rm -rf -- tmp_manifests' EXIT
# Assigned separately from export so a failing $(pwd) is not masked (SC2155).
PROMETHEUS_MANIFEST_PATH="$(pwd)/tmp_manifests"
export PROMETHEUS_MANIFEST_PATH

now=$(date +%Y-%m-%d-%H.%M.%S)

if [[ "$USE_KUEUE" == true ]]; then
  export CL2_USE_KUEUE=true
  # Kustomize places all Kubernetes object manifests (role, rolebinding and
  # servicemonitor) in the same file, however, Clusterloader expects one
  # manifest per file, otherwise it does not create all the objects.
  # The yq expression below splits the produced manifest into 3 files, which
  # are then moved to the temporary $PROMETHEUS_MANIFEST_PATH.
  kubectl kustomize ../../config/prometheus | yq -s '.kind' -o yaml
  mv Role.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-role.yaml"
  mv RoleBinding.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-role-binding.yaml"
  mv ServiceMonitor.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-service-monitor.yaml"
  kubectl apply -f prerequisites/resource-flavor.yaml
  report_dir_name="kueue_report_$now"
else
  report_dir_name="report_$now"
fi
mkdir -p "$report_dir_name"

summary_file="$report_dir_name/summary.csv"

# Prints one field of the JobLifecycleLatency report of the current experiment.
#   $1 - value of labels.Metric (e.g. create_to_start)
#   $2 - key inside .data (e.g. Perc50)
job_latency() {
  jq --arg metric "$1" --arg field "$2" \
    '.dataItems[] | select(.labels.Metric==$metric).data[$field]' \
    "$experiment_dir"/JobLifecycleLatency*.json
}

# Prints one named query result of the GenericPrometheusQuery report of the
# current experiment.
#   $1 - query name as declared in config.yaml
prometheus_value() {
  jq --arg field "$1" '.dataItems[0].data[$field]' \
    "$experiment_dir"/GenericPrometheusQuery*.json
}

csv_header="Test Arguments"
csv_header+=",P50 Job Create to start latency (ms)"
csv_header+=",P90 Job Create to start latency (ms)"
csv_header+=",P50 Job Start to complete latency (ms)"
csv_header+=",P90 Job Start to complete latency (ms)"
csv_header+=",Max Job Throughput (max jobs/s)"
csv_header+=",Total Jobs"
csv_header+=",Total Pods"
csv_header+=",Duration (s)"
csv_header+=",Avg Pod Waiting time (s)"
csv_header+=",P90 Pod Waiting time (s)"
csv_header+=",Avg Pod Completion time (s)"
csv_header+=",P90 Pod Completion time (s)"
printf '%s\n' "$csv_header" >>"$summary_file"

for item in "${EXPERIMENTS[@]}"; do
  IFS=" " read -ra conditions <<<"$item"
  export CL2_SMALL_JOBS="${conditions[0]}"
  export CL2_MEDIUM_JOBS="${conditions[1]}"
  export CL2_LARGE_JOBS="${conditions[2]}"
  export CL2_JOB_RUNNING_TIME="${conditions[3]}"
  export CL2_TEST_TIMEOUT="${conditions[4]}"
  cores="${conditions[5]}"
  memory="${conditions[6]}"
  experiment_dir="$report_dir_name/$CL2_SMALL_JOBS-$CL2_MEDIUM_JOBS-$CL2_LARGE_JOBS-$CL2_JOB_RUNNING_TIME-$CL2_TEST_TIMEOUT-$cores-$memory"
  mkdir -p "$experiment_dir"
  echo "======================================================================================"
  echo "Running an experiment with [$CL2_SMALL_JOBS, $CL2_MEDIUM_JOBS, $CL2_LARGE_JOBS, $CL2_JOB_RUNNING_TIME, $CL2_TEST_TIMEOUT, $cores, $memory]"

  if [[ "$USE_KUEUE" == true ]]; then
    # Render the ClusterQueue for this experiment from the template.
    cp prerequisites/cluster-queue.template prerequisites/cluster-queue.yaml
    yq -i e ".spec.resources[0].flavors[0].quota.min=$cores" prerequisites/cluster-queue.yaml
    yq -i e ".spec.resources[1].flavors[0].quota.min=\"$memory\"" prerequisites/cluster-queue.yaml
    kubectl apply -f prerequisites/cluster-queue.yaml
  fi

  # A failed run is reported but does not stop the remaining experiments.
  if ! "$cl2_binary" \
    --testconfig=config.yaml \
    --enable-prometheus-server=true \
    --provider="$PROVIDER" \
    --v=2 --prometheus-scrape-metrics-server=true \
    --prometheus-scrape-kube-state-metrics=true \
    --report-dir="$experiment_dir"; then
    printf 'warning: clusterloader exited with an error for experiment [%s]\n' "$item" >&2
  fi

  echo "Experiment finished. Extracting results from the report..."
  {
    printf '%s,' "$CL2_SMALL_JOBS $CL2_MEDIUM_JOBS $CL2_LARGE_JOBS $CL2_JOB_RUNNING_TIME $CL2_TEST_TIMEOUT $cores $memory"
    printf '%s,' "$(job_latency create_to_start Perc50)"
    printf '%s,' "$(job_latency create_to_start Perc90)"
    printf '%s,' "$(job_latency start_to_complete Perc50)"
    printf '%s,' "$(job_latency start_to_complete Perc90)"
    printf '%s,' "$(prometheus_value max_job_throughput)"
    printf '%s,' "$(prometheus_value total_jobs_scheduled)"
    printf '%s,' "$(prometheus_value total_pods_scheduled)"
    printf '%s,' "$(jq '.dataItems[0].data.job_performance' "$experiment_dir"/Timer*.json)"
    printf '%s,' "$(prometheus_value avg_pod_waiting_time)"
    printf '%s,' "$(prometheus_value perc_90_pod_waiting_time)"
    printf '%s,' "$(prometheus_value avg_pod_running_time)"
    # The last field always terminates the row, even when the report is missing.
    printf '%s\n' "$(prometheus_value perc_90_pod_completion_time)"
  } >>"$summary_file"

  if [[ "$USE_KUEUE" == true ]]; then
    kubectl delete -f prerequisites/cluster-queue.yaml
  fi
done

if [[ "$USE_KUEUE" == true ]]; then
  kubectl delete -f prerequisites/resource-flavor.yaml
fi