From 4a066b8fea314da833009a8d51ea868173d54620 Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Wed, 29 Jan 2025 16:22:13 -0800 Subject: [PATCH 1/7] Fix minor issues with Bottlerocket OS SKU perf eval. --- modules/python/clusterloader2/cri/cri.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modules/python/clusterloader2/cri/cri.py b/modules/python/clusterloader2/cri/cri.py index 088b5a256..97569529e 100644 --- a/modules/python/clusterloader2/cri/cri.py +++ b/modules/python/clusterloader2/cri/cri.py @@ -38,7 +38,15 @@ def override_config_clusterloader2( print(f"Node {node.metadata.name} has allocatable cpu of {allocatable_cpu} and allocatable memory of {allocatable_memory}") cpu_value = int(allocatable_cpu.replace("m", "")) - memory_value = int(allocatable_memory.replace("Ki", "")) + # Bottlerocket OS SKU on EKS has allocatable_memory property in Mi. AKS and Amazon Linux (default SKUs) + # use Ki. Handling the Mi case here and converting Mi to Ki, if needed. 
+ if "Mi" in allocatable_memory: + memory_value = int(allocatable_memory.replace("Mi", "")) * 1024 + elif "Ki" in allocatable_memory: + memory_value = int(allocatable_memory.replace("Ki", "")) + else: + raise Exception("Unexpected format of allocatable memory node property") + print(f"Node {node.metadata.name} has cpu value of {cpu_value} and memory value of {memory_value}") allocated_cpu, allocated_memory = _get_daemonsets_pods_allocated_resources(client, node.metadata.name) @@ -182,13 +190,13 @@ def main(): args = parser.parse_args() if args.command == "override": - override_config_clusterloader2(args.node_count, args.node_per_step, args.max_pods, args.repeats, args.operation_timeout, + override_config_clusterloader2(args.node_count, args.node_per_step, args.max_pods, args.repeats, args.operation_timeout, args.load_type, args.scale_enabled, args.pod_startup_latency_threshold, args.provider, args.cl2_override_file) elif args.command == "execute": execute_clusterloader2(args.cl2_image, args.cl2_config_dir, args.cl2_report_dir, args.kubeconfig, args.provider) elif args.command == "collect": - collect_clusterloader2(args.node_count, args.max_pods, args.repeats, args.load_type, + collect_clusterloader2(args.node_count, args.max_pods, args.repeats, args.load_type, args.cl2_report_dir, args.cloud_info, args.run_id, args.run_url, args.result_file) if __name__ == "__main__": From 2fea22f1cf1437a986d303d60daf2db61c6c19ba Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Thu, 30 Jan 2025 18:42:40 -0800 Subject: [PATCH 2/7] Rename Immutable Container host pipelines. 
--- .../cri-clusterloader2-immut-host.yml | 61 +++++++++++++++++++ .../cri-kbench-cp-bottlerocket.yml | 37 ----------- .../terraform-inputs/aws.tfvars | 8 +-- .../terraform-test-inputs/aws.json | 0 .../cri-kbench-cp/collect-clusterloader2.yml | 17 ------ .../cri-kbench-cp/execute-clusterloader2.yml | 17 ------ .../cri-kbench-cp/validate-resources.yml | 16 ----- 7 files changed, 65 insertions(+), 91 deletions(-) create mode 100644 pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml delete mode 100644 pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml rename scenarios/perf-eval/{cri-kbench-cp-bottlerocket => cri-clusterloader2-immut-host}/terraform-inputs/aws.tfvars (95%) rename scenarios/perf-eval/{cri-kbench-cp-bottlerocket => cri-clusterloader2-immut-host}/terraform-test-inputs/aws.json (100%) delete mode 100644 steps/topology/cri-kbench-cp/collect-clusterloader2.yml delete mode 100644 steps/topology/cri-kbench-cp/execute-clusterloader2.yml delete mode 100644 steps/topology/cri-kbench-cp/validate-resources.yml diff --git a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml new file mode 100644 index 000000000..8b784ad79 --- /dev/null +++ b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml @@ -0,0 +1,61 @@ +trigger: none + +variables: + SCENARIO_TYPE: perf-eval + SCENARIO_NAME: cri-clusterloader2-immut-host + SCENARIO_VERSION: main + +stages: + - stage: aws_westeurope + dependsOn: [] + jobs: + - template: /jobs/competitive-test.yml + parameters: + cloud: aws + regions: + - eu-west-1 + engine: clusterloader2 + engine_input: + image: "ghcr.io/azure/clusterloader2:v20241016" + topology: cri-resource-consume + matrix: + n10-p300-memory: + node_count: 10 + max_pods: 30 + repeats: 1 + operation_timeout: 3m + load_type: memory + n10-p700-memory: + node_count: 10 + max_pods: 70 + repeats: 1 + operation_timeout: 7m + load_type: memory + 
n10-p1100-memory: + node_count: 10 + max_pods: 110 + repeats: 1 + operation_timeout: 11m + load_type: memory + n10-p300-cpu: + node_count: 10 + max_pods: 30 + repeats: 1 + operation_timeout: 3m + load_type: cpu + n10-p700-cpu: + node_count: 10 + max_pods: 70 + repeats: 1 + operation_timeout: 7m + load_type: cpu + n10-p1100-cpu: + node_count: 10 + max_pods: 110 + repeats: 1 + operation_timeout: 11m + load_type: cpu + max_parallel: 3 + timeout_in_minutes: 120 + credential_type: service_connection + ssh_key_enabled: false diff --git a/pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml b/pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml deleted file mode 100644 index 5e98687d5..000000000 --- a/pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml +++ /dev/null @@ -1,37 +0,0 @@ -trigger: none - -variables: - SCENARIO_TYPE: perf-eval - SCENARIO_NAME: cri-kbench-cp-bottlerocket - SCENARIO_VERSION: main - -stages: - - stage: aws_westeurope - dependsOn: [] - jobs: - - template: /jobs/competitive-test.yml - parameters: - cloud: aws - regions: - - eu-west-1 - engine: clusterloader2 - engine_input: - image: "ghcr.io/azure/clusterloader2:v20241016" - topology: cri-kbench-cp - matrix: - n3-p300-memory: - node_count: 3 - max_pods: 9 - repeats: 1 - operation_timeout: 3m - load_type: memory - n3-p300-cpu: - node_count: 3 - max_pods: 9 - repeats: 1 - operation_timeout: 3m - load_type: cpu - max_parallel: 3 - timeout_in_minutes: 120 - credential_type: service_connection - ssh_key_enabled: false diff --git a/scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-inputs/aws.tfvars b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/aws.tfvars similarity index 95% rename from scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-inputs/aws.tfvars rename to scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/aws.tfvars index 951d538d5..4f371ade7 100644 --- 
a/scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-inputs/aws.tfvars +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/aws.tfvars @@ -1,5 +1,5 @@ scenario_type = "perf-eval" -scenario_name = "cri-kbench-cp-bottlerocket" +scenario_name = "cri-clusterloader2-immut-host" deletion_delay = "2h" owner = "aks" @@ -84,9 +84,9 @@ eks_config_list = [{ name = "userpool0" ami_type = "BOTTLEROCKET_x86_64" instance_types = ["m5.4xlarge"] - min_size = 3 - max_size = 3 - desired_size = 3 + min_size = 10 + max_size = 10 + desired_size = 10 capacity_type = "ON_DEMAND" taints = [ { diff --git a/scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-test-inputs/aws.json b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/aws.json similarity index 100% rename from scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-test-inputs/aws.json rename to scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/aws.json diff --git a/steps/topology/cri-kbench-cp/collect-clusterloader2.yml b/steps/topology/cri-kbench-cp/collect-clusterloader2.yml deleted file mode 100644 index ee0c8a1bb..000000000 --- a/steps/topology/cri-kbench-cp/collect-clusterloader2.yml +++ /dev/null @@ -1,17 +0,0 @@ -parameters: -- name: cloud - type: string - default: '' -- name: engine_input - type: object - default: {} -- name: regions - type: object - default: {} - -steps: -- template: /steps/engine/clusterloader2/cri/collect.yml - parameters: - cloud: ${{ parameters.cloud }} - engine_input: ${{ parameters.engine_input }} - region: ${{ parameters.regions[0] }} diff --git a/steps/topology/cri-kbench-cp/execute-clusterloader2.yml b/steps/topology/cri-kbench-cp/execute-clusterloader2.yml deleted file mode 100644 index fcdab04db..000000000 --- a/steps/topology/cri-kbench-cp/execute-clusterloader2.yml +++ /dev/null @@ -1,17 +0,0 @@ -parameters: -- name: cloud - type: string - default: '' -- name: engine_input - type: object - default: {} -- name: 
regions - type: object - default: {} - -steps: -- template: /steps/engine/clusterloader2/cri/execute.yml - parameters: - cloud: ${{ parameters.cloud }} - engine_input: ${{ parameters.engine_input }} - region: ${{ parameters.regions[0] }} diff --git a/steps/topology/cri-kbench-cp/validate-resources.yml b/steps/topology/cri-kbench-cp/validate-resources.yml deleted file mode 100644 index 3118e1475..000000000 --- a/steps/topology/cri-kbench-cp/validate-resources.yml +++ /dev/null @@ -1,16 +0,0 @@ -parameters: -- name: cloud - type: string -- name: engine - type: string -- name: regions - type: object - -steps: -- template: /steps/cloud/${{ parameters.cloud }}/update-kubeconfig.yml - parameters: - role: client - region: ${{ parameters.regions[0] }} -- template: /steps/engine/clusterloader2/slo/validate.yml - parameters: - desired_nodes: 7 From 4884836c9c58b0d9d3e9638a79fda9dfd3187e19 Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Fri, 31 Jan 2025 13:44:58 -0800 Subject: [PATCH 3/7] Add Azure Immutable Container Host scenario. 
--- .../cri-clusterloader2-immut-host.yml | 53 +++++++++++++++ .../terraform-inputs/azure.tfvars | 64 +++++++++++++++++++ .../terraform-test-inputs/azure.json | 4 ++ 3 files changed, 121 insertions(+) create mode 100644 scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars create mode 100644 scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json diff --git a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml index 8b784ad79..1492342b1 100644 --- a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml +++ b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml @@ -6,6 +6,59 @@ variables: SCENARIO_VERSION: main stages: + - stage: azure_swedencentral + dependsOn: [] + jobs: + - template: /jobs/competitive-test.yml + parameters: + cloud: azure + regions: + - swedencentral + engine: clusterloader2 + engine_input: + image: "ghcr.io/azure/clusterloader2:v20241016" + topology: cri-resource-consume + matrix: + n10-p300-memory: + node_count: 10 + max_pods: 30 + repeats: 1 + operation_timeout: 3m + load_type: memory + n10-p700-memory: + node_count: 10 + max_pods: 70 + repeats: 1 + operation_timeout: 7m + load_type: memory + n10-p1100-memory: + node_count: 10 + max_pods: 110 + repeats: 1 + operation_timeout: 11m + load_type: memory + n10-p300-cpu: + node_count: 10 + max_pods: 30 + repeats: 1 + operation_timeout: 3m + load_type: cpu + n10-p700-cpu: + node_count: 10 + max_pods: 70 + repeats: 1 + operation_timeout: 7m + load_type: cpu + n10-p1100-cpu: + node_count: 10 + max_pods: 110 + repeats: 1 + operation_timeout: 11m + load_type: cpu + max_parallel: 3 + timeout_in_minutes: 120 + credential_type: service_connection + ssh_key_enabled: false - stage: aws_westeurope dependsOn: [] jobs: diff --git a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars 
b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars new file mode 100644 index 000000000..1a3a28686 --- /dev/null +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars @@ -0,0 +1,64 @@ +scenario_type = "perf-eval" +scenario_name = "cri-clusterloader2-immut-host" +deletion_delay = "2h" +owner = "aks" + +network_config_list = [ + { + role = "client" + vnet_name = "cri-vnet" + vnet_address_space = "10.0.0.0/9" + subnet = [ + { + name = "cri-subnet-1" + address_prefix = "10.0.0.0/16" + } + ] + network_security_group_name = "" + nic_public_ip_associations = [] + nsr_rules = [] + } +] + +aks_cli_config_list = [ + { + role = "client" + aks_name = "cri-resource-consume" + dns_prefix = "cri" + subnet_name = "cri-vnet" + sku_tier = "Standard" + network_profile = { + network_plugin = "azure" + network_plugin_mode = "overlay" + pod_cidr = "10.0.0.0/9" + service_cidr = "192.168.0.0/16" + dns_service_ip = "192.168.0.10" + } + default_node_pool = { + name = "default" + node_count = 3 + vm_size = "Standard_D16_v3" + os_disk_type = "Managed" + only_critical_addons_enabled = true + temporary_name_for_rotation = "defaulttmp" + } + extra_node_pool = [ + { + name = "prompool" + node_count = 1 + auto_scaling_enabled = false + vm_size = "Standard_D16_v3" + node_labels = { "prometheus" = "true" } + }, + { + name = "userpool0" + node_count = 10 + auto_scaling_enabled = false + vm_size = "Standard_D16_v3" + node_taints = ["cri-resource-consume=true:NoSchedule"] + node_labels = { "cri-resource-consume" = "true" } + } + ] + kubernetes_version = "1.31" + } +] diff --git a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json new file mode 100644 index 000000000..ea27a572c --- /dev/null +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json @@ -0,0 +1,4 @@ +{ + "run_id" : "123456789", + 
"region" : "eastus" +} From 28f306d092e12e426e2e12941d793cf887aaa1cd Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Mon, 3 Feb 2025 14:00:25 -0800 Subject: [PATCH 4/7] Aks_cli to support node_labels and user AKS custom header for extra pools. --- modules/terraform/azure/aks-cli/main.tf | 13 +++++++++++++ modules/terraform/azure/aks-cli/variables.tf | 2 ++ modules/terraform/azure/variables.tf | 4 ++++ 3 files changed, 19 insertions(+) diff --git a/modules/terraform/azure/aks-cli/main.tf b/modules/terraform/azure/aks-cli/main.tf index 99543ffdb..d69970f07 100644 --- a/modules/terraform/azure/aks-cli/main.tf +++ b/modules/terraform/azure/aks-cli/main.tf @@ -121,6 +121,12 @@ resource "terraform_data" "aks_cli" { "--nodepool-name", var.aks_cli_config.default_node_pool.name, "--node-count", var.aks_cli_config.default_node_pool.node_count, "--node-vm-size", var.aks_cli_config.default_node_pool.vm_size, + length(var.aks_cli_config.default_node_pool.node_labels) == 0 ? "" : format("%s %s", + "--labels", join(" ", [ + for label_name, label_value in var.aks_cli_config.default_node_pool.node_labels : + format("%s=%s", label_name, label_value) + ]) + ), "--vm-set-type", var.aks_cli_config.default_node_pool.vm_set_type, local.optional_parameters, local.subnet_id_parameter, @@ -159,6 +165,13 @@ resource "terraform_data" "aks_nodepool_cli" { "--nodepool-name", each.value.name, "--node-count", each.value.node_count, "--node-vm-size", each.value.vm_size, + local.aks_custom_headers_flags, + length(each.value.node_labels) == 0 ? 
"" : format("%s %s", + "--labels", join(" ", [ + for label_name, label_value in each.value.node_labels : + format("%s=%s", label_name, label_value) + ]) + ), "--vm-set-type", each.value.vm_set_type, ]) } diff --git a/modules/terraform/azure/aks-cli/variables.tf b/modules/terraform/azure/aks-cli/variables.tf index 8a472cc19..fcc8feb64 100644 --- a/modules/terraform/azure/aks-cli/variables.tf +++ b/modules/terraform/azure/aks-cli/variables.tf @@ -36,6 +36,7 @@ variable "aks_cli_config" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") }) extra_node_pool = optional( @@ -43,6 +44,7 @@ variable "aks_cli_config" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") })), []) optional_parameters = optional(list(object({ diff --git a/modules/terraform/azure/variables.tf b/modules/terraform/azure/variables.tf index 9b9431975..f8a85cd06 100644 --- a/modules/terraform/azure/variables.tf +++ b/modules/terraform/azure/variables.tf @@ -13,6 +13,7 @@ variable "json_input" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = string })) aks_cli_user_node_pool = optional( @@ -20,6 +21,7 @@ variable "json_input" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = string })) ) @@ -217,6 +219,7 @@ variable "aks_cli_config_list" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") }) extra_node_pool = optional( @@ -224,6 +227,7 @@ variable "aks_cli_config_list" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") })), []) optional_parameters = optional(list(object({ From 
ded686e5a48fd2292ad2f2c3c96733988b34f6fb Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Mon, 3 Feb 2025 18:03:37 -0800 Subject: [PATCH 5/7] Run clusterloader2 on Azure immut host in sweden. --- .../cri-clusterloader2-immut-host.yml | 93 ++++--------------- .../terraform-inputs/azure.tfvars | 10 +- .../validate-resources.yml | 2 +- 3 files changed, 26 insertions(+), 79 deletions(-) diff --git a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml index 1492342b1..274911b06 100644 --- a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml +++ b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml @@ -19,96 +19,43 @@ stages: image: "ghcr.io/azure/clusterloader2:v20241016" topology: cri-resource-consume matrix: - n10-p300-memory: - node_count: 10 - max_pods: 30 + n3-p300-memory: + node_count: 3 + max_pods: 9 repeats: 1 operation_timeout: 3m load_type: memory - n10-p700-memory: - node_count: 10 - max_pods: 70 + n3-p700-memory: + node_count: 3 + max_pods: 21 repeats: 1 operation_timeout: 7m load_type: memory - n10-p1100-memory: - node_count: 10 - max_pods: 110 + n3-p1100-memory: + node_count: 3 + max_pods: 33 repeats: 1 operation_timeout: 11m load_type: memory - n10-p300-cpu: - node_count: 10 - max_pods: 30 + n3-p300-cpu: + node_count: 3 + max_pods: 9 repeats: 1 operation_timeout: 3m load_type: cpu - n10-p700-cpu: - node_count: 10 - max_pods: 70 + n3-p700-cpu: + node_count: 3 + max_pods: 21 repeats: 1 operation_timeout: 7m load_type: cpu - n10-p1100-cpu: - node_count: 10 - max_pods: 110 + n3-p1100-cpu: + node_count: 3 + max_pods: 33 repeats: 1 operation_timeout: 11m load_type: cpu - max_parallel: 3 - timeout_in_minutes: 120 - credential_type: service_connection - ssh_key_enabled: false - - stage: aws_westeurope - dependsOn: [] - jobs: - - template: /jobs/competitive-test.yml - parameters: - cloud: aws - regions: - - eu-west-1 - engine: clusterloader2 - 
engine_input: - image: "ghcr.io/azure/clusterloader2:v20241016" - topology: cri-resource-consume - matrix: - n10-p300-memory: - node_count: 10 - max_pods: 30 - repeats: 1 - operation_timeout: 3m - load_type: memory - n10-p700-memory: - node_count: 10 - max_pods: 70 - repeats: 1 - operation_timeout: 7m - load_type: memory - n10-p1100-memory: - node_count: 10 - max_pods: 110 - repeats: 1 - operation_timeout: 11m - load_type: memory - n10-p300-cpu: - node_count: 10 - max_pods: 30 - repeats: 1 - operation_timeout: 3m - load_type: cpu - n10-p700-cpu: - node_count: 10 - max_pods: 70 - repeats: 1 - operation_timeout: 7m - load_type: cpu - n10-p1100-cpu: - node_count: 10 - max_pods: 110 - repeats: 1 - operation_timeout: 11m - load_type: cpu - max_parallel: 3 - timeout_in_minutes: 120 + max_parallel: 1 + timeout_in_minutes: 240 credential_type: service_connection ssh_key_enabled: false diff --git a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars index 1a3a28686..921fa8a5f 100644 --- a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars @@ -36,8 +36,8 @@ aks_cli_config_list = [ } default_node_pool = { name = "default" - node_count = 3 - vm_size = "Standard_D16_v3" + node_count = 2 + vm_size = "Standard_D16_v4" os_disk_type = "Managed" only_critical_addons_enabled = true temporary_name_for_rotation = "defaulttmp" @@ -47,14 +47,14 @@ aks_cli_config_list = [ name = "prompool" node_count = 1 auto_scaling_enabled = false - vm_size = "Standard_D16_v3" + vm_size = "Standard_D16_v4" node_labels = { "prometheus" = "true" } }, { name = "userpool0" - node_count = 10 + node_count = 3 auto_scaling_enabled = false - vm_size = "Standard_D16_v3" + vm_size = "Standard_D16_v4" node_taints = ["cri-resource-consume=true:NoSchedule"] node_labels = { "cri-resource-consume" 
= "true" } } diff --git a/steps/topology/cri-resource-consume/validate-resources.yml b/steps/topology/cri-resource-consume/validate-resources.yml index 1efbf41dd..b5d380860 100644 --- a/steps/topology/cri-resource-consume/validate-resources.yml +++ b/steps/topology/cri-resource-consume/validate-resources.yml @@ -13,4 +13,4 @@ steps: region: ${{ parameters.regions[0] }} - template: /steps/engine/clusterloader2/slo/validate.yml parameters: - desired_nodes: 14 + desired_nodes: 6 From 8ef42a6de4730d318297dba7b5d5695a79c29328 Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Mon, 3 Feb 2025 23:49:28 -0800 Subject: [PATCH 6/7] Temp rollback: one test to build a default cluster. --- jobs/competitive-test.yml | 7 ----- .../cri-clusterloader2-immut-host.yml | 30 ------------------- 2 files changed, 37 deletions(-) diff --git a/jobs/competitive-test.yml b/jobs/competitive-test.yml index c4872ad11..8a3121f1a 100644 --- a/jobs/competitive-test.yml +++ b/jobs/competitive-test.yml @@ -97,10 +97,3 @@ jobs: regions: ${{ parameters.regions }} engine_input: ${{ parameters.engine_input }} credential_type: ${{ parameters.credential_type }} - - template: /steps/cleanup-resources.yml - parameters: - cloud: ${{ parameters.cloud }} - regions: ${{ parameters.regions }} - terraform_arguments: ${{ parameters.terraform_arguments }} - retry_attempt_count: ${{ parameters.retry_attempt_count }} - credential_type: ${{ parameters.credential_type }} diff --git a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml index 274911b06..b7cdd28f0 100644 --- a/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml +++ b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml @@ -25,36 +25,6 @@ stages: repeats: 1 operation_timeout: 3m load_type: memory - n3-p700-memory: - node_count: 3 - max_pods: 21 - repeats: 1 - operation_timeout: 7m - load_type: memory - n3-p1100-memory: - node_count: 3 - 
max_pods: 33 - repeats: 1 - operation_timeout: 11m - load_type: memory - n3-p300-cpu: - node_count: 3 - max_pods: 9 - repeats: 1 - operation_timeout: 3m - load_type: cpu - n3-p700-cpu: - node_count: 3 - max_pods: 21 - repeats: 1 - operation_timeout: 7m - load_type: cpu - n3-p1100-cpu: - node_count: 3 - max_pods: 33 - repeats: 1 - operation_timeout: 11m - load_type: cpu max_parallel: 1 timeout_in_minutes: 240 credential_type: service_connection From ca5c54ace993b19760b1737542caee508641199f Mon Sep 17 00:00:00 2001 From: Anton Pegushin Date: Tue, 4 Feb 2025 06:36:39 -0800 Subject: [PATCH 7/7] Target AzureLinux os sku --- .../terraform-inputs/azure.tfvars | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars index 921fa8a5f..c5207780c 100644 --- a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars @@ -20,7 +20,7 @@ network_config_list = [ } ] -aks_cli_config_list = [ +aks_config_list = [ { role = "client" aks_name = "cri-resource-consume" @@ -39,6 +39,7 @@ aks_cli_config_list = [ node_count = 2 vm_size = "Standard_D16_v4" os_disk_type = "Managed" + os_sku = "AzureLinux" only_critical_addons_enabled = true temporary_name_for_rotation = "defaulttmp" } @@ -48,6 +49,7 @@ aks_cli_config_list = [ node_count = 1 auto_scaling_enabled = false vm_size = "Standard_D16_v4" + os_sku = "AzureLinux" node_labels = { "prometheus" = "true" } }, { @@ -55,6 +57,7 @@ aks_cli_config_list = [ node_count = 3 auto_scaling_enabled = false vm_size = "Standard_D16_v4" + os_sku = "AzureLinux" node_taints = ["cri-resource-consume=true:NoSchedule"] node_labels = { "cri-resource-consume" = "true" } }