From b581b68a94e50dfd288088b11e68a0d21758aa17 Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Wed, 13 Oct 2021 18:51:03 -0400 Subject: [PATCH] Fully automate cluster buildout. Add azure file share mount to stress tests. --- .../stress-testing/deploy-stress-tests.ps1 | 4 +- tools/stress-cluster/cluster/README.md | 2 + .../cluster/azure/cluster/cluster.bicep | 60 ++++--- .../cluster/azure/cluster/storage.bicep | 21 +++ tools/stress-cluster/cluster/azure/main.bicep | 43 +++++- .../cluster/azure/parameters/dev.json | 12 +- .../cluster/azure/parameters/prod.json | 7 +- .../cluster/azure/parameters/test.json | 15 +- .../templates/_container_env.tpl | 4 + .../templates/_deploy_configmap.tpl | 2 +- .../templates/_deploy_volumes.tpl | 4 +- .../templates/_init_deploy.tpl | 2 +- .../templates/_init_env.tpl | 5 +- .../templates/_stress_test.tpl | 17 ++ .../stress-test-cluster-secret-provider.yaml | 16 ++ .../kubernetes/stress-test-addons/values.yaml | 32 ++-- tools/stress-cluster/cluster/provision.ps1 | 146 ++++++++++++++++++ .../Stress.Watcher/src/PodEventHandler.cs | 16 ++ 18 files changed, 360 insertions(+), 48 deletions(-) create mode 100644 tools/stress-cluster/cluster/azure/cluster/storage.bicep create mode 100644 tools/stress-cluster/cluster/provision.ps1 diff --git a/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 index 76eff6a31e1f..02e91cc843ac 100644 --- a/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 +++ b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 @@ -76,9 +76,9 @@ function DeployStressTests( [string]$environment = 'test', [string]$repository = 'images', [boolean]$pushImages = $false, - [string]$clusterGroup = 'rg-stress-test-cluster-', + [string]$clusterGroup = 'rg-stress-cluster-test', [string]$deployId = 'local', - [string]$subscription = 'Azure SDK Test Resources' + [string]$subscription = 'Azure SDK Developer Playground' ) { if 
($PSCmdlet.ParameterSetName -eq 'DoLogin') { Login $subscription $clusterGroup $pushImages diff --git a/tools/stress-cluster/cluster/README.md b/tools/stress-cluster/cluster/README.md index c957217ddbd5..86afd4d2a48d 100644 --- a/tools/stress-cluster/cluster/README.md +++ b/tools/stress-cluster/cluster/README.md @@ -122,9 +122,11 @@ az ad sp create-for-rbac -n 'stress-test-provisioner' --role Contributor --scope Create an env file with the service principal values created above: ``` +AZURE_CLIENT_OID= AZURE_CLIENT_ID= AZURE_CLIENT_SECRET= AZURE_TENANT_ID= +AZURE_SUBSCRIPTION_ID= ``` Upload it to the static keyvault: diff --git a/tools/stress-cluster/cluster/azure/cluster/cluster.bicep b/tools/stress-cluster/cluster/azure/cluster/cluster.bicep index 828082951283..54d6a0824c51 100644 --- a/tools/stress-cluster/cluster/azure/cluster/cluster.bicep +++ b/tools/stress-cluster/cluster/azure/cluster/cluster.bicep @@ -4,20 +4,52 @@ param groupSuffix string param dnsPrefix string = 's1' param clusterName string param location string = resourceGroup().location -param agentVMSize string = 'Standard_D2_v3' - -@minValue(1) -@maxValue(50) -@description('The number of nodes for the cluster.') -param agentCount int = 3 +param enableHighMemAgentPool bool = false // monitoring parameters param enableMonitoring bool = false param workspaceId string -var kubernetesVersion = '1.20.5' +var kubernetesVersion = '1.21.2' var nodeResourceGroup = 'rg-nodes-${dnsPrefix}-${clusterName}-${groupSuffix}' -var agentPoolName = 'agentpool01' + +var defaultAgentPool = { + name: 'default' + count: 3 + minCount: 3 + maxCount: 9 + mode: 'System' + vmSize: 'Standard_D2_v3' + type: 'VirtualMachineScaleSets' + osType: 'Linux' + enableAutoScaling: true + enableEncryptionAtHost: true + nodeLabels: { + 'sku': 'default' + } +} + +var highMemAgentPool = { + name: 'highmemory' + count: 1 + minCount: 1 + maxCount: 3 + mode: 'System' + vmSize: 'Standard_D4ds_v4' + type: 'VirtualMachineScaleSets' + osType: 'Linux' + 
enableAutoScaling: true + enableEncryptionAtHost: true + nodeLabels: { + 'sku': 'highMem' + } +} + +var agentPools = concat([ + defaultAgentPool + ], enableHighMemAgentPool ? [ + highMemAgentPool + ] : []) resource cluster 'Microsoft.ContainerService/managedClusters@2020-09-01' = { name: clusterName @@ -41,17 +73,7 @@ resource cluster 'Microsoft.ContainerService/managedClusters@2020-09-01' = { kubernetesVersion: kubernetesVersion enableRBAC: true dnsPrefix: dnsPrefix - agentPoolProfiles: [ - { - name: agentPoolName - count: agentCount - mode: 'System' - vmSize: agentVMSize - type: 'VirtualMachineScaleSets' - osType: 'Linux' - enableAutoScaling: false - } - ] + agentPoolProfiles: agentPools servicePrincipalProfile: { clientId: 'msi' } diff --git a/tools/stress-cluster/cluster/azure/cluster/storage.bicep b/tools/stress-cluster/cluster/azure/cluster/storage.bicep new file mode 100644 index 000000000000..afcc36ed8aeb --- /dev/null +++ b/tools/stress-cluster/cluster/azure/cluster/storage.bicep @@ -0,0 +1,21 @@ +param location string = resourceGroup().location +param storageName string +param fileShareName string + +resource storage 'Microsoft.Storage/storageAccounts@2019-06-01' = { + name: storageName + location: location + kind: 'StorageV2' + sku: { + name: 'Standard_LRS' + } +} + +resource fileshare 'Microsoft.Storage/storageAccounts/fileServices/shares@2021-04-01' = { + name: '${storage.name}/default/${fileShareName}' + properties: { } +} + +output name string = storage.name +output key string = storage.listKeys().keys[0].value +output fileShareName string = fileShareName diff --git a/tools/stress-cluster/cluster/azure/main.bicep b/tools/stress-cluster/cluster/azure/main.bicep index b1beb6a4309c..594c7b66a4f6 100644 --- a/tools/stress-cluster/cluster/azure/main.bicep +++ b/tools/stress-cluster/cluster/azure/main.bicep @@ -1,5 +1,6 @@ targetScope = 'subscription' +param subscriptionId string = '' param groupSuffix string param clusterName string param clusterLocation 
string = 'westus2' @@ -8,6 +9,8 @@ param staticTestSecretsKeyvaultGroup string param monitoringLocation string = 'centralus' param tags object param enableMonitoring bool = false +param enableHighMemAgentPool bool = false +param enableDebugStorage bool = false // Azure Developer Platform Team Group // https://ms.portal.azure.com/#blade/Microsoft_AAD_IAM/GroupDetailsMenuBlade/Overview/groupId/56709ad9-8962-418a-ad0d-4b25fa962bae @@ -15,8 +18,10 @@ param accessGroups array = [ '56709ad9-8962-418a-ad0d-4b25fa962bae' ] +var groupName = 'rg-stress-cluster-${groupSuffix}' + resource group 'Microsoft.Resources/resourceGroups@2020-10-01' = { - name: 'rg-stress-test-cluster-${groupSuffix}' + name: groupName location: clusterLocation tags: tags } @@ -52,6 +57,7 @@ module cluster 'cluster/cluster.bicep' = { tags: tags groupSuffix: groupSuffix enableMonitoring: enableMonitoring + enableHighMemAgentPool: enableHighMemAgentPool workspaceId: enableMonitoring ? logWorkspace.outputs.id : '' } } @@ -60,15 +66,34 @@ module containerRegistry 'cluster/acr.bicep' = { name: 'containerRegistry' scope: group params: { - registryName: '${replace(clusterName, '-', '')}registry' + registryName: '${replace(clusterName, '-', '')}${resourceSuffix}' location: clusterLocation objectIds: concat(accessGroups, array(cluster.outputs.kubeletIdentityObjectId)) } } +module storage 'cluster/storage.bicep' = if (enableDebugStorage) { + name: 'storage' + scope: group + params: { + storageName: 'stressdebug${resourceSuffix}' + fileShareName: 'stressfiles${resourceSuffix}' + location: clusterLocation + } +} + var appInsightsInstrumentationKeySecretName = 'appInsightsInstrumentationKey-${resourceSuffix}' +// Value is in dotenv format as it will be appended to stress test container dotenv files var appInsightsInstrumentationKeySecretValue = 'APPINSIGHTS_INSTRUMENTATIONKEY=${appInsights.outputs.instrumentationKey}\n' +// Storage account information used for kubernetes fileshare volume mounting via the azure 
files csi driver
+// See https://docs.microsoft.com/en-us/azure/aks/azure-files-volume#create-a-kubernetes-secret
+// See https://docs.microsoft.com/en-us/azure/aks/azure-files-csi
+var debugStorageKeySecretName = 'debugStorageKey-${resourceSuffix}'
+var debugStorageKeySecretValue = enableDebugStorage ? storage.outputs.key : '' // storage module is conditional: only dereference its outputs when it is deployed
+var debugStorageAccountSecretName = 'debugStorageAccount-${resourceSuffix}'
+var debugStorageAccountSecretValue = enableDebugStorage ? storage.outputs.name : '' // NOTE(review): confirm keyvault.bicep accepts an empty secretValue when debug storage is off
+
 module keyvault 'cluster/keyvault.bicep' = if (enableMonitoring) {
   name: 'keyvault'
   scope: group
@@ -83,6 +108,14 @@ module keyvault 'cluster/keyvault.bicep' = if (enableMonitoring) {
         secretName: appInsightsInstrumentationKeySecretName
         secretValue: appInsightsInstrumentationKeySecretValue
       }
+      {
+        secretName: debugStorageKeySecretName
+        secretValue: debugStorageKeySecretValue
+      }
+      {
+        secretName: debugStorageAccountSecretName
+        secretValue: debugStorageAccountSecretValue
+      }
     ]
   }
 }
@@ -99,10 +132,14 @@ module accessPolicy 'cluster/static-vault-access-policy.bicep' = {
 }
 
 output STATIC_TEST_SECRETS_KEYVAULT string = staticTestSecretsKeyvaultName
-output CLUSTER_KEYVAULT string = keyvault.outputs.keyvaultName
+output CLUSTER_TEST_SECRETS_KEYVAULT string = enableMonitoring ? keyvault.outputs.keyvaultName : '' // keyvault module is only deployed when enableMonitoring is true
 output SECRET_PROVIDER_CLIENT_ID string = cluster.outputs.secretProviderClientId
 output CLUSTER_NAME string = cluster.outputs.clusterName
 output CONTAINER_REGISTRY_NAME string = containerRegistry.outputs.containerRegistryName
 output APPINSIGHTS_KEY_SECRET_NAME string = appInsightsInstrumentationKeySecretName
+output DEBUG_STORAGE_KEY_SECRET_NAME string = debugStorageKeySecretName
+output DEBUG_STORAGE_ACCOUNT_SECRET_NAME string = debugStorageAccountSecretName
+output DEBUG_FILESHARE_NAME string = enableDebugStorage ? storage.outputs.fileShareName : '' // empty when the debug storage module is not deployed
 output RESOURCE_GROUP string = group.name
+output SUBSCRIPTION_ID string = subscriptionId
 output TENANT_ID string = subscription().tenantId
diff --git a/tools/stress-cluster/cluster/azure/parameters/dev.json
b/tools/stress-cluster/cluster/azure/parameters/dev.json index 1de6b9a6863b..7f3f79a2801f 100644 --- a/tools/stress-cluster/cluster/azure/parameters/dev.json +++ b/tools/stress-cluster/cluster/azure/parameters/dev.json @@ -2,7 +2,10 @@ "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", "contentVersion": "1.0.0.0", "parameters": { - "groupName": { + "subscriptionId": { + "value": // add me + }, + "groupSuffix": { "value": // add me }, "clusterName": { @@ -12,10 +15,13 @@ "value": "westus2" }, "staticTestSecretsKeyvaultName": { - "value": "StressTestSecrets" + "value": "stress-secrets-dev" }, "staticTestSecretsKeyvaultGroup": { - "value": "rg-StressTestSecrets" + "value": "rg-stress-secrets-dev" + }, + "enableDebugStorage": { + "value": true }, "tags": { "value": { diff --git a/tools/stress-cluster/cluster/azure/parameters/prod.json b/tools/stress-cluster/cluster/azure/parameters/prod.json index 711a854de79d..2cbdc63b5794 100644 --- a/tools/stress-cluster/cluster/azure/parameters/prod.json +++ b/tools/stress-cluster/cluster/azure/parameters/prod.json @@ -2,6 +2,9 @@ "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", "contentVersion": "1.0.0.0", "parameters": { + "subscriptionId": { + "value": "2cd617ea-1866-46b1-90e3-fffb087ebf9b" + }, "groupSuffix": { "value": "prod" }, @@ -15,10 +18,10 @@ "value": "centralus" }, "staticTestSecretsKeyvaultName": { - "value": "StressTestSecrets" + "value": "stress-secrets-prod" }, "staticTestSecretsKeyvaultGroup": { - "value": "rg-StressTestSecrets" + "value": "rg-stress-secrets-prod" }, "enableMonitoring": { "value": true diff --git a/tools/stress-cluster/cluster/azure/parameters/test.json b/tools/stress-cluster/cluster/azure/parameters/test.json index 46025581ec32..4a13646fe208 100644 --- a/tools/stress-cluster/cluster/azure/parameters/test.json +++ b/tools/stress-cluster/cluster/azure/parameters/test.json @@ -2,8 +2,11 @@ "$schema": 
"https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", "contentVersion": "1.0.0.0", "parameters": { + "subscriptionId": { + "value": "faa080af-c1d8-40ad-9cce-e1a450ca5b57" + }, "groupSuffix": { - "value": "" + "value": "test" }, "clusterName": { "value": "stress-test" @@ -12,14 +15,20 @@ "value": "westus2" }, "staticTestSecretsKeyvaultName": { - "value": "StressTestSecrets" + "value": "stress-secrets-test" }, "staticTestSecretsKeyvaultGroup": { - "value": "rg-StressTestSecrets" + "value": "rg-stress-secrets-test" }, "enableMonitoring": { "value": true }, + "enableDebugStorage": { + "value": true + }, + "enableHighMemAgentPool": { + "value": true + }, "tags": { "value": { "environment": "Test", diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl index 5354973fb2ed..60f7786c7db7 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl @@ -2,7 +2,11 @@ env: - name: ENV_FILE value: /mnt/outputs/.env + - name: DEBUG_SHARE + value: /mnt/share/ volumeMounts: - name: test-env-{{ lower .Scenario }}-{{ .Release.Name }}-{{ .Release.Revision }} mountPath: /mnt/outputs + - name: debug-file-share-{{ .Release.Name }} + mountPath: /mnt/share {{- end -}} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_configmap.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_configmap.tpl index 61a93746ef25..013fefe9f530 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_configmap.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_configmap.tpl @@ -2,7 +2,7 @@ apiVersion: v1 kind: ConfigMap metadata: - name: "{{ .Release.Name }}-test-resources" + 
name: "{{ .Release.Name }}-{{ .Release.Revision }}-test-resources" namespace: {{ .Release.Namespace }} data: template: | diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_volumes.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_volumes.tpl index 5e8b70ab2741..f4c9e9d36d3c 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_volumes.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_deploy_volumes.tpl @@ -1,7 +1,7 @@ {{ define "stress-test-addons.deploy-volumes" }} -- name: {{ .Release.Name }}-test-resources +- name: {{ .Release.Name }}-{{ .Release.Revision }}-test-resources configMap: - name: "{{ .Release.Name }}-test-resources" + name: "{{ .Release.Name }}-{{ .Release.Revision }}-test-resources" items: - key: template path: test-resources.json diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl index d6da18d550a4..021b866e73ed 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl @@ -1,5 +1,5 @@ {{ define "stress-test-addons.init-deploy" }} -- name: azure-deployer +- name: init-azure-deployer image: mcr.microsoft.com/azure-cli command: ['bash', '-c'] args: diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl index c1c83ed515a5..4f7e2f40816c 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl @@ -1,9 +1,10 @@ {{ define "stress-test-addons.init-env" }} -- name: test-env-initializer +- name: init-test-env 
image: k8s.gcr.io/e2e-test-images/busybox:1.29 command: ['sh', '-c'] args: - # Merge all mounted keyvault secrets into env file + # Merge all mounted keyvault secrets into env file. + # Secret values are expected to be in format = - 'cat /mnt/secrets/static/* /mnt/secrets/cluster/* > $ENV_FILE' env: - name: ENV_FILE diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl index 8f85715398fc..80f406dad22f 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl @@ -27,6 +27,8 @@ spec: {{- include "stress-test-addons.env-volumes" . | nindent 8 }} # Volume template for mounting ARM templates {{- include "stress-test-addons.deploy-volumes" . | nindent 8 }} + # Volume template for mounting azure file share for debugging + {{- include "stress-test-addons.debug-file-volumes" . | nindent 8 }} initContainers: # Init container template for injecting secrets # (e.g. app insights instrumentation key, azure client credentials) @@ -44,6 +46,13 @@ spec: --- {{- /* Copy scenario name into top level key of global context */}} {{ $instance := deepCopy $global | merge (dict "Scenario" . ) -}} +{{- /* + The .Values context here corresponds to the parent chart that includes this library as a dependency, + meaning there will be a .Values.stress-test-addons key that contains the values specific to this library. + Given that we are calling into library templates, replace the values context with only the nested + context for this sub-chart. 
+*/ -}} +{{ $_ := set $instance "Values" (index $instance "Values" "stress-test-addons") -}} {{- $jobOverride := fromYaml (include "stress-test-addons.job-wrapper.tpl" (list $instance $podDefinition)) -}} {{- $tpl := fromYaml (include "stress-test-addons.deploy-job-template.tpl" $instance) -}} {{- toYaml (merge $jobOverride $tpl) -}} @@ -71,6 +80,7 @@ spec: volumes: # Volume template for mounting secrets {{- include "stress-test-addons.env-volumes" . | nindent 8 }} + {{- include "stress-test-addons.debug-file-volumes" . | nindent 8 }} initContainers: # Init container template for injecting secrets # (e.g. app insights instrumentation key, azure client credentials) @@ -85,6 +95,13 @@ spec: --- {{- /* Copy scenario name into top level key of global context */}} {{ $instance := deepCopy $global | merge (dict "Scenario" . ) -}} +{{- /* + The .Values context here corresponds to the parent chart that includes this library as a dependency, + meaning there will be a .Values.stress-test-addons key that contains the values specific to this library. + Given that we are calling into library templates, replace the values context with only the nested + context for this sub-chart. 
+*/ -}} +{{ $_ := set $instance "Values" (index $instance "Values" "stress-test-addons") -}} {{- $jobOverride := fromYaml (include "stress-test-addons.job-wrapper.tpl" (list $instance $podDefinition)) -}} {{- $tpl := fromYaml (include "stress-test-addons.env-job-template.tpl" $instance) -}} {{- toYaml (merge $jobOverride $tpl) -}} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-cluster-secret-provider.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-cluster-secret-provider.yaml index 3e61093879df..61f984034c5f 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-cluster-secret-provider.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-cluster-secret-provider.yaml @@ -11,6 +11,16 @@ spec: data: - objectName: {{ get .Values.appInsightsKeySecretName .Values.env }} key: value + # Storage account information used for kubernetes fileshare volume mounting via the azure files csi driver + # See https://docs.microsoft.com/en-us/azure/aks/azure-files-volume#create-a-kubernetes-secret + # See https://docs.microsoft.com/en-us/azure/aks/azure-files-csi + - secretName: debugstorageaccountconfig + type: Opaque + data: + - objectName: {{ get .Values.debugStorageKeySecretName .Values.env }} + key: azurestorageaccountkey + - objectName: {{ get .Values.debugStorageAccountSecretName .Values.env }} + key: azurestorageaccountname parameters: useVMManagedIdentity: "true" userAssignedIdentityID: {{ get .Values.secretProviderIdentity .Values.env }} # az vmss identity show ... 
@@ -20,4 +30,10 @@ spec: - | objectName: {{ get .Values.appInsightsKeySecretName .Values.env }} objectType: secret + - | + objectName: {{ get .Values.debugStorageKeySecretName .Values.env }} + objectType: secret + - | + objectName: {{ get .Values.debugStorageAccountSecretName .Values.env }} + objectType: secret tenantId: {{ get .Values.tenantId .Values.env }} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml index 27069a7ce4e6..5e15fec269c5 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml @@ -1,21 +1,32 @@ env: test - appInsightsKeySecretName: - test: appInsightsInstrumentationKey-uj7jqs4ukw2gi + test: appInsightsInstrumentationKey-tbiruti6oi24k prod: appInsightsInstrumentationKey-dqojlttkovp2c - dev: 'not-specified' + dev: "" +debugStorageKeySecretName: + test: debugStorageKey-tbiruti6oi24k + prod: "" + dev: "" +debugStorageAccountSecretName: + test: debugStorageAccount-tbiruti6oi24k + prod: "" + dev: "" +debugFileShareName: + test: stressfilestbiruti6oi24k + prod: "" + dev: "" staticTestSecretsKeyvaultName: - test: StressTestSecrets - prod: StressTestSecrets - dev: 'not-specified' + test: stress-secrets-test + prod: StressSecretsProd + dev: "" clusterTestSecretsKeyvaultName: - test: stress-kv-uj7jqs4ukw2gi + test: stress-kv-tbiruti6oi24k prod: stress-kv-dqojlttkovp2c - dev: 'not-specified' + dev: "" secretProviderIdentity: - test: bc7712b9-1622-4b7f-9943-604c73cda131 + test: 9eca3e6f-842f-495f-b106-4f3331406e79 prod: ea706f92-1d9a-4611-9cde-8305aa3d9e98 - dev: 'not-specified' + dev: "" subscription: test: public prod: public @@ -24,3 +35,4 @@ tenantId: test: 72f988bf-86f1-41af-91ab-2d7cd011db47 prod: 72f988bf-86f1-41af-91ab-2d7cd011db47 dev: 72f988bf-86f1-41af-91ab-2d7cd011db47 + diff --git a/tools/stress-cluster/cluster/provision.ps1 
b/tools/stress-cluster/cluster/provision.ps1
new file mode 100644
index 000000000000..4d4d86c2b2dc
--- /dev/null
+++ b/tools/stress-cluster/cluster/provision.ps1
@@ -0,0 +1,146 @@
+param (
+    [string]$env = 'test'
+)
+
+function Run()
+{ # Echoes the command line, invokes it, and reports a non-zero exit code without terminating the script.
+    Write-Host "`n==> $args`n" -ForegroundColor Green
+    $command, $arguments = $args
+    & $command $arguments
+    if ($LASTEXITCODE) {
+        Write-Error "Command '$args' failed with code: $LASTEXITCODE" -ErrorAction 'Continue'
+    }
+}
+
+function RunOrExitOnFailure()
+{ # Same as Run, but exits the script with the command's exit code when that code is non-zero.
+    Run @args
+    if ($LASTEXITCODE) {
+        exit $LASTEXITCODE
+    }
+}
+
+function DeployStaticResources([hashtable]$params) { # Creates the static secrets resource group/keyvault and a provisioner service principal, then stores its credentials as a dotenv secret.
+    Write-Host "Deploying static resources"
+
+    RunOrExitOnFailure az group create `
+        -n $params.staticTestSecretsKeyvaultGroup `
+        -l $params.clusterLocation `
+        --subscription $params.subscriptionId
+    $kv = Run az keyvault show `
+        -n $params.staticTestSecretsKeyvaultName `
+        -g $params.staticTestSecretsKeyvaultGroup `
+        --subscription $params.subscriptionId
+    if (!$kv) {
+        RunOrExitOnFailure az keyvault create `
+            -n $params.staticTestSecretsKeyvaultName `
+            -g $params.staticTestSecretsKeyvaultGroup `
+            --subscription $params.subscriptionId
+    }
+    # The name must be double-quoted: single quotes would pass the literal text '$env' instead of the environment name.
+    $sp = RunOrExitOnFailure az ad sp create-for-rbac `
+        -o json `
+        -n "stress-provisioner-$env" `
+        --role Contributor `
+        --scopes "/subscriptions/$($params.subscriptionId)"
+    $spInfo = $sp | ConvertFrom-Json
+    $oid = (RunOrExitOnFailure az ad sp show -o json --id $spInfo.appId | ConvertFrom-Json).objectId
+
+    $credentials = @{
+        AZURE_CLIENT_ID = $spInfo.appId
+        AZURE_CLIENT_SECRET = $spInfo.password
+        AZURE_CLIENT_OID = $oid
+        AZURE_TENANT_ID = $spInfo.tenant
+        AZURE_SUBSCRIPTION_ID = $params.subscriptionId
+    }
+    # Serialize the credentials as KEY=VALUE lines (dotenv format), one per line.
+    $dotenv = $credentials.GetEnumerator() | ForEach-Object { "$($_.Key)=$($_.Value)" }
+    $secret = $dotenv -join "`n"
+
+    RunOrExitOnFailure az keyvault secret set --vault-name $params.staticTestSecretsKeyvaultName --value $secret -n public
+}
+
+function UpdateOutputs([hashtable]$params) {
+    $outputs = 
(az deployment sub show ` + -o json ` + -n stress-deploy-$env ` + --query properties.outputs ` + --subscription $params.subscriptionId + ) | ConvertFrom-Json + + $valuesFile = "$PSScriptRoot/kubernetes/stress-test-addons/values.yaml" + $values = ConvertFrom-Yaml -Ordered (Get-Content -Raw $valuesFile) + + $values.appInsightsKeySecretName.$env = $outputs.APPINSIGHTS_KEY_SECRET_NAME.value + $values.debugStorageKeySecretName.$env = $outputs.DEBUG_STORAGE_KEY_SECRET_NAME.value + $values.debugStorageAccountSecretName.$env = $outputs.DEBUG_STORAGE_ACCOUNT_SECRET_NAME.value + $values.debugFileShareName.$env = $outputs.DEBUG_FILESHARE_NAME.value + $values.staticTestSecretsKeyvaultName.$env = $outputs.STATIC_TEST_SECRETS_KEYVAULT.value + $values.clusterTestSecretsKeyvaultName.$env = $outputs.CLUSTER_TEST_SECRETS_KEYVAULT.value + $values.secretProviderIdentity.$env = $outputs.SECRET_PROVIDER_CLIENT_ID.value + $values.tenantId.$env = $outputs.TENANT_ID.value + + $values | ConvertTo-Yaml | Out-File $valuesFile + + Write-Host "$valuesFile has been updated and must be checked in." 
+} + +function DeployClusterResources([hashtable]$params) { + Write-Host "Deploying stress cluster resources" + RunOrExitOnFailure az deployment sub create ` + -o json ` + --subscription $params.subscriptionId ` + -n stress-deploy-$env ` + -l $params.clusterLocation ` + -f $PSScriptRoot/azure/main.bicep ` + --parameters $PSScriptRoot/azure/parameters/$env.json + + UpdateOutputs $params + + Write-Host "Importing cluster credentials" + RunOrExitOnFailure az aks get-credentials ` + -n $params.clusterName ` + -g rg-stress-cluster-$($params.groupSuffix) ` + --overwrite ` + --subscription $params.subscriptionId + + Write-Host "Installing stress infrastructure charts" + RunOrExitOnFailure helm repo add chaos-mesh https://charts.chaos-mesh.org + RunOrExitOnFailure helm dependency update $PSScriptRoot/kubernetes/stress-infrastructure + RunOrExitOnFailure kubectl create namespace stress-infra --dry-run=client -o yaml | kubectl apply -f - + RunOrExitOnFailure helm upgrade --install stress-infra ` + -n stress-infra ` + $PSScriptRoot/kubernetes/stress-infrastructure +} + +function LoadEnvParams() { + $params = (Get-Content $PSScriptRoot/azure/parameters/$env.json | ConvertFrom-Json -AsHashtable).parameters + + if (!$params) { + Write-Error "Error loading parameters file at $PSScriptRoot/azure/parameters/$env.json" + exit 1 + } + + $paramHash = @{} + foreach ($p in $params.GetEnumerator()) { + $paramHash[$p.Key] = $p.Value.value + } + + return $paramHash +} + +function main() { + # . 
(Join-Path $PSScriptRoot "../Helpers" PSModule-Helpers.ps1)
+    # Install-ModuleIfNotInstalled "powershell-yaml" "0.4.1" | Import-Module
+
+    $params = LoadEnvParams
+
+    DeployStaticResources $params
+    DeployClusterResources $params
+}
+
+# Don't call functions when the script is being dot sourced
+if ($MyInvocation.InvocationName -ne ".") {
+    $ErrorActionPreference = 'Stop'
+    main
+}
diff --git a/tools/stress-cluster/services/Stress.Watcher/src/PodEventHandler.cs b/tools/stress-cluster/services/Stress.Watcher/src/PodEventHandler.cs
index c9995cd374d3..c94a286dbf58 100644
--- a/tools/stress-cluster/services/Stress.Watcher/src/PodEventHandler.cs
+++ b/tools/stress-cluster/services/Stress.Watcher/src/PodEventHandler.cs
@@ -164,8 +164,24 @@ await Client.PatchNamespacedCustomObjectWithHttpMessagesAsync(
             await Task.WhenAll(tasks);
         }
 
+        /// <summary>
+        /// Returns the value of the BASE_NAME environment variable declared on the pod's
+        /// "init-azure-deployer" init container, or an empty string when the pod has no
+        /// such container or the variable has no literal value.
+        /// </summary>
+        public string GetResourceGroupNameIfExists(V1Pod pod)
+        {
+            // Spec, InitContainers and Env may each be null on a V1Pod, so guard every hop.
+            var deployer = pod?.Spec?.InitContainers?.FirstOrDefault(c => c.Name == "init-azure-deployer");
+            // Read the variable's Value itself: calling ToString() on the LINQ query would
+            // yield the iterator's type name, and a Where/Select result is never null.
+            var baseName = deployer?.Env?.FirstOrDefault(e => e.Name == "BASE_NAME")?.Value;
+            return baseName ?? "";
+        }
+
         public bool ShouldStartChaos(GenericChaosResource chaos, V1Pod pod)
         {
+
             if (chaos.Spec.Selector.LabelSelectors?.TestInstance != pod.TestInstance())
             {
                 return false;