diff --git a/eng/common/scripts/stress-testing/stress-test-deployment-lib.ps1 b/eng/common/scripts/stress-testing/stress-test-deployment-lib.ps1 index f05f7388f78..c3468a111b5 100644 --- a/eng/common/scripts/stress-testing/stress-test-deployment-lib.ps1 +++ b/eng/common/scripts/stress-testing/stress-test-deployment-lib.ps1 @@ -213,9 +213,17 @@ function DeployStressPackage( $imageTagBase += "/$($pkg.Namespace)/$($pkg.ReleaseName)" if (!$Template) { - Write-Host "Creating namespace $($pkg.Namespace) if it does not exist..." - kubectl create namespace $pkg.Namespace --dry-run=client -o yaml | kubectl apply -f - - if ($LASTEXITCODE) {exit $LASTEXITCODE} + Write-Host "Checking for namespace $($pkg.Namespace)" + kubectl get namespace $pkg.Namespace + if ($LASTEXITCODE) { + Write-Host "Creating namespace $($pkg.Namespace) ..." + kubectl create namespace $pkg.Namespace --dry-run=client -o yaml | kubectl apply -f - + if ($LASTEXITCODE) {exit $LASTEXITCODE} + # Give a few seconds for stress watcher to initialize the federated identity credential + # and create the service account before we reference it + Write-Host "Waiting 15 seconds for namespace federated credentials to be created and synced" + Start-Sleep 15 + } Write-Host "Adding default resource requests to namespace/$($pkg.Namespace)" $limitRangeSpec | kubectl apply -n $pkg.Namespace -f - if ($LASTEXITCODE) {exit $LASTEXITCODE} diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock index 7225e1f51cd..cbfe3cfad57 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.1 -digest: sha256:28e374f8db5c46447b2a1491d4361ceb126536c425cbe54be49017120fe7b27d -generated: 
"2024-01-17T15:38:17.871619598-05:00" + version: 0.3.2 +digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764 +generated: "2024-05-23T11:38:32.810490735-04:00" diff --git a/tools/stress-cluster/chaos/examples/network-stress-scenarios-example/Chart.lock b/tools/stress-cluster/chaos/examples/network-stress-scenarios-example/Chart.lock index 6afd84b988a..02c41e4d2ef 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-scenarios-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/network-stress-scenarios-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.1 -digest: sha256:28e374f8db5c46447b2a1491d4361ceb126536c425cbe54be49017120fe7b27d -generated: "2024-01-17T15:39:38.364921715-05:00" + version: 0.3.2 +digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764 +generated: "2024-05-23T11:38:01.807752664-04:00" diff --git a/tools/stress-cluster/chaos/examples/parallel-pod-example/Chart.lock b/tools/stress-cluster/chaos/examples/parallel-pod-example/Chart.lock index 25192df81d1..9406901ed6e 100644 --- a/tools/stress-cluster/chaos/examples/parallel-pod-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/parallel-pod-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.1 -digest: sha256:28e374f8db5c46447b2a1491d4361ceb126536c425cbe54be49017120fe7b27d -generated: "2024-01-17T15:40:00.504665427-05:00" + version: 0.3.2 +digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764 +generated: "2024-05-23T11:38:47.628996062-04:00" diff --git a/tools/stress-cluster/chaos/examples/parallel-pod-example/templates/parallel-pod.yaml b/tools/stress-cluster/chaos/examples/parallel-pod-example/templates/parallel-pod.yaml index f522311a987..d2c31c7a65e 100644 --- 
a/tools/stress-cluster/chaos/examples/parallel-pod-example/templates/parallel-pod.yaml +++ b/tools/stress-cluster/chaos/examples/parallel-pod-example/templates/parallel-pod.yaml @@ -11,7 +11,7 @@ spec: args: - | source $ENV_FILE && - az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_CLIENT_SECRET --tenant $AZURE_TENANT_ID && + az login --federated-token "$(cat $AZURE_FEDERATED_TOKEN_FILE)" --service-principal -u "$AZURE_CLIENT_ID" -t "$AZURE_TENANT_ID" && az appconfig show -n $APP_CONFIG_NAME -g $RESOURCE_GROUP --subscription $AZURE_SUBSCRIPTION_ID -o table && echo "Completed pod instance $JOB_COMPLETION_INDEX" {{- include "stress-test-addons.container-env" . | nindent 6 }} diff --git a/tools/stress-cluster/chaos/examples/stress-debug-share-example/Chart.lock b/tools/stress-cluster/chaos/examples/stress-debug-share-example/Chart.lock index be5ed8df204..c0c964081b2 100644 --- a/tools/stress-cluster/chaos/examples/stress-debug-share-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/stress-debug-share-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.1 -digest: sha256:28e374f8db5c46447b2a1491d4361ceb126536c425cbe54be49017120fe7b27d -generated: "2024-01-17T15:39:47.856708817-05:00" + version: 0.3.2 +digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764 +generated: "2024-05-23T11:38:19.251210631-04:00" diff --git a/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock index 62f37876671..df25f68f412 100644 --- a/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.1 -digest: 
sha256:28e374f8db5c46447b2a1491d4361ceb126536c425cbe54be49017120fe7b27d -generated: "2024-01-17T15:39:23.757382734-05:00" + version: 0.3.2 +digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764 +generated: "2024-05-23T11:37:41.371010465-04:00" diff --git a/tools/stress-cluster/chaos/examples/stress-deployment-example/templates/deploy-job.yaml b/tools/stress-cluster/chaos/examples/stress-deployment-example/templates/deploy-job.yaml index 07ade7509e1..91852183f15 100644 --- a/tools/stress-cluster/chaos/examples/stress-deployment-example/templates/deploy-job.yaml +++ b/tools/stress-cluster/chaos/examples/stress-deployment-example/templates/deploy-job.yaml @@ -13,7 +13,7 @@ spec: args: - | source $ENV_FILE && - az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_CLIENT_SECRET --tenant $AZURE_TENANT_ID && + az login --federated-token "$(cat $AZURE_FEDERATED_TOKEN_FILE)" --service-principal -u "$AZURE_CLIENT_ID" -t "$AZURE_TENANT_ID" && az account set -s $AZURE_SUBSCRIPTION_ID && az appconfig show -n $APP_CONFIG_NAME -g $RESOURCE_GROUP -o json {{- include "stress-test-addons.container-env" . 
| nindent 6 }} diff --git a/tools/stress-cluster/cluster/azure/cluster/cluster.bicep b/tools/stress-cluster/cluster/azure/cluster/cluster.bicep index 876bf129ebb..75c01a44e8b 100644 --- a/tools/stress-cluster/cluster/azure/cluster/cluster.bicep +++ b/tools/stress-cluster/cluster/azure/cluster/cluster.bicep @@ -24,7 +24,7 @@ var systemAgentPool = { mode: 'System' vmSize: 'Standard_D4ds_v4' type: 'VirtualMachineScaleSets' - osType: 'AzureLinux' + osType: 'Linux' enableAutoScaling: true enableEncryptionAtHost: true nodeLabels: { @@ -40,7 +40,7 @@ var defaultAgentPool = { mode: 'User' vmSize: 'Standard_D8a_v4' type: 'VirtualMachineScaleSets' - osType: 'AzureLinux' + osType: 'Linux' osDiskType: 'Ephemeral' enableAutoScaling: true enableEncryptionAtHost: true @@ -87,6 +87,14 @@ resource newCluster 'Microsoft.ContainerService/managedClusters@2023-02-02-previ servicePrincipalProfile: { clientId: 'msi' } + oidcIssuerProfile: { + enabled: true + } + securityProfile: { + workloadIdentity: { + enabled: true + } + } nodeResourceGroup: nodeResourceGroup } } @@ -151,4 +159,5 @@ resource metricsPublisher 'Microsoft.Authorization/roleAssignments@2020-04-01-pr output secretProviderObjectId string = cluster.properties.addonProfiles.azureKeyvaultSecretsProvider.identity.objectId output secretProviderClientId string = cluster.properties.addonProfiles.azureKeyvaultSecretsProvider.identity.clientId output kubeletIdentityObjectId string = cluster.properties.identityProfile.kubeletidentity.objectId +output workloadAppIssuer string = cluster.properties.oidcIssuerProfile.issuerURL output clusterName string = clusterName diff --git a/tools/stress-cluster/cluster/azure/cluster/static-vault-access-policy.bicep b/tools/stress-cluster/cluster/azure/cluster/static-vault-access-policy.bicep deleted file mode 100644 index c9119947947..00000000000 --- a/tools/stress-cluster/cluster/azure/cluster/static-vault-access-policy.bicep +++ /dev/null @@ -1,22 +0,0 @@ -param vaultName string -param objectId 
string -param tenantId string - -// Add cluster node identity to statically configured stress test secrets keyvault -resource stressTestVault 'Microsoft.KeyVault/vaults/accessPolicies@2019-09-01' = { - name: '${vaultName}/add' - properties: { - accessPolicies: [ - { - objectId: objectId - tenantId: tenantId - permissions: { - secrets: [ - 'list' - 'get' - ] - } - } - ] - } -} diff --git a/tools/stress-cluster/cluster/azure/cluster/workloadappidentities.bicep b/tools/stress-cluster/cluster/azure/cluster/workloadappidentities.bicep new file mode 100644 index 00000000000..a03fa777648 --- /dev/null +++ b/tools/stress-cluster/cluster/azure/cluster/workloadappidentities.bicep @@ -0,0 +1,35 @@ +param groupSuffix string +param location string + +param infraNamespace string +param infraWorkloadServiceAccountName string +param workloadAppIssuer string +param workloadAppPoolCount int + +resource infraWorkloadApp 'Microsoft.ManagedIdentity/userAssignedIdentities@2022-01-31-preview' = { + name: 'stress-infra-workload-${groupSuffix}' + location: location + + resource creds 'federatedIdentityCredentials' = { + name: 'stress-infra-federated-${groupSuffix}' + properties: { + issuer: workloadAppIssuer + audiences: ['api://AzureADTokenExchange'] + subject: 'system:serviceaccount:${infraNamespace}:${infraWorkloadServiceAccountName}' + } + } +} + +resource workloadApps 'Microsoft.ManagedIdentity/userAssignedIdentities@2022-01-31-preview' = [for i in range(0, workloadAppPoolCount): { + name: 'stress-app-workload-${groupSuffix}-${i}' + location: location +}] + +output infraWorkloadAppClientId string = infraWorkloadApp.properties.clientId +output infraWorkloadAppObjectId string = infraWorkloadApp.properties.principalId + +output workloadAppInfo array = [for i in range(0, workloadAppPoolCount): { + name: 'stress-app-workload-${groupSuffix}-${i}' + clientId: workloadApps[i].properties.clientId + objectId: workloadApps[i].properties.principalId +}] diff --git 
a/tools/stress-cluster/cluster/azure/cluster/workloadapproles.bicep b/tools/stress-cluster/cluster/azure/cluster/workloadapproles.bicep new file mode 100644 index 00000000000..0c1a25a4ee9 --- /dev/null +++ b/tools/stress-cluster/cluster/azure/cluster/workloadapproles.bicep @@ -0,0 +1,54 @@ +targetScope = 'subscription' + +param infraWorkloadAppObjectId string +param workloadApps array + +var serviceBusDataOwnerRoleId = '090c5cfd-751d-490a-894a-3ce6f1109419' +var eventHubsDataOwnerRoleId = 'f526a384-b230-433a-b45c-95f59c4a2dec' +var contributorRoleId = 'b24988ac-6180-42a0-ab88-20f7382dd24c' +var userAccessAdministratorRoleId = '18d7d88d-d35e-4fb5-a5c3-7773c20a72d9' + +resource infraWorkloadAppContrib 'Microsoft.Authorization/roleAssignments@2021-04-01-preview' = { + name: guid('infraWorkloadAppContrib', subscription().id, infraWorkloadAppObjectId) + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', contributorRoleId) + principalId: infraWorkloadAppObjectId + principalType: 'ServicePrincipal' + } +} + +resource infraWorkloadAppUA 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = { + name: guid('infraWorkloadAppUA', subscription().id, infraWorkloadAppObjectId) + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', userAccessAdministratorRoleId) + principalId: infraWorkloadAppObjectId + principalType: 'ServicePrincipal' + } +} + +resource workloadAppContrib 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = [for i in range(0, length(workloadApps)): { + name: guid('workloadAppContrib', subscription().id, workloadApps[i].objectId) + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', contributorRoleId) + principalId: workloadApps[i].objectId + principalType: 'ServicePrincipal' + } +}] + +resource workloadAppEHDataOwner 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = [for i in range(0, 
length(workloadApps)): { + name: guid('workloadAppEHDataOwner', subscription().id, workloadApps[i].objectId) + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', eventHubsDataOwnerRoleId) + principalId: workloadApps[i].objectId + principalType: 'ServicePrincipal' + } +}] + +resource workloadAppSBDataOwner 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = [for i in range(0, length(workloadApps)): { + name: guid('workloadAppSBDataOwner', subscription().id, workloadApps[i].objectId) + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', serviceBusDataOwnerRoleId) + principalId: workloadApps[i].objectId + principalType: 'ServicePrincipal' + } +}] diff --git a/tools/stress-cluster/cluster/azure/main.bicep b/tools/stress-cluster/cluster/azure/main.bicep index 1abe4033728..dbd933f19ce 100644 --- a/tools/stress-cluster/cluster/azure/main.bicep +++ b/tools/stress-cluster/cluster/azure/main.bicep @@ -3,9 +3,8 @@ targetScope = 'subscription' param subscriptionId string = '' param groupSuffix string param clusterName string +param infraNamespace string = 'stress-infra' param clusterLocation string = 'westus3' -param staticTestKeyvaultName string -param staticTestKeyvaultGroup string param monitoringLocation string = 'centralus' param defaultAgentPoolMinNodes int = 6 param defaultAgentPoolMaxNodes int = 20 @@ -14,6 +13,8 @@ param tags object // AKS does not allow agentPool updates via existing managed cluster resources param updateNodes bool = false +var workloadAppPoolCount = 5 + // Azure Developer Platform Team Group // https://ms.portal.azure.com/#blade/Microsoft_AAD_IAM/GroupDetailsMenuBlade/Overview/groupId/56709ad9-8962-418a-ad0d-4b25fa962bae param accessGroups array = [ @@ -154,17 +155,28 @@ module keyvault 'cluster/keyvault.bicep' = { } } -module accessPolicy 'cluster/static-vault-access-policy.bicep' = { - name: 'accessPolicy' - scope: 
resourceGroup(staticTestKeyvaultGroup) - params: { - vaultName: staticTestKeyvaultName - tenantId: subscription().tenantId - objectId: cluster.outputs.secretProviderObjectId - } +module workloadAppIdentities 'cluster/workloadappidentities.bicep' = if (!updateNodes) { + name: 'workloadAppIdentities' + scope: group + params: { + groupSuffix: groupSuffix + location: clusterLocation + infraNamespace: infraNamespace + infraWorkloadServiceAccountName: 'workload-svc' + workloadAppIssuer: cluster.outputs.workloadAppIssuer + workloadAppPoolCount: workloadAppPoolCount + } +} + +module workloadAppRoles 'cluster/workloadapproles.bicep' = if (!updateNodes) { + name: 'workloadAppRoles' + scope: subscription() + params: { + infraWorkloadAppObjectId: workloadAppIdentities.outputs.infraWorkloadAppObjectId + workloadApps: workloadAppIdentities.outputs.workloadAppInfo + } } -output STATIC_TEST_SECRETS_KEYVAULT string = staticTestKeyvaultName output CLUSTER_TEST_SECRETS_KEYVAULT string = keyvault.outputs.keyvaultName output SECRET_PROVIDER_CLIENT_ID string = cluster.outputs.secretProviderClientId output CLUSTER_NAME string = cluster.outputs.clusterName @@ -181,3 +193,8 @@ output STATUS_DASHBOARD_LINK string = 'https://ms.portal.azure.com/#@microsoft.o output RESOURCE_GROUP string = group.name output SUBSCRIPTION_ID string = subscriptionId output TENANT_ID string = subscription().tenantId +output INFRA_WORKLOAD_APP_SERVICE_ACCOUNT_NAME string = 'workload-svc' +output INFRA_WORKLOAD_APP_CLIENT_ID string = workloadAppIdentities.outputs.infraWorkloadAppClientId +output INFRA_WORKLOAD_APP_OBJECT_ID string = workloadAppIdentities.outputs.infraWorkloadAppObjectId +output WORKLOAD_APP_ISSUER string = cluster.outputs.workloadAppIssuer +output WORKLOAD_APPS string = string(workloadAppIdentities.outputs.workloadAppInfo) diff --git a/tools/stress-cluster/cluster/azure/parameters/pg.json b/tools/stress-cluster/cluster/azure/parameters/pg.json index 74cc7250d67..025dcb8a05d 100644 --- 
a/tools/stress-cluster/cluster/azure/parameters/pg.json +++ b/tools/stress-cluster/cluster/azure/parameters/pg.json @@ -14,12 +14,6 @@ "clusterLocation": { "value": "westus3" }, - "staticTestKeyvaultName": { - "value": "stress-secrets-pg" - }, - "staticTestKeyvaultGroup": { - "value": "rg-stress-secrets-pg" - }, "defaultAgentPoolMinNodes": { "value": 6 }, diff --git a/tools/stress-cluster/cluster/azure/parameters/prod.json b/tools/stress-cluster/cluster/azure/parameters/prod.json index b9565c38566..3bd9217c97e 100644 --- a/tools/stress-cluster/cluster/azure/parameters/prod.json +++ b/tools/stress-cluster/cluster/azure/parameters/prod.json @@ -17,12 +17,6 @@ "monitoringLocation": { "value": "centralus" }, - "staticTestKeyvaultName": { - "value": "stress-secrets-prod" - }, - "staticTestKeyvaultGroup": { - "value": "rg-stress-secrets-prod" - }, "defaultAgentPoolMinNodes": { "value": 2 }, diff --git a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/Chart.lock b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/Chart.lock index 2f15c6bda1a..5dd4d7df11c 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/Chart.lock +++ b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/Chart.lock @@ -4,6 +4,6 @@ dependencies: version: 2.6.3 - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.1 -digest: sha256:b954ab9bea8f484f3110a2051e384621f6cfb0188cccd3911299da3aad6fd951 -generated: "2024-05-08T14:47:49.4174026-07:00" + version: 0.3.2 +digest: sha256:59235c0eac423267e28d9ac61392532ea74fb37a1be2567e4ac83277d62d8761 +generated: "2024-05-23T11:44:39.658622055-04:00" diff --git a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/infra-svc.yaml b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/infra-svc.yaml new file mode 100644 index 00000000000..02d5a5e755d --- /dev/null +++ 
b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/infra-svc.yaml @@ -0,0 +1,8 @@ +{{- $addons := get .Values "stress-test-addons" -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + azure.workload.identity/client-id: {{ get $addons.infraWorkloadAppClientId $addons.env }} + name: {{ get $addons.infraWorkloadAppServiceAccountName $addons.env }} + namespace: {{ .Release.Namespace }} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role-binding.yaml b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role-binding.yaml index 3b64e0fc078..7940d8be501 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role-binding.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role-binding.yaml @@ -1,3 +1,4 @@ +{{- $addons := get .Values "stress-test-addons" -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -6,6 +7,9 @@ subjects: - namespace: {{ .Release.Namespace }} kind: ServiceAccount name: default +- namespace: {{ .Release.Namespace }} + kind: ServiceAccount + name: {{ get $addons.infraWorkloadAppServiceAccountName $addons.env }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role.yaml b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role.yaml index 418849aac8f..108c67e4b74 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher-cluster-role.yaml @@ -11,5 +11,6 @@ rules: - get - list - watch + - create - update - patch diff --git 
a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher.yaml b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher.yaml index 53713d26fa2..e56adb6e6ce 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-infrastructure/templates/stresswatcher.yaml @@ -1,4 +1,5 @@ {{ $ctx := fromYaml (include "stress-test-addons.util.mergeStressContext" (list . (dict "Scenario" "watcher") )) }} +{{- $addons := get .Values "stress-test-addons" -}} apiVersion: apps/v1 kind: Deployment metadata: @@ -16,7 +17,9 @@ spec: namespace: {{ .Release.Namespace }} labels: app: stress-watcher + azure.workload.identity/use: "true" spec: + serviceAccountName: {{ get $addons.infraWorkloadAppServiceAccountName $addons.env }} nodeSelector: sku: 'system' initContainers: @@ -27,12 +30,16 @@ spec: - name: stresswatcher imagePullPolicy: Always image: azsdkengsys.azurecr.io/stress/watcher:{{ .Values.tag }} - command: ["dotnet", "Stress.Watcher.dll"] + command: ["sh", "-c"] + args: + - > + az login --federated-token "$(cat $AZURE_FEDERATED_TOKEN_FILE)" --service-principal -u $AZURE_CLIENT_ID -t $AZURE_TENANT_ID; + ./Stress.Watcher \ + --workload-app-issuer "{{ get $addons.workloadAppIssuer $addons.env }}" \ + --workload-app-pool "{{ get $addons.workloadAppClientNamePool $addons.env }}" {{- include "stress-test-addons.container-env" $ctx | nindent 8 }} volumes: # Volume template for mounting secrets {{- include "stress-test-addons.env-volumes" $ctx | nindent 8 }} # Volume template for mounting azure file share for debugging {{- include "stress-test-addons.debug-file-volumes" $ctx | nindent 8 }} - -{{- include "stress-test-addons.static-secrets" . 
}} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/CHANGELOG.md b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/CHANGELOG.md index b759296376e..ce567089a7a 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/CHANGELOG.md +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/CHANGELOG.md @@ -1,5 +1,15 @@ # Release History +## 0.3.2 (2024-05-15) + +### Features Added + +Add support for AKS workload identity auth mechanism + +### Breaking Changes + +Tests must use WorkloadIdentityCredential or DefaultAzureCredential. Any tests leveraging EnvironmentCredential and/or service principal password auth will no longer work. + ## 0.3.1 (2024-01-17) ### Features Added diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml index 929a88dde2d..5c5f2cc0695 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: stress-test-addons description: Baseline resources and templates for stress testing clusters -version: 0.3.1 +version: 0.3.2 appVersion: v0.1 diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/Dockerfile b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/Dockerfile index 273e16f8350..542264b9d7f 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/Dockerfile +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/Dockerfile @@ -1,10 +1,5 @@ -FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 +FROM mcr.microsoft.com/azure-powershell:mariner-2 -RUN curl -sSL -O https://packages.microsoft.com/config/rhel/7/packages-microsoft-prod.rpm -RUN rpm -i packages-microsoft-prod.rpm -RUN rm 
packages-microsoft-prod.rpm -RUN yum install powershell -y -RUN pwsh -c '$ErrorActionPreference = "Stop"; Install-Module Az -Force'; RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" RUN install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl RUN kubectl version --client diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/deploy-stress-test-resources.ps1 b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/deploy-stress-test-resources.ps1 index 16af8050dc9..4cce79e7671 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/deploy-stress-test-resources.ps1 +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/images/test-resource-deployer/deploy-stress-test-resources.ps1 @@ -1,16 +1,3 @@ -$secrets = @{} -$secretsDir = "/mnt/secrets/static/*" -Get-ChildItem -Path $secretsDir | ForEach-Object { - foreach($line in Get-Content $_) { - $idx = $line.IndexOf("=") - if ($idx -gt 0) { - $key = $line.Substring(0, $idx) - $val = $line.Substring($idx + 1) - $secrets.Add($key, $val) - } - } -} - mkdir /azure Copy-Item "/scripts/stress-test/test-resources-post.ps1" -Destination "/azure/" Copy-Item "/mnt/testresources/*" -Destination "/azure/" @@ -33,17 +20,28 @@ if ($env:JOB_COMPLETION_INDEX -and ($env:JOB_COMPLETION_INDEX -ne "0")) { } } +Write-Host "Logging in with federated token" +# Token file, Tenant and Client IDs are set by AKS when workload identity is enabled +$token = Get-Content -Raw $env:AZURE_FEDERATED_TOKEN_FILE +Connect-AzAccount -ServicePrincipal -Tenant $env:AZURE_TENANT_ID -ApplicationId $env:AZURE_CLIENT_ID -FederatedToken $token + +Write-Host "Finding provisioner object id" +$identity = Get-AzUserAssignedIdentity -ResourceGroupName $env:STRESS_CLUSTER_RESOURCE_GROUP | Where-Object { $_.ClientId -eq $env:AZURE_CLIENT_ID } +if (!$identity) { + 
throw "User Assigned Identity $($env:AZURE_CLIENT_ID) not found in resource group $($env:STRESS_CLUSTER_RESOURCE_GROUP)" +} + # Capture output so we don't print environment variable secrets $env = & /common/TestResources/New-TestResources.ps1 ` -BaseName $env:BASE_NAME ` -ResourceGroupName $env:RESOURCE_GROUP_NAME ` - -SubscriptionId $secrets.AZURE_SUBSCRIPTION_ID ` - -TenantId $secrets.AZURE_TENANT_ID ` - -ProvisionerApplicationId $secrets.AZURE_CLIENT_ID ` - -ProvisionerApplicationSecret $secrets.AZURE_CLIENT_SECRET ` - -TestApplicationId $secrets.AZURE_CLIENT_ID ` - -TestApplicationSecret $secrets.AZURE_CLIENT_SECRET ` - -TestApplicationOid $secrets.AZURE_CLIENT_OID ` + -SubscriptionId $env:AZURE_SUBSCRIPTION_ID ` + -TenantId $env:AZURE_TENANT_ID ` + -ProvisionerApplicationId $env:AZURE_CLIENT_ID ` + -ProvisionerApplicationOid $identity.PrincipalId ` + -TestApplicationId $env:AZURE_CLIENT_ID ` + -TestApplicationSecret "" ` + -TestApplicationOid $identity.PrincipalId ` -Location 'westus3' ` -DeleteAfterHours 168 ` -ServiceDirectory '/azure/' ` diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml index 06bdfd57952..edb3adc5f88 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml @@ -1,6 +1,15 @@ apiVersion: v1 entries: stress-test-addons: + - apiVersion: v2 + appVersion: v0.1 + created: "2024-05-15T19:50:35.339373231-04:00" + description: Baseline resources and templates for stress testing clusters + digest: 10be1030c33c94404da12be096506d539dc310616ea05af8ea30eca8699cb507 + name: stress-test-addons + urls: + - https://stresstestcharts.blob.core.windows.net/helm/stress-test-addons-0.3.2.tgz + version: 0.3.2 - apiVersion: v2 appVersion: v0.1 created: "2024-01-17T15:37:50.337580687-05:00" @@ -208,4 +217,4 @@ entries: urls: - 
https://stresstestcharts.blob.core.windows.net/helm/stress-test-addons-0.1.2.tgz version: 0.1.2 -generated: "2024-01-17T15:37:50.322863305-05:00" +generated: "2024-05-15T19:50:35.33115471-04:00" diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl index 146827cdcb2..bad47804b26 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_container_env.tpl @@ -1,4 +1,5 @@ {{- define "stress-test-addons.container-env" -}} +{{- $addons := get .Values "stress-test-addons" -}} env: - name: ENV_FILE value: /mnt/outputs/.env @@ -18,6 +19,10 @@ env: value: {{ .Stress.Scenario }} - name: GIT_COMMIT value: {{ .Values.GitCommit | default "" }} + - name: AZURE_SUBSCRIPTION_ID + value: {{ get $addons.subscriptionId $addons.env }} + - name: STRESS_CLUSTER_RESOURCE_GROUP + value: {{ get $addons.clusterGroup $addons.env }} volumeMounts: - name: test-env-{{ lower .Stress.Scenario }}-{{ .Release.Name }}-{{ .Release.Revision }} mountPath: /mnt/outputs diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_env_volumes.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_env_volumes.tpl index 585edede50b..a97a34b3721 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_env_volumes.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_env_volumes.tpl @@ -13,10 +13,4 @@ readOnly: true volumeAttributes: secretProviderClass: stress-cluster-kv-{{ .Release.Name }} -- name: static-secrets-{{ .Release.Name }}-{{ .Stress.SubscriptionConfig }} - csi: - driver: secrets-store.csi.k8s.io - readOnly: true - volumeAttributes: - secretProviderClass: stress-static-kv-{{ .Release.Name }}-{{ .Stress.SubscriptionConfig }} {{ end }} diff --git 
a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl index dd3bae9b6e5..5f704f6198b 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_deploy.tpl @@ -1,4 +1,5 @@ {{ define "stress-test-addons.init-deploy" }} +{{- $addons := get .Values "stress-test-addons" -}} - name: init-azure-deployer # Please use 'testing' for the image repo name when testing # e.g. azsdkengsys.azurecr.io/testing/deploy-test-resources @@ -14,6 +15,10 @@ env: - name: ENV_FILE value: /mnt/outputs/.env + - name: AZURE_SUBSCRIPTION_ID + value: {{ get $addons.subscriptionId $addons.env }} + - name: STRESS_CLUSTER_RESOURCE_GROUP + value: {{ get $addons.clusterGroup $addons.env }} - name: RESOURCE_GROUP_NAME value: {{ .Stress.ResourceGroupName }} - name: BASE_NAME @@ -27,7 +32,4 @@ mountPath: /mnt/testresources - name: test-env-{{ lower .Stress.Scenario }}-{{ .Release.Name }}-{{ .Release.Revision }} mountPath: /mnt/outputs - - name: "static-secrets-{{ .Release.Name }}-{{ .Stress.SubscriptionConfig }}" - mountPath: "/mnt/secrets/static" - readOnly: true {{ end }} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl index 9d6fb8b976c..34480ccc4f8 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_init_env.tpl @@ -5,16 +5,13 @@ # Secret values are expected to be in format = command: ['sh', '-c'] args: - - "cat /mnt/secrets/static/* /mnt/secrets/cluster/* > $ENV_FILE" + - "cat /mnt/secrets/cluster/* > $ENV_FILE" env: - name: ENV_FILE value: /mnt/outputs/.env volumeMounts: - name: test-env-{{ lower .Stress.Scenario 
}}-{{ .Release.Name }}-{{ .Release.Revision }} mountPath: /mnt/outputs - - name: static-secrets-{{ .Release.Name }}-{{ .Stress.SubscriptionConfig }} - mountPath: "/mnt/secrets/static" - readOnly: true - name: cluster-secrets-{{ .Release.Name }} mountPath: "/mnt/secrets/cluster" readOnly: true diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl index 6f2c0b56520..da9e67aec0a 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_stress_test.tpl @@ -28,6 +28,7 @@ spec: template: metadata: labels: + azure.workload.identity/use: "true" release: {{ .Release.Name }} scenario: {{ .Stress.Scenario }} gitCommit: {{ .Values.GitCommit | default "" }} @@ -36,6 +37,7 @@ spec: deletionLockExpiry: {{ .Values.PodDisruptionBudgetExpiry }} {{- end }} spec: + serviceAccountName: {{ .Release.Namespace }} # In cases where a stress test has higher resource requirements or needs a dedicated node, # a new nodepool can be provisioned and labeled to allow custom scheduling. 
nodeSelector: @@ -68,7 +70,6 @@ spec: {{- $tpl := fromYaml (include "stress-test-addons.deploy-job-template.tpl" $jobCtx) -}} {{- toYaml (merge $jobOverride $tpl) -}} {{- end }} -{{- include "stress-test-addons.static-secrets" $global }} {{- if $global.Values.PodDisruptionBudgetExpiry }} {{- include "stress-test-addons.pod-disruption-budget" $global }} {{- end }} @@ -96,6 +97,7 @@ spec: template: metadata: labels: + azure.workload.identity/use: "true" release: {{ .Release.Name }} scenario: {{ .Stress.Scenario }} gitCommit: {{ .Values.GitCommit | default "" }} @@ -104,6 +106,7 @@ spec: deletionLockExpiry: {{ .Values.PodDisruptionBudgetExpiry }} {{- end }} spec: + serviceAccountName: {{ .Release.Namespace }} nodeSelector: sku: 'default' restartPolicy: Never @@ -129,7 +132,6 @@ spec: {{- $tpl := fromYaml (include "stress-test-addons.env-job-template.tpl" $jobCtx) -}} {{- toYaml (merge $jobOverride $tpl) -}} {{- end }} -{{- include "stress-test-addons.static-secrets" $global }} {{- if $global.Values.PodDisruptionBudgetExpiry }} {{- include "stress-test-addons.pod-disruption-budget" $global }} {{- end }} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_util.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_util.tpl index 714dcfde19e..a619725bd9c 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_util.tpl +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/_util.tpl @@ -28,22 +28,18 @@ Fields added to global context and returned: from values.yaml or a default value "stress". .Stress.ResourceGroupName - A pre-calculated resource group name value that can be passed down to various configurations that require it. .Stress.BaseName - A random, six character, lowercase alpha string that can be used for naming and is valid for most azure resources. 
-.Stress.SubscriptionConfig - Secret name from which to load subscription target and credentials See https://github.com/Masterminds/sprig/tree/master/docs for template function reference */}} {{- define "stress-test-addons.util.mergeStressContext" -}} {{- /* Copy scenario name into top level keys of global context */}} {{- $_global := index . 0 -}} -{{- /* Load values.yaml of stress-test-addons sub chart to get subscription config defaults */}} -{{- $_subChart := get $_global.Values "stress-test-addons" }} {{- $_scenario := index . 1 -}} {{- $resourceGroupName := lower (print $_global.Release.Namespace "-" $_scenario.Scenario "-" $_global.Release.Name "-" $_global.Release.Revision) -}} {{- /* Use lowercase alphanumeric characters beginning with a letter for maximum azure resource naming compatibility */ -}} {{- $uniqueTestId := lower (print "s" (trunc 5 (sha1sum $resourceGroupName) ) ) -}} -{{- $subConfig := lower (coalesce $_scenario.subscriptionConfig (get $_subChart.subscription $_subChart.env)) }} {{- /* Create add Stress context to top level keys of global context */}} -{{- $_stress := dict "ResourceGroupName" $resourceGroupName "BaseName" $uniqueTestId "SubscriptionConfig" $subConfig -}} +{{- $_stress := dict "ResourceGroupName" $resourceGroupName "BaseName" $uniqueTestId -}} {{- $_stress := merge $_stress $_scenario -}} {{- $_instance := deepCopy ($_global | merge (dict "Stress" $_stress )) -}} {{ toYaml ($_instance) }} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/reader-role.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/reader-role.yaml index e8ae7cc5490..59663aaa6ee 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/reader-role.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/reader-role.yaml @@ -24,6 +24,9 @@ subjects: - namespace: {{ .Release.Namespace }} kind: ServiceAccount name: default +- namespace: {{ .Release.Namespace }} 
+ kind: ServiceAccount + name: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-static-secret-provider.tpl b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-static-secret-provider.tpl deleted file mode 100644 index f3f06c12297..00000000000 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/templates/stress-test-static-secret-provider.tpl +++ /dev/null @@ -1,37 +0,0 @@ -{{- define "stress-test-addons.static-secrets" -}} -{{- /* The subscriptionConfig key allows a user to use a custom set of subscription+creds uploaded manually to the static keyvault */}} -{{- /* This template finds all unique subscription configs in scenarios and creates a single secret provider for each */}} -{{- $global := . }} -{{- $subChart := get $global.Values "stress-test-addons" }} -{{- $subConfigs := list (get $subChart.subscription $subChart.env) }} -{{- range $global.Values.scenarios }} -{{- $subConfigs = append $subConfigs (coalesce .subscriptionConfig "") }} -{{- end }} -{{- $subConfigs = compact $subConfigs | uniq }} -{{- range $subConfigs }} ---- -apiVersion: secrets-store.csi.x-k8s.io/v1 -kind: SecretProviderClass -metadata: - name: stress-static-kv-{{ $global.Release.Name }}-{{ lower . }} - namespace: {{ $global.Release.Namespace }} -spec: - provider: azure - secretObjects: - - secretName: {{ . }} - type: Opaque - data: - - objectName: {{ . }} - key: value - parameters: - useVMManagedIdentity: "true" - userAssignedIdentityID: {{ get $subChart.secretProviderIdentity $subChart.env }} # az vmss identity show ... - keyvaultName: {{ get $subChart.staticTestSecretsKeyvaultName $subChart.env }} - objects: | - array: - - | - objectName: {{ . 
}} - objectType: secret - tenantId: {{ get $subChart.tenantId $subChart.env }} -{{- end }} -{{- end -}} diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml index f7e7ec2dd1c..bce8acc94aa 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml @@ -31,16 +31,44 @@ secretProviderIdentity: pg: 0a7293d6-c5fa-47e7-a142-ef40bf6b6764 prod: c5454d90-811d-4d37-b685-f48d6f689aa3 dev: "" +provisionerAppId: + pg: e3fdf864-b936-4279-b787-55adc0f9984a + prod: 5d3c637e-07b7-4b29-83d9-87eb050dfdfb + dev: "" +infraWorkloadAppServiceAccountName: + pg: workload-svc + prod: workload-svc + dev: "" +infraWorkloadAppClientId: + pg: fb633f50-31c7-42af-9640-7651ab7cf69a + prod: daa992f7-ce2c-4ab5-8b51-b06e99caae1a + dev: "" +infraWorkloadAppObjectId: + pg: 135cb549-37ce-4379-8738-39b981753256 + prod: 9c705bcc-3878-4694-a34f-b1c0139315d6 + dev: "" +workloadAppClientNamePool: + pg: stress-app-workload-pg-0,stress-app-workload-pg-1,stress-app-workload-pg-2,stress-app-workload-pg-3,stress-app-workload-pg-4 + prod: stress-app-workload-prod-0,stress-app-workload-prod-1,stress-app-workload-prod-2,stress-app-workload-prod-3,stress-app-workload-prod-4 + dev: +workloadAppIssuer: + pg: https://westus3.oic.prod-aks.azure.com/72f988bf-86f1-41af-91ab-2d7cd011db47/c8b9b4a1-dee9-44e2-93d2-33fc5342ed26/ + prod: https://westus2.oic.prod-aks.azure.com/72f988bf-86f1-41af-91ab-2d7cd011db47/5588aa6f-60c3-4504-8d1a-bbfda356780a/ + dev: +clusterGroup: + pg: rg-stress-cluster-pg + prod: rg-stress-cluster-prod + dev: subscription: pg: public prod: public dev: public +subscriptionId: + pg: faa080af-c1d8-40ad-9cce-e1a450ca5b57 + prod: 2cd617ea-1866-46b1-90e3-fffb087ebf9b + dev: tenantId: pg: 72f988bf-86f1-41af-91ab-2d7cd011db47 prod: 72f988bf-86f1-41af-91ab-2d7cd011db47 dev: "" -provisionerAppId: - pg: 
e3fdf864-b936-4279-b787-55adc0f9984a - prod: 5d3c637e-07b7-4b29-83d9-87eb050dfdfb - dev: "" diff --git a/tools/stress-cluster/cluster/provision.ps1 b/tools/stress-cluster/cluster/provision.ps1 index 11b1561be9c..d4a13e55598 100644 --- a/tools/stress-cluster/cluster/provision.ps1 +++ b/tools/stress-cluster/cluster/provision.ps1 @@ -82,78 +82,6 @@ function RunOrExitOnFailure() } } -function DeployStaticResources([hashtable]$params) -{ - Write-Host "Deploying static resources" - - $formattedTags = $params.tags.GetEnumerator() | ForEach-Object { "'$($_.Name)=$($_.Value)'" } - $formattedTags = $formattedTags -join ' ' - - RunOrExitOnFailure az group create ` - -n $params.staticTestKeyvaultGroup ` - -l $params.clusterLocation ` - --subscription $params.subscriptionId ` - --tags $formattedTags - - $kv = Run az keyvault show ` - -n $params.staticTestKeyvaultName ` - -g $params.staticTestKeyvaultGroup ` - --subscription $params.subscriptionId - if (!$kv) { - RunOrExitOnFailure az keyvault create ` - -n $params.staticTestKeyvaultName ` - -g $params.staticTestKeyvaultGroup ` - --subscription $params.subscriptionId - } - - $values = GetEnvValues - if ($values.provisionerAppId.$Environment) { - $preExistingProvisionerApp = Run az ad sp show -o json --id $values.provisionerAppId.$Environment - if ($preExistingProvisionerApp) { - Write-Host "Found pre-existing provisioner application '$($values.provisionerAppId.$Environment)'" - return - } else { - Write-Host "Failed to find provisioner application '$($values.provisionerAppId.$Environment)'" - } - } - - $spName = "stress-provisioner-$($params.groupSuffix)" - Write-Host "Creating new provisioner application '$spName'." 
- - $sp = RunOrExitOnFailure az ad sp create-for-rbac ` - -o json ` - -n $spName ` - --role Owner ` - --scopes "/subscriptions/$($params.subscriptionId)" - $spInfo = $sp | ConvertFrom-Json - # Force check to see if the service principal was succesfully created and propagated - $oid = (RunOrExitOnFailure az ad sp show -o json --id $spInfo.appId | ConvertFrom-Json).id - - $credentials = @{ - AZURE_CLIENT_ID = $spInfo.appId - AZURE_CLIENT_SECRET = $spInfo.password - AZURE_CLIENT_OID = $oid - AZURE_TENANT_ID = $spInfo.tenant - AZURE_SUBSCRIPTION_ID = $params.subscriptionId - STRESS_CLUSTER_RESOURCE_GROUP = $STRESS_CLUSTER_RESOURCE_GROUP - } - - # Powershell on windows does not play nicely passing strings with newlines as secret values - # to the Azure CLI keyvault command, so use a file here instead. - $envFile = Join-Path ([System.IO.Path]::GetTempPath()) "/static.env" - $dotenv = $credentials.GetEnumerator() | ForEach-Object { "$($_.Key)=$($_.Value)`n" } - (-join $dotenv) | Out-File $envFile - Run az keyvault secret set --vault-name $params.staticTestKeyvaultName --file $envFile -n $STATIC_TEST_DOTENV_NAME - if (Test-Path $envFile) { - Remove-Item -Force $envFile - } - if ($LASTEXITCODE) { - exit $LASTEXITCODE - } - - SetEnvProvisioner $spInfo -} - function GetEnvValues() { $values = ConvertFrom-Yaml -Ordered (Get-Content -Raw $VALUES_FILE) @@ -167,13 +95,6 @@ function SetEnvValues([object]$values) Write-Warning "$VALUES_FILE has been updated and must be checked in." 
} -function SetEnvProvisioner([object]$provisioner) -{ - $values = GetEnvValues - $values.provisionerAppId.$Environment = $provisioner.appId - SetEnvValues $values -} - function SetEnvOutputs([hashtable]$params) { $outputs = (az deployment sub show ` @@ -190,12 +111,20 @@ function SetEnvOutputs([hashtable]$params) $values.debugStorageKeySecretName.$Environment = $outputs.DEBUG_STORAGE_KEY_SECRET_NAME.value $values.debugStorageAccountSecretName.$Environment = $outputs.DEBUG_STORAGE_ACCOUNT_SECRET_NAME.value $values.debugFileShareName.$Environment = $outputs.DEBUG_FILESHARE_NAME.value - $values.staticTestSecretsKeyvaultName.$Environment = $outputs.STATIC_TEST_SECRETS_KEYVAULT.value $values.clusterTestSecretsKeyvaultName.$Environment = $outputs.CLUSTER_TEST_SECRETS_KEYVAULT.value $values.secretProviderIdentity.$Environment = $outputs.SECRET_PROVIDER_CLIENT_ID.value - $values.subscription.$Environment = $STATIC_TEST_DOTENV_NAME + $values.infraWorkloadAppServiceAccountName.$Environment = $outputs.INFRA_WORKLOAD_APP_SERVICE_ACCOUNT_NAME.value + $values.infraWorkloadAppClientId.$Environment = $outputs.INFRA_WORKLOAD_APP_CLIENT_ID.value + $values.infraWorkloadAppObjectId.$Environment = $outputs.INFRA_WORKLOAD_APP_OBJECT_ID.value + $values.workloadAppIssuer.$Environment = $outputs.WORKLOAD_APP_ISSUER.value + $values.clusterGroup.$Environment = $outputs.RESOURCE_GROUP.value + $values.subscriptionId.$Environment = $outputs.SUBSCRIPTION_ID.value $values.tenantId.$Environment = $outputs.TENANT_ID.value + # The workload apps can be found in the stress resource group as Managed Identity types + $clientNames = ($outputs.WORKLOAD_APPS.value | ConvertFrom-Json -AsHashtable).name -join ',' + $values.workloadAppClientNamePool.$Environment = $clientNames + SetEnvValues $values } @@ -212,7 +141,9 @@ function DeployClusterResources([hashtable]$params) --parameters groupName=$STRESS_CLUSTER_RESOURCE_GROUP ` --parameters updateNodes=$UpdateNodes - SetEnvOutputs $params + if 
(!$WhatIfPreference) { + SetEnvOutputs $params + } Write-Host "Importing cluster credentials" RunSupportingWhatIfFlag "--only-show-errors" az aks get-credentials ` @@ -245,7 +176,8 @@ function DeployHelmResources() Run kubectl create namespace $Namespace --dry-run=client -o yaml | kubectl apply -f - # Skip installing chaos mesh charts in development mode (i.e. when testing stress watcher only). - $deployChaosMesh = "$(!$Development)".ToLower() + #$deployChaosMesh = "$(!$Development)".ToLower() + $deployChaosMesh = "false" RunSupportingWhatIfFlag "--dry-run" helm upgrade --install stress-infra ` -n $Namespace ` @@ -315,7 +247,6 @@ function main() if (!$Development) { $params = LoadEnvParams $STRESS_CLUSTER_RESOURCE_GROUP = "rg-stress-cluster-$($params.groupSuffix)" - DeployStaticResources $params DeployClusterResources $params RegisterAKSFeatures $STRESS_CLUSTER_RESOURCE_GROUP $params.clusterName } diff --git a/tools/stress-cluster/services/Stress.Watcher/Dockerfile b/tools/stress-cluster/services/Stress.Watcher/Dockerfile index 4e9ffb8f18d..7a1ccabbe8b 100644 --- a/tools/stress-cluster/services/Stress.Watcher/Dockerfile +++ b/tools/stress-cluster/services/Stress.Watcher/Dockerfile @@ -2,11 +2,11 @@ FROM mcr.microsoft.com/dotnet/sdk:6.0-cbl-mariner2.0 AS build COPY ./src /src -RUN cd /src && dotnet publish -c Release -o /stresswatcher -f net6.0 +RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net6.0 -p:PublishSingleFile=true --self-contained -FROM mcr.microsoft.com/dotnet/runtime:6.0-cbl-mariner2.0 +FROM mcr.microsoft.com/azure-cli:cbl-mariner2.0 COPY --from=build /stresswatcher /stresswatcher WORKDIR /stresswatcher -ENTRYPOINT ["dotnet", "Stress.Watcher.dll"] +ENTRYPOINT ["./Stress.Watcher.dll"] diff --git a/tools/stress-cluster/services/Stress.Watcher/src/JobEventHandler.cs b/tools/stress-cluster/services/Stress.Watcher/src/JobEventHandler.cs index b91dbb7a2ec..e29d7887473 100644 --- 
a/tools/stress-cluster/services/Stress.Watcher/src/JobEventHandler.cs +++ b/tools/stress-cluster/services/Stress.Watcher/src/JobEventHandler.cs @@ -18,23 +18,27 @@ public class JobEventHandler private Kubernetes Client; private GenericChaosClient ChaosClient; - private ArmClient ARMClient; + private ArmClient ArmClient; private Serilog.Core.Logger Logger; + private SubscriptionResource Subscription; + public string Namespace; public JobEventHandler( Kubernetes client, GenericChaosClient chaosClient, ArmClient armClient, + SubscriptionResource subscription, string watchNamespace = "" ) { Client = client; ChaosClient = chaosClient; - ARMClient = armClient; + ArmClient = armClient; Namespace = watchNamespace; + Subscription = subscription; Logger = new LoggerConfiguration() .MinimumLevel.Information() @@ -126,18 +130,16 @@ public async Task DeleteResources(V1Job job, WatchEventType eventType) return; } - Subscription subscription = ARMClient.DefaultSubscription; - - ResourceGroup resourceGroup; + ResourceGroupResource resourceGroup; try { - resourceGroup = await subscription.GetResourceGroups().GetAsync(rgName); + resourceGroup = await Subscription.GetResourceGroups().GetAsync(rgName); } catch (Exception) { - Logger.Error($"Failed to get resource group '{rgName}' using subsription id '{subscription.Id}'"); + Logger.Error($"Failed to get resource group '{rgName}' using subsription id '{Subscription.Id}'"); return; } Logger.Information($"Deleting resources for group {rgName}"); - await resourceGroup.DeleteAsync(); + await resourceGroup.DeleteAsync(Azure.WaitUntil.Completed); Logger.Information($"Deleted resources for group {rgName}"); } diff --git a/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs b/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs new file mode 100644 index 00000000000..f415121f1d8 --- /dev/null +++ b/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs @@ -0,0 +1,295 @@ +using 
System; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Stress.Watcher.Extensions; +using k8s; +using k8s.Models; +using Serilog; +using Serilog.Context; +using Serilog.Sinks.SystemConsole.Themes; +using System.Collections.Generic; +using Azure.ResourceManager; +using Azure.ResourceManager.Resources; +using Azure.ResourceManager.ManagedServiceIdentities; + +namespace Stress.Watcher +{ + public class NamespaceEventHandler + { + private Kubernetes Client; + private ArmClient ArmClient; + private string SubscriptionId; + private string ClusterGroup; + private Serilog.Core.Logger Logger; + private List ExcludedNamespaces = new List { "kube-system", "kube-public", "kube-node-lease", "gatekeeper-system", "stress-infra", "default" }; + private string WatchNamespace = ""; + + // Concurrent Federated Identity Credentials writes under the same managed identity are not supported + private static readonly SemaphoreSlim FederatedCredentialWriteSemaphore = new(1, 1); + + public List WorkloadAppPool; + public string WorkloadAppIssuer; + + public NamespaceEventHandler( + Kubernetes client, + ArmClient armClient, + string subscriptionId, + string clusterGroup, + // GraphServiceClient graphClient, + List workloadAppPool, + string workloadAppIssuer, + string watchNamespace = "" + ) + { + Client = client; + ArmClient = armClient; + SubscriptionId = subscriptionId; + ClusterGroup = clusterGroup; + // GraphClient = graphClient; + WorkloadAppPool = workloadAppPool; + WorkloadAppIssuer = workloadAppIssuer; + WatchNamespace = watchNamespace; + + Logger = new LoggerConfiguration() + .MinimumLevel.Information() + .Enrich + .FromLogContext() + .WriteTo.Console( + outputTemplate: "[{Timestamp:hh:mm:ss} {Level:u3}] {Message,-30:lj} {Properties:j}{NewLine}{Exception}", + theme: AnsiConsoleTheme.Code + ) + .CreateLogger(); + } + + public async Task Watch(CancellationToken cancellationToken) + { + string resourceVersion = null; + while 
(!cancellationToken.IsCancellationRequested) + { + try + { + Logger.Information("Starting namespace watch"); + var listTask = Client.CoreV1.ListNamespaceWithHttpMessagesAsync( + allowWatchBookmarks: true, + watch: true, + resourceVersion: resourceVersion, + cancellationToken: cancellationToken + ); + var tcs = new TaskCompletionSource(); + using var watcher = listTask.Watch( + (type, ns) => + { + resourceVersion = ns.ResourceVersion(); + HandleNamespaceEvent(type, ns); + }, + (err) => + { + Logger.Error(err, "Handling error event for namespace watch stream."); + if (err is KubernetesException kubernetesError) + { + // Handle "too old resource version" + if (string.Equals(kubernetesError.Status.Reason, "Expired", StringComparison.Ordinal)) + { + resourceVersion = null; + } + } + tcs.TrySetException(err); + throw err; + }, + () => + { + Logger.Warning("Namespace watch has closed."); + tcs.TrySetResult(); + } + ); + using var registration = cancellationToken.Register(watcher.Dispose); + await tcs.Task; + } + catch (Exception ex) + { + Log.Error(ex, "Error with Namespace watch stream."); + await Task.Delay(1000, cancellationToken); + } + } + } + + public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns) + { + if (ExcludedNamespaces.Contains(ns.Name())) + { + return; + } + if (!string.IsNullOrEmpty(WatchNamespace) && ns.Name() != WatchNamespace) + { + Logger.Information($"Skipping namespace '{ns.Name()}' because it is not the watched namespace '{WatchNamespace}'"); + return; + } + + using (LogContext.PushProperty("namespace", ns.Name())) + { + if (eventType == WatchEventType.Added) + { + InitializeWorkloadIdForNamespace(ns).ContinueWith(t => + { + if (t.Exception != null) + { + Logger.Error(t.Exception, "Error creating federated identity credential."); + return; + } + }); + } + else if (eventType == WatchEventType.Deleted) + { + DeleteFederatedIdentityCredential(ns).ContinueWith(t => + { + Logger.Information("Releasing federated credential write 
semaphore"); + FederatedCredentialWriteSemaphore.Release(); + if (t.Exception != null) + { + Logger.Error(t.Exception, "Error deleting federated identity credential."); + } + }); + } + } + } + + public string CreateFederatedIdentityCredentialName(V1Namespace ns) + { + return $"stress-{ns.Name()}"; + } + + public async Task InitializeWorkloadIdForNamespace(V1Namespace ns) + { + UserAssignedIdentityResource selectedWorkloadIdentity = null; + try + { + selectedWorkloadIdentity = await CreateFederatedIdentityCredential(ns); + } + finally + { + Logger.Information("Releasing federated credential write semaphore"); + FederatedCredentialWriteSemaphore.Release(); + } + + var identityData = await selectedWorkloadIdentity.GetAsync(); + var selectedWorkloadAppId = identityData.Value.Data.ClientId.ToString(); + + var meta = new V1ObjectMeta(){ + Name = ns.Name(), + NamespaceProperty = ns.Name(), + Annotations = new Dictionary(){ + { "azure.workload.identity/client-id", selectedWorkloadAppId } + } + }; + var serviceAccount = new V1ServiceAccount(metadata: meta); + await Client.CreateNamespacedServiceAccountAsync(serviceAccount, ns.Name()); + Logger.Information($"Created service account '{ns.Name()}/{ns.Name()}' with workload client id '{selectedWorkloadAppId}'"); + } + + public async Task CreateFederatedIdentityCredential(V1Namespace ns) + { + var credentialName = CreateFederatedIdentityCredentialName(ns); + var subject = $"system:serviceaccount:{ns.Name()}:{ns.Name()}"; + string selectedWorkloadApp = ""; + UserAssignedIdentityResource selectedIdentity = null; + + // Wait on the list call so we don't have an outdated collection state for multiple namespaces events processed together + // This is a slow sequence of calls to lock on (several seconds) but frequency is only high enough for this to matter + // on service startup with a large number of namespaces that haven't been initialized. 
+ Logger.Information($"Waiting for federated credential write semaphore"); + await FederatedCredentialWriteSemaphore.WaitAsync(); + + foreach (var workloadApp in WorkloadAppPool) + { + var userAssignedIdentityResourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, workloadApp); + var userAssignedIdentity = ArmClient.GetUserAssignedIdentityResource(userAssignedIdentityResourceId); + Logger.Information($"Getting federated identity credentials for managed identity '{workloadApp}'"); + var fedCreds = userAssignedIdentity.GetFederatedIdentityCredentials(); + + // Federated credentials maxes out per managed identity at 20, leave some wiggle room due to list state delays + Logger.Information($"Found {fedCreds.Count()} creds for {workloadApp}"); + if (fedCreds.Count() < 19) + { + selectedWorkloadApp = workloadApp; + selectedIdentity = userAssignedIdentity; + break; + } + } + + if (string.IsNullOrEmpty(selectedWorkloadApp) || selectedIdentity == null) + { + var errorMessage = "No available managed identities to create federated identity credential. 
Add more to the pool."; + Logger.Error(errorMessage); + throw new Exception(errorMessage); + } + + var resourceGroupResourceId = ResourceGroupResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup); + var resourceGroupResource = ArmClient.GetResourceGroupResource(resourceGroupResourceId); + var collection = resourceGroupResource.GetUserAssignedIdentities(); + + var federatedIdentityCredentialResourceId = FederatedIdentityCredentialResource.CreateResourceIdentifier( + SubscriptionId, ClusterGroup, selectedWorkloadApp, credentialName); + var federatedIdentityCredential = ArmClient.GetFederatedIdentityCredentialResource(federatedIdentityCredentialResourceId); + + var fedCredData = new FederatedIdentityCredentialData() + { + IssuerUri = new Uri(WorkloadAppIssuer), + // Azure AKS workload identity enabled service accounts follow this scheme for subject: + // system:serviceaccount:{namespace}:{service account name} + Subject = subject, + Audiences = { + "api://AzureADTokenExchange", + }, + }; + + Logger.Information($"Creating/updating federated identity credential '{credentialName}' " + + $"with subject '{subject}' for managed identity '{selectedWorkloadApp}'"); + var lro = await federatedIdentityCredential.UpdateAsync(Azure.WaitUntil.Completed, fedCredData); + Logger.Information($"Created federated identity credential '{lro.Value.Data.Name}'"); + + return selectedIdentity; + } + + public async Task DeleteFederatedIdentityCredential(V1Namespace ns) + { + var credentialName = CreateFederatedIdentityCredentialName(ns); + var workloadApp = ""; + foreach (var app in WorkloadAppPool) + { + var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app); + var userAssignedIdentity = ArmClient.GetUserAssignedIdentityResource(resourceId); + var fedCreds = userAssignedIdentity.GetFederatedIdentityCredentials(); + await foreach (var item in fedCreds.GetAllAsync()) + { + if (item.Data.Name == credentialName) + { + workloadApp = app; 
+ break; + } + } + if (!String.IsNullOrEmpty(workloadApp)) + { + break; + } + } + + if (string.IsNullOrEmpty(workloadApp)) + { + Logger.Warning($"Federated identity credential '{credentialName}' not found in workload app pool. Skipping delete."); + return; + } + + var federatedIdentityCredentialResourceId = FederatedIdentityCredentialResource.CreateResourceIdentifier( + SubscriptionId, ClusterGroup, workloadApp, credentialName); + var federatedIdentityCredential = ArmClient.GetFederatedIdentityCredentialResource(federatedIdentityCredentialResourceId); + + Logger.Information($"Waiting for federated credential write semaphore"); + await FederatedCredentialWriteSemaphore.WaitAsync(); + + Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{workloadApp}'"); + var lro = await federatedIdentityCredential.DeleteAsync(Azure.WaitUntil.Completed); + Logger.Information($"Deleted federated identity credential '{credentialName}'"); + } + } +} diff --git a/tools/stress-cluster/services/Stress.Watcher/src/Program.cs b/tools/stress-cluster/services/Stress.Watcher/src/Program.cs index 4547f1f9129..4c8550a9f52 100644 --- a/tools/stress-cluster/services/Stress.Watcher/src/Program.cs +++ b/tools/stress-cluster/services/Stress.Watcher/src/Program.cs @@ -1,13 +1,14 @@ using System; using System.Collections.Generic; +using System.IO; using System.Threading; using System.Threading.Tasks; using k8s; -using k8s.Models; using CommandLine; using Azure.Identity; using Azure.ResourceManager; using dotenv.net; +using YamlDotNet.RepresentationModel; namespace Stress.Watcher { @@ -17,6 +18,26 @@ public class Options { [Option('n', "namespace", Required = false, HelpText = "Watch specified namespace only.")] public string Namespace { get; set; } + + [Option('e', "environment", Required = false, HelpText = "Stress environment, specify for local testing")] + public string Environment { get; set; } + + [Option('l', "local-addons-path", Required = false, 
HelpText = "Local stress-test-addons chart path, specify for local testing to load stress cluster config")] + public string LocalAddonsPath { get; set; } + + [Option('i', "workload-app-issuer", Required = false, HelpText = "Cluster issuer URL for workload app token requests")] + public string WorkloadAppIssuer { get; set; } + + [Option('w', "workload-app-pool", Required = false, HelpText = "Pool of workload identity apps to use for creating namespaced federated credentials")] + public string WorkloadAppPool { get; set; } + } + + class WorkloadAuthConfig + { + public List WorkloadAppPool; + public string WorkloadAppIssuer; + public string SubscriptionId; + public string ClusterGroup; } static async Task Main(string[] args) @@ -30,40 +51,118 @@ await Parser.Default.ParseArguments(args) static async Task Program(Options options) { - KubernetesClientConfiguration config; + KubernetesClientConfiguration k8sConfig; + var isLocal = false; // Try to load kubeconfig file, if running locally, // otherwise try in cluster config (running in k8s container) try { - config = KubernetesClientConfiguration.BuildConfigFromConfigFile(); + k8sConfig = KubernetesClientConfiguration.BuildConfigFromConfigFile(); + isLocal = true; } catch (Exception) { - config = KubernetesClientConfiguration.InClusterConfig(); + k8sConfig = KubernetesClientConfiguration.InClusterConfig(); } - var client = new Kubernetes(config); - var chaosClient = new GenericChaosClient(config); + var workloadConfig = GetWorkloadConfigValues(options, isLocal); - DotEnv.Load(options: new DotEnvOptions(envFilePaths: new[] {"/mnt/outputs/.env"})); - var subscriptionId = Environment.GetEnvironmentVariable("AZURE_SUBSCRIPTION_ID"); - // Default to 'Azure SDK Developer Playground' subscription when testing locally outside of the stress cluster. - subscriptionId = subscriptionId ?? 
"faa080af-c1d8-40ad-9cce-e1a450ca5b57"; - - ArmClient armClient = new ArmClient(subscriptionId, new DefaultAzureCredential()); + var credential = new DefaultAzureCredential(); + var client = new Kubernetes(k8sConfig); + var chaosClient = new GenericChaosClient(k8sConfig); + var armClient = new ArmClient(credential, workloadConfig.SubscriptionId); + var subscription = armClient.GetDefaultSubscription(); var podEventHandler = new PodEventHandler(client, chaosClient, armClient, options.Namespace); - var jobEventHandler = new JobEventHandler(client, chaosClient, armClient, options.Namespace); + var jobEventHandler = new JobEventHandler(client, chaosClient, armClient, subscription, options.Namespace); + var namespaceEventHandler = new NamespaceEventHandler( + client, armClient, workloadConfig.SubscriptionId, workloadConfig.ClusterGroup, + workloadConfig.WorkloadAppPool, workloadConfig.WorkloadAppIssuer, options.Namespace); var cts = new CancellationTokenSource(); var taskList = new List { Task.Run(async () => { await podEventHandler.Watch(cts.Token); }), Task.Run(async () => { await jobEventHandler.Watch(cts.Token); }), + Task.Run(async () => { await namespaceEventHandler.Watch(cts.Token); }), }; await Task.WhenAll(taskList.ToArray()); } + + static WorkloadAuthConfig GetWorkloadConfigValues(Options options, Boolean isLocal) + { + if (!isLocal) + { + DotEnv.Load(options: new DotEnvOptions(envFilePaths: new[] { "/mnt/outputs/.env" })); + } + + var workloadAppPool = options.WorkloadAppPool != null ? 
new List(options.WorkloadAppPool.Split(',')) : null; + var workloadAppIssuer = options.WorkloadAppIssuer; + var subscriptionId = Environment.GetEnvironmentVariable("AZURE_SUBSCRIPTION_ID"); + var clusterGroup = Environment.GetEnvironmentVariable("STRESS_CLUSTER_RESOURCE_GROUP"); + + if (isLocal) + { + if (string.IsNullOrEmpty(options.Environment) || string.IsNullOrEmpty(options.LocalAddonsPath)) + { + Console.WriteLine("The --environment flag and --local-addons-path flags must be set when running locally.\n" + + "Add '-e prod', '-e pg', or your custom environment to set the environment flag.\n" + + "Add '-l ' to set the local addons path flag.\n" + + "Local addons path can be found in /tools/stress-cluster/cluster/kubernetes/stress-test-addons/.\n"); + Environment.Exit(1); + } + + using var reader = new StreamReader(Path.Combine(options.LocalAddonsPath, "values.yaml")); + var yaml = new YamlStream(); + yaml.Load(reader); + var mapping = (YamlMappingNode)yaml.Documents[0].RootNode; + + if (String.IsNullOrEmpty(options.WorkloadAppIssuer)) + { + var workloadAppIssuerKey = (YamlMappingNode)mapping.Children[new YamlScalarNode("workloadAppIssuer")]; + workloadAppIssuer = ((YamlScalarNode)workloadAppIssuerKey.Children[new YamlScalarNode(options.Environment)]).Value; + } + + if (options.WorkloadAppPool == null) + { + var workloadPoolKey = (YamlMappingNode)mapping.Children[new YamlScalarNode("workloadAppClientNamePool")]; + var workloadPoolAppsCsv = ((YamlScalarNode)workloadPoolKey.Children[new YamlScalarNode(options.Environment)]).Value; + workloadAppPool = new List(workloadPoolAppsCsv.Split(',')); + } + + var subscriptionKey = (YamlMappingNode)mapping.Children[new YamlScalarNode("subscriptionId")]; + subscriptionId = ((YamlScalarNode)subscriptionKey.Children[new YamlScalarNode(options.Environment)]).Value; + + var clusterGroupKey = (YamlMappingNode)mapping.Children[new YamlScalarNode("clusterGroup")]; + clusterGroup = ((YamlScalarNode)clusterGroupKey.Children[new 
YamlScalarNode(options.Environment)]).Value; + } + + if (String.IsNullOrEmpty(workloadAppIssuer)) + { + throw new Exception("Workload app issuer must be specified via --workload-app-issuer flag, or in addons values.yaml file if running locally"); + } + if (workloadAppPool == null || workloadAppPool.Count == 0) + { + throw new Exception("Workload app pool must be specified via --workload-app-pool flag, or in addons values.yaml file if running locally"); + } + if (String.IsNullOrEmpty(subscriptionId)) + { + throw new Exception("Subscription must be specified via AZURE_SUBSCRIPTION_ID environment variable, or via --environment to load addons values.yaml file if running locally"); + } + if (String.IsNullOrEmpty(clusterGroup)) + { + throw new Exception("Cluster resource group must be specified via STRESS_CLUSTER_RESOURCE_GROUP environment variable, or via --environment to load addons values.yaml file if running locally"); + } + + return new WorkloadAuthConfig + { + WorkloadAppPool = workloadAppPool, + WorkloadAppIssuer = workloadAppIssuer, + SubscriptionId = subscriptionId, + ClusterGroup = clusterGroup + }; + } } } diff --git a/tools/stress-cluster/services/Stress.Watcher/src/Stress.Watcher.csproj b/tools/stress-cluster/services/Stress.Watcher/src/Stress.Watcher.csproj index 14dcdd1ed3d..61f6af9e6ae 100644 --- a/tools/stress-cluster/services/Stress.Watcher/src/Stress.Watcher.csproj +++ b/tools/stress-cluster/services/Stress.Watcher/src/Stress.Watcher.csproj @@ -6,6 +6,10 @@ Stress.Watcher + + true + + @@ -13,8 +17,10 @@ - + + + diff --git a/tools/stress-cluster/services/Stress.Watcher/tests/JobEventHandlerTest.cs b/tools/stress-cluster/services/Stress.Watcher/tests/JobEventHandlerTest.cs index 0a63ab703f7..e7463bb52be 100644 --- a/tools/stress-cluster/services/Stress.Watcher/tests/JobEventHandlerTest.cs +++ b/tools/stress-cluster/services/Stress.Watcher/tests/JobEventHandlerTest.cs @@ -52,7 +52,7 @@ private V1Container CreateContainer(string name, List env = null) 
[Fact] public void TestShouldDeleteResourcesWithInitContainer() { - var handler = new JobEventHandler(null, null, null); + var handler = new JobEventHandler(null, null, null, null); var job = CreateJob("testns"); handler.ShouldDeleteResources(job, WatchEventType.Modified).Should().BeFalse(); @@ -118,7 +118,7 @@ public void TestShouldDeleteResourcesWithInitContainer() [Fact] public void TestShouldDeleteResourcesWithCondition() { - var handler = new JobEventHandler(null, null, null); + var handler = new JobEventHandler(null, null, null, null); var job = CreateJob("testns"); handler.ShouldDeleteResources(job, WatchEventType.Modified).Should().BeFalse(); @@ -143,7 +143,7 @@ public void TestShouldDeleteResourcesWithCondition() [Fact] public void TestShouldDeleteResourcesWithLabel() { - var handler = new JobEventHandler(null, null, null); + var handler = new JobEventHandler(null, null, null, null); var job = CreateJob("testns"); handler.ShouldDeleteResources(job, WatchEventType.Modified).Should().BeFalse(); @@ -160,7 +160,7 @@ public void TestShouldDeleteResourcesWithLabel() [Fact] public void TestGetResourceGroupName() { - var handler = new JobEventHandler(null, null, null); + var handler = new JobEventHandler(null, null, null, null); var job = CreateJob("testns"); job.Spec.Template.Spec.InitContainers = new List()