Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fully automate stress cluster buildout and add support for azure file share mounting #2106

Merged
10 commits merged into from
Oct 22, 2021
4 changes: 2 additions & 2 deletions eng/common/scripts/stress-testing/deploy-stress-tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ function DeployStressTests(
[string]$environment = 'test',
[string]$repository = 'images',
[boolean]$pushImages = $false,
[string]$clusterGroup = 'rg-stress-test-cluster-',
[string]$clusterGroup = 'rg-stress-cluster-test',
[string]$deployId = 'local',
[string]$subscription = 'Azure SDK Test Resources'
[string]$subscription = 'Azure SDK Developer Playground'
) {
if ($PSCmdlet.ParameterSetName -eq 'DoLogin') {
Login $subscription $clusterGroup $pushImages
Expand Down
20 changes: 19 additions & 1 deletion eng/containers/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ parameters:
dockerFile: 'tools/test-proxy/docker/dockerfile-win'
stableTags:
- 'latest'
- name: stress_watcher
pool: 'ubuntu-20.04'
dockerRepo: 'stress/watcher'
dockerFile: 'tools/stress-cluster/services/Stress.Watcher/Dockerfile'
stableTags:
- 'latest'

trigger:
branches:
Expand All @@ -32,8 +38,18 @@ trigger:
- eng/containers/
- tools/test-proxy/docker/
- tools/keyvault-mock-attestation/Dockerfile
- tools/stress-cluster/services/Stress.Watcher/Dockerfile

pr: none
pr:
scbedd marked this conversation as resolved.
Show resolved Hide resolved
branches:
include:
- main
paths:
include:
- eng/containers/
- tools/test-proxy/docker/
- tools/keyvault-mock-attestation/Dockerfile
- tools/stress-cluster/services/Stress.Watcher/Dockerfile

variables:
- name: containerRegistry
Expand Down Expand Up @@ -64,6 +80,7 @@ jobs:

- task: Docker@2
displayName: Push ${{ config.name }}:$(imageTag)
condition: ne(variables['Build.Reason'], 'PullRequest')
scbedd marked this conversation as resolved.
Show resolved Hide resolved
benbp marked this conversation as resolved.
Show resolved Hide resolved
inputs:
containerRegistry: $(containerRegistry)
repository: ${{ config.dockerRepo }}
Expand All @@ -81,6 +98,7 @@ jobs:

- task: Docker@2
displayName: Push ${{ config.name }}:${{ stableTag }}
condition: ne(variables['Build.Reason'], 'PullRequest')
benbp marked this conversation as resolved.
Show resolved Hide resolved
inputs:
containerRegistry: $(containerRegistry)
repository: ${{ config.dockerRepo }}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Helm chart metadata for the debug file share example stress test.
apiVersion: v2
name: debug-share-example
description: An example stress test chart that uses a file share for debugging (e.g. for large log files, heap dumps)
version: 0.1.1
appVersion: v0.1
annotations:
  # Opts this chart in to auto-discovery via `find-all-stress-packages.ps1`.
  stressTest: "true"
  # Lets discovery be filtered to examples: `find-all-stress-packages.ps1 -filters @{example='true'}`.
  example: "true"
  namespace: "examples"

dependencies:
  - name: stress-test-addons
    version: 0.1.9
    repository: https://stresstestcharts.blob.core.windows.net/helm/
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /* Render the stress job from the shared addons template, using the pod spec defined below. */ -}}
{{- include "stress-test-addons.env-job-template.from-pod" (list . "stress.deploy-example") -}}
{{- define "stress.deploy-example" -}}
metadata:
  labels:
    testName: "debug-share-example"
spec:
  containers:
    - name: debug-share-example
      image: busybox
      command:
        - 'sh'
        - '-c'
      # The script writes a marker file under $DEBUG_SHARE and reads it back.
      args:
        - |
          cd $DEBUG_SHARE;
          pwd;
          mkdir example;
          echo "debug share example success" > example/success;
          ls; ls example; cat example/success;
      # Including the container-env template mounts the file share,
      # by default at the path $DEBUG_SHARE.
      {{- include "stress-test-addons.container-env" . | nindent 6 }}
{{- end -}}
2 changes: 2 additions & 0 deletions tools/stress-cluster/cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,11 @@ az ad sp create-for-rbac -n 'stress-test-provisioner' --role Contributor --scope
Create an env file with the service principal values created above:

```
AZURE_CLIENT_OID=<app object id>
AZURE_CLIENT_ID=<app id>
AZURE_CLIENT_SECRET=<password/secret>
AZURE_TENANT_ID=<tenant id>
AZURE_SUBSCRIPTION_ID=<subscription id>
```

Upload it to the static keyvault:
Expand Down
60 changes: 41 additions & 19 deletions tools/stress-cluster/cluster/azure/cluster/cluster.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,52 @@ param groupSuffix string
param dnsPrefix string = 's1'
param clusterName string
param location string = resourceGroup().location
param agentVMSize string = 'Standard_D2_v3'

@minValue(1)
@maxValue(50)
@description('The number of nodes for the cluster.')
param agentCount int = 3
param enableHighMemAgentPool bool = false

// monitoring parameters
param enableMonitoring bool = false
param workspaceId string

var kubernetesVersion = '1.20.5'
var kubernetesVersion = '1.21.2'
var nodeResourceGroup = 'rg-nodes-${dnsPrefix}-${clusterName}-${groupSuffix}'
var agentPoolName = 'agentpool01'

// Agent pool profile that is always part of the cluster: Standard_D2_v3 Linux
// nodes labelled sku=default, starting at 3 nodes with autoscaling enabled
// (minCount 3, maxCount 9).
var defaultAgentPool = {
  name: 'default'
  mode: 'System'
  osType: 'Linux'
  type: 'VirtualMachineScaleSets'
  vmSize: 'Standard_D2_v3'
  enableEncryptionAtHost: true
  enableAutoScaling: true
  count: 3
  minCount: 3
  maxCount: 9
  nodeLabels: {
    sku: 'default'
  }
}

// Optional agent pool profile on Standard_D4ds_v4 Linux nodes labelled
// sku=highMem, starting at 1 node with autoscaling enabled (minCount 1,
// maxCount 3). It is only added to the cluster when enableHighMemAgentPool is true.
var highMemAgentPool = {
  name: 'highmemory'
  mode: 'System'
  osType: 'Linux'
  type: 'VirtualMachineScaleSets'
  vmSize: 'Standard_D4ds_v4'
  enableEncryptionAtHost: true
  enableAutoScaling: true
  count: 1
  minCount: 1
  maxCount: 3
  nodeLabels: {
    sku: 'highMem'
  }
}

// Final list of agent pool profiles: the default pool always comes first and
// the high memory pool follows it only when enableHighMemAgentPool is true.
var agentPools = enableHighMemAgentPool ? [
  defaultAgentPool
  highMemAgentPool
] : [
  defaultAgentPool
]

resource cluster 'Microsoft.ContainerService/managedClusters@2020-09-01' = {
name: clusterName
Expand All @@ -41,17 +73,7 @@ resource cluster 'Microsoft.ContainerService/managedClusters@2020-09-01' = {
kubernetesVersion: kubernetesVersion
enableRBAC: true
dnsPrefix: dnsPrefix
agentPoolProfiles: [
{
name: agentPoolName
count: agentCount
mode: 'System'
vmSize: agentVMSize
type: 'VirtualMachineScaleSets'
osType: 'Linux'
enableAutoScaling: false
}
]
agentPoolProfiles: agentPools
servicePrincipalProfile: {
clientId: 'msi'
}
Expand Down
21 changes: 21 additions & 0 deletions tools/stress-cluster/cluster/azure/cluster/storage.bicep
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Creates a StorageV2 / Standard_LRS storage account and a file share inside
// it, then returns the account name, an account key and the share name.

// Name of the storage account to create.
param storageName string
// Name of the file share to create in the account.
param fileShareName string
// Region for the storage account; defaults to the resource group's location.
param location string = resourceGroup().location

resource debugStorageAccount 'Microsoft.Storage/storageAccounts@2019-06-01' = {
  name: storageName
  location: location
  sku: {
    name: 'Standard_LRS'
  }
  kind: 'StorageV2'
}

// The share is addressed as <account>/default/<share>. Referencing the account's
// symbolic name in the share name ties the share to the account declared above.
resource debugFileShare 'Microsoft.Storage/storageAccounts/fileServices/shares@2021-04-01' = {
  name: '${debugStorageAccount.name}/default/${fileShareName}'
  properties: {}
}

output name string = debugStorageAccount.name
// NOTE(review): this emits the first account key as a plain deployment output.
// Confirm that exposing it this way is acceptable for this cluster.
output key string = debugStorageAccount.listKeys().keys[0].value
output fileShareName string = fileShareName
43 changes: 40 additions & 3 deletions tools/stress-cluster/cluster/azure/main.bicep
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
targetScope = 'subscription'

param subscriptionId string = ''
param groupSuffix string
param clusterName string
param clusterLocation string = 'westus2'
Expand All @@ -8,15 +9,19 @@ param staticTestSecretsKeyvaultGroup string
param monitoringLocation string = 'centralus'
param tags object
param enableMonitoring bool = false
param enableHighMemAgentPool bool = false
param enableDebugStorage bool = false

// Azure Developer Platform Team Group
// https://ms.portal.azure.com/#blade/Microsoft_AAD_IAM/GroupDetailsMenuBlade/Overview/groupId/56709ad9-8962-418a-ad0d-4b25fa962bae
param accessGroups array = [
'56709ad9-8962-418a-ad0d-4b25fa962bae'
]

var groupName = 'rg-stress-cluster-${groupSuffix}'

resource group 'Microsoft.Resources/resourceGroups@2020-10-01' = {
name: 'rg-stress-test-cluster-${groupSuffix}'
name: groupName
location: clusterLocation
tags: tags
}
Expand Down Expand Up @@ -52,6 +57,7 @@ module cluster 'cluster/cluster.bicep' = {
tags: tags
groupSuffix: groupSuffix
enableMonitoring: enableMonitoring
enableHighMemAgentPool: enableHighMemAgentPool
workspaceId: enableMonitoring ? logWorkspace.outputs.id : ''
}
}
Expand All @@ -60,15 +66,34 @@ module containerRegistry 'cluster/acr.bicep' = {
name: 'containerRegistry'
scope: group
params: {
registryName: '${replace(clusterName, '-', '')}registry'
registryName: '${replace(clusterName, '-', '')}${resourceSuffix}'
location: clusterLocation
objectIds: concat(accessGroups, array(cluster.outputs.kubeletIdentityObjectId))
}
}

module storage 'cluster/storage.bicep' = if (enableDebugStorage) {
name: 'storage'
scope: group
params: {
storageName: 'stressdebug${resourceSuffix}'
fileShareName: 'stressfiles${resourceSuffix}'
location: clusterLocation
}
}

var appInsightsInstrumentationKeySecretName = 'appInsightsInstrumentationKey-${resourceSuffix}'
// Value is in dotenv format as it will be appended to stress test container dotenv files
var appInsightsInstrumentationKeySecretValue = 'APPINSIGHTS_INSTRUMENTATIONKEY=${appInsights.outputs.instrumentationKey}\n'

// Storage account information used for kubernetes fileshare volume mounting via the azure files csi driver
// See https://docs.microsoft.com/en-us/azure/aks/azure-files-volume#create-a-kubernetes-secret
// See https://docs.microsoft.com/en-us/azure/aks/azure-files-csi
//
// The `storage` module is only deployed when `enableDebugStorage` is true, so its
// outputs are read behind that same flag (mirroring how `workspaceId` guards
// `logWorkspace.outputs.id`). Reading outputs of a module that was not deployed
// fails the deployment.
// NOTE(review): when debug storage is disabled the secret values fall back to an
// empty string; confirm the keyvault module accepts empty secret values.
var debugStorageKeySecretName = 'debugStorageKey-${resourceSuffix}'
var debugStorageKeySecretValue = enableDebugStorage ? storage.outputs.key : ''
var debugStorageAccountSecretName = 'debugStorageAccount-${resourceSuffix}'
var debugStorageAccountSecretValue = enableDebugStorage ? storage.outputs.name : ''

module keyvault 'cluster/keyvault.bicep' = if (enableMonitoring) {
name: 'keyvault'
scope: group
Expand All @@ -83,6 +108,14 @@ module keyvault 'cluster/keyvault.bicep' = if (enableMonitoring) {
secretName: appInsightsInstrumentationKeySecretName
secretValue: appInsightsInstrumentationKeySecretValue
}
{
secretName: debugStorageKeySecretName
secretValue: debugStorageKeySecretValue
}
{
secretName: debugStorageAccountSecretName
secretValue: debugStorageAccountSecretValue
}
]
}
}
Expand All @@ -99,10 +132,14 @@ module accessPolicy 'cluster/static-vault-access-policy.bicep' = {
}

output STATIC_TEST_SECRETS_KEYVAULT string = staticTestSecretsKeyvaultName
output CLUSTER_KEYVAULT string = keyvault.outputs.keyvaultName
output CLUSTER_TEST_SECRETS_KEYVAULT string = keyvault.outputs.keyvaultName
output SECRET_PROVIDER_CLIENT_ID string = cluster.outputs.secretProviderClientId
output CLUSTER_NAME string = cluster.outputs.clusterName
output CONTAINER_REGISTRY_NAME string = containerRegistry.outputs.containerRegistryName
output APPINSIGHTS_KEY_SECRET_NAME string = appInsightsInstrumentationKeySecretName
output DEBUG_STORAGE_KEY_SECRET_NAME string = debugStorageKeySecretName
output DEBUG_STORAGE_ACCOUNT_SECRET_NAME string = debugStorageAccountSecretName
output DEBUG_FILESHARE_NAME string = storage.outputs.fileShareName
output RESOURCE_GROUP string = group.name
output SUBSCRIPTION_ID string = subscriptionId
output TENANT_ID string = subscription().tenantId
12 changes: 9 additions & 3 deletions tools/stress-cluster/cluster/azure/parameters/dev.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"groupName": {
"subscriptionId": {
"value": // add me
},
"groupSuffix": {
"value": // add me
},
"clusterName": {
Expand All @@ -12,10 +15,13 @@
"value": "westus2"
},
"staticTestSecretsKeyvaultName": {
"value": "StressTestSecrets"
"value": "stress-secrets-dev"
},
"staticTestSecretsKeyvaultGroup": {
"value": "rg-StressTestSecrets"
"value": "rg-stress-secrets-dev"
},
"enableDebugStorage": {
"value": true
},
"tags": {
"value": {
Expand Down
7 changes: 5 additions & 2 deletions tools/stress-cluster/cluster/azure/parameters/prod.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"subscriptionId": {
"value": "2cd617ea-1866-46b1-90e3-fffb087ebf9b"
},
"groupSuffix": {
"value": "prod"
},
Expand All @@ -15,10 +18,10 @@
"value": "centralus"
},
"staticTestSecretsKeyvaultName": {
"value": "StressTestSecrets"
"value": "stress-secrets-prod"
},
"staticTestSecretsKeyvaultGroup": {
"value": "rg-StressTestSecrets"
"value": "rg-stress-secrets-prod"
},
"enableMonitoring": {
"value": true
Expand Down
Loading