Skip to content

Commit

Permalink
[stress testing] Update prod agent pool. Swap login/pushimages parameter defaults (#6103)
Browse files Browse the repository at this point in the history

* Update prod agent pool size

* Swap defaults: -Login and -PushImages to -SkipLogin and -SkipPushImages
  • Loading branch information
benbp authored May 11, 2023
1 parent b0afedc commit fc4a78a
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 37 deletions.
8 changes: 3 additions & 5 deletions eng/common/scripts/stress-testing/deploy-stress-tests.ps1
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
# Set a default parameter set here so we can call this script without requiring -Login and -Subscription,
# but if it IS called with either of those, then both parameters need to be required. Not defining a
# default parameter set makes Login/Subscription required all the time.
# Not defining a default parameter set makes SkipLogin/Subscription required all the time.
[CmdletBinding(DefaultParameterSetName = 'Default')]
param(
[string]$SearchDirectory,
[hashtable]$Filters,
[string]$Environment,
[string]$Repository,
[switch]$PushImages,
[switch]$SkipPushImages,
[string]$ClusterGroup,
[string]$DeployId,
[switch]$Login,
[switch]$SkipLogin,
[string]$Subscription,

# Default to true in Azure Pipelines environments
Expand Down
26 changes: 13 additions & 13 deletions eng/common/scripts/stress-testing/stress-test-deployment-lib.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function RunOrExitOnFailure()
}
}

function Login([string]$subscription, [string]$clusterGroup, [switch]$pushImages)
function Login([string]$subscription, [string]$clusterGroup, [switch]$skipPushImages)
{
Write-Host "Logging in to subscription, cluster and container registry"
az account show *> $null
Expand Down Expand Up @@ -73,7 +73,7 @@ function Login([string]$subscription, [string]$clusterGroup, [switch]$pushImages
RunOrExitOnFailure kubectl config set-context $clusterName --namespace $defaultNamespace
}

if ($pushImages) {
if (!$skipPushImages) {
$registry = RunOrExitOnFailure az acr list -g $clusterGroup --subscription $subscription -o json
$registryName = ($registry | ConvertFrom-Json).name
RunOrExitOnFailure az acr login -n $registryName
Expand All @@ -86,10 +86,10 @@ function DeployStressTests(
# Default to playground environment
[string]$environment = 'pg',
[string]$repository = '',
[switch]$pushImages,
[switch]$skipPushImages,
[string]$clusterGroup = '',
[string]$deployId = '',
[switch]$login,
[switch]$skipLogin,
[string]$subscription = '',
[switch]$CI,
[string]$Namespace,
Expand Down Expand Up @@ -125,8 +125,8 @@ function DeployStressTests(
throw "clusterGroup and subscription parameters must be specified when deploying to an environment that is not pg or prod."
}

if ($login) {
Login -subscription $subscription -clusterGroup $clusterGroup -pushImages:$pushImages
if (!$skipLogin) {
Login -subscription $subscription -clusterGroup $clusterGroup -skipPushImages:$skipPushImages
}

$chartRepoName = 'stress-test-charts'
Expand Down Expand Up @@ -162,8 +162,8 @@ function DeployStressTests(
-deployId $deployer `
-environment $environment `
-repositoryBase $repository `
-pushImages:$pushImages `
-login:$login `
-skipPushImages:$skipPushImages `
-skipLogin:$skipLogin `
-clusterGroup $clusterGroup `
-subscription $subscription
}
Expand All @@ -189,8 +189,8 @@ function DeployStressPackage(
[string]$deployId,
[string]$environment,
[string]$repositoryBase,
[switch]$pushImages,
[switch]$login,
[switch]$skipPushImages,
[switch]$skipLogin,
[string]$clusterGroup,
[string]$subscription
) {
Expand Down Expand Up @@ -267,7 +267,7 @@ function DeployStressPackage(
}
$dockerfileName = ($dockerFilePath -split { $_ -in '\', '/' })[-1].ToLower()
$imageTag = $imageTagBase + "/${dockerfileName}:${deployId}"
if ($pushImages) {
if (!$skipPushImages) {
Write-Host "Building and pushing stress test docker image '$imageTag'"
$dockerFile = Get-ChildItem $dockerFilePath

Expand All @@ -290,8 +290,8 @@ function DeployStressPackage(

Run docker push $imageTag
if ($LASTEXITCODE) {
if ($login) {
Write-Warning "If docker push is failing due to authentication issues, try calling this script with '-Login'"
if (!$skipLogin) {
Write-Warning "If docker push is failing due to authentication issues, try calling this script without '-SkipLogin'"
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions eng/pipelines/templates/jobs/stress-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,5 @@ jobs:
-Filters $(Filters)
-Environment '${{ parameters.Environment }}'
-Repository '$(Agent.JobName)'
-PushImages
-Login
-DeployId '$(Build.BuildNumber)'
-CI
11 changes: 3 additions & 8 deletions tools/stress-cluster/chaos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,12 @@ package dependencies, and building and pushing docker images. The script must be

If using bash or another Linux terminal, a [PowerShell Core](https://docs.microsoft.com/powershell/scripting/install/installing-powershell-core-on-linux?view=powershell-7.1) shell can be invoked via `pwsh`.

The first invocation of the script must be run with the `-Login` flag to set up cluster and container registry access.

```
cd <stress test search directory>
<repo root>/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 `
-Login `
-PushImages
<repo root>/eng/common/scripts/stress-testing/deploy-stress-tests.ps1
```

To re-deploy more quickly, the script can be run without `-Login` and/or without `-PushImages` (if no code changes were
To re-deploy more quickly, the script can be run with `-SkipLogin` and/or with `-SkipPushImages` (if no code changes were
made).

```
Expand Down Expand Up @@ -622,7 +617,7 @@ Follow the below commands to execute a sample test.

```
cd ./examples/network_stress_example
pwsh ../../../../../eng/common/scripts/stress-testing/deploy-stress-tests.ps1 -Login -PushImages
pwsh ../../../../../eng/common/scripts/stress-testing/deploy-stress-tests.ps1
```

Verify the pods in the job have booted and are running ok (with chaos network failures):
Expand Down
8 changes: 3 additions & 5 deletions tools/stress-cluster/cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,7 @@ resource values from the newly provisioned dev environment that are required by
Avoid checking in the updated dev values, they are for local use only.

```
# -Login only needs to be run once or if the azure container registry credentials have expired (~24 hours)
<tools repo>/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 -Login -Environment dev
<tools repo>/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 -Environment dev
```

## Playground Cluster
Expand Down Expand Up @@ -122,7 +121,7 @@ Steps for deploying the stress test addons helm chart:
1. Increment the version number in stress test addons' [Chart.yaml](https://github.com/Azure/azure-sdk-tools/blob/main/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml) (e.g. 0.1.0 -> 0.1.1).
1. Run [deploy.ps1](https://github.com/Azure/azure-sdk-tools/blob/main/tools/stress-cluster/cluster/kubernetes/stress-test-addons/deploy.ps1).
1. Update all the helm chart versions for stress-test-addons dependency references in `azure-sdk-tools/tools/stress-cluster/chaos/examples/**/Chart.yaml`.
1. Run azure-sdk-tools\eng\common\scripts\stress-testing\deploy-stress-tests.ps1 script in the [examples](https://github.com/Azure/azure-sdk-tools/tree/main/tools/stress-cluster/chaos/examples) directory, this will update all the nested helm charts (-login tag is needed for the first run).
1. Run the `azure-sdk-tools\eng\common\scripts\stress-testing\deploy-stress-tests.ps1` script in the [examples](https://github.com/Azure/azure-sdk-tools/tree/main/tools/stress-cluster/chaos/examples) directory. This will update all the nested helm charts (the `-SkipLogin` parameter can be used to speed up the script, or if interactive login isn't supported by the shell).
1. Run `kubectl get pods -n examples -w` to monitor the status of each pod and look for Running/Completed and make sure there are no errors.
1. Update all the stress tests' Chart.yaml files across the other repos in the same manner.

Expand Down Expand Up @@ -166,8 +165,7 @@ may still be an invalid kubernetes manifest, so the example stress test should a
the full set of changes:

```
# -Login only needs to be run once or if the azure container registry credentials have expired (~24 hours)
<tools repo>/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 -Login
<tools repo>/eng/common/scripts/stress-testing/deploy-stress-tests.ps1
```

For more helm debugging info, see [here](https://helm.sh/docs/chart_template_guide/debugging/).
8 changes: 5 additions & 3 deletions tools/stress-cluster/cluster/azure/cluster/cluster.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ param groupSuffix string
param dnsPrefix string = 's1'
param clusterName string
param location string = resourceGroup().location
param defaultAgentPoolMinNodes int = 6
param defaultAgentPoolMaxNodes int = 20
// AKS does not allow agentPool updates via existing managed cluster resources
param updateNodes bool = false

Expand Down Expand Up @@ -31,9 +33,9 @@ var systemAgentPool = {

var defaultAgentPool = {
name: 'default'
count: 6
minCount: 6
maxCount: 20
count: defaultAgentPoolMinNodes
minCount: defaultAgentPoolMinNodes
maxCount: defaultAgentPoolMaxNodes
mode: 'User'
vmSize: 'Standard_D8a_v4'
type: 'VirtualMachineScaleSets'
Expand Down
4 changes: 4 additions & 0 deletions tools/stress-cluster/cluster/azure/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ param clusterLocation string = 'westus3'
param staticTestKeyvaultName string
param staticTestKeyvaultGroup string
param monitoringLocation string = 'centralus'
param defaultAgentPoolMinNodes int = 6
param defaultAgentPoolMaxNodes int = 20
param tags object
// AKS does not allow agentPool updates via existing managed cluster resources
param updateNodes bool = false
Expand Down Expand Up @@ -75,6 +77,8 @@ module cluster 'cluster/cluster.bicep' = {
updateNodes: updateNodes
location: clusterLocation
clusterName: clusterName
defaultAgentPoolMinNodes: defaultAgentPoolMinNodes
defaultAgentPoolMaxNodes: defaultAgentPoolMaxNodes
tags: tags
groupSuffix: groupSuffix
workspaceId: logWorkspace.outputs.id
Expand Down
6 changes: 6 additions & 0 deletions tools/stress-cluster/cluster/azure/parameters/pg.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
"staticTestKeyvaultGroup": {
"value": "rg-stress-secrets-pg"
},
"defaultAgentPoolMinNodes": {
"value": 2
},
"defaultAgentPoolMaxNodes": {
"value": 10
},
"tags": {
"value": {
"environment": "pg",
Expand Down
6 changes: 6 additions & 0 deletions tools/stress-cluster/cluster/azure/parameters/prod.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
"staticTestKeyvaultGroup": {
"value": "rg-stress-secrets-prod"
},
"defaultAgentPoolMinNodes": {
"value": 2
},
"defaultAgentPoolMaxNodes": {
"value": 10
},
"tags": {
"value": {
"environment": "Prod",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static void Main(string[] args)
Console.WriteLine($"2. Add test code to ./src/");
Console.WriteLine($"3. Update 'Dockerfile' (see contents for help).");
Console.WriteLine($"4. Run the following command from within your language repository to deploy the package:");
Console.WriteLine($" pwsh -c $(git rev-parse --show-toplevel)/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 -Login -PushImages");
Console.WriteLine($" pwsh -c $(git rev-parse --show-toplevel)/eng/common/scripts/stress-testing/deploy-stress-tests.ps1");
Console.WriteLine($"********************************************************************************");
});
}
Expand Down

0 comments on commit fc4a78a

Please sign in to comment.