Skip to content

Commit

Permalink
[stress] stress test rerun failed jobs feature (#5726)
Browse files Browse the repository at this point in the history
closes #5361
  • Loading branch information
ckairen authored Mar 28, 2023
1 parent 6a967e0 commit f6e7ccd
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 11 deletions.
2 changes: 2 additions & 0 deletions eng/common/scripts/stress-testing/deploy-stress-tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ param(
# Renders chart templates locally without deployment
[Parameter(Mandatory=$False)][switch]$Template,

[Parameter(Mandatory=$False)][switch]$RetryFailedTests,

# Matrix generation parameters
[Parameter(Mandatory=$False)][string]$MatrixFileName,
[Parameter(Mandatory=$False)][string]$MatrixSelection,
Expand Down
97 changes: 86 additions & 11 deletions eng/common/scripts/stress-testing/stress-test-deployment-lib.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ function DeployStressTests(
})]
[System.IO.FileInfo]$LocalAddonsPath,
[Parameter(Mandatory=$False)][switch]$Template,
[Parameter(Mandatory=$False)][switch]$RetryFailedTests,
[Parameter(Mandatory=$False)][string]$MatrixFileName,
[Parameter(Mandatory=$False)][string]$MatrixSelection = "sparse",
[Parameter(Mandatory=$False)][string]$MatrixDisplayNameFilter,
Expand Down Expand Up @@ -215,11 +216,16 @@ function DeployStressPackage(
if ($LASTEXITCODE) {exit $LASTEXITCODE}

$dockerBuildConfigs = @()

$genValFile = Join-Path $pkg.Directory "generatedValues.yaml"
$genVal = Get-Content $genValFile -Raw | ConvertFrom-Yaml -Ordered
if (Test-Path $genValFile) {
$scenarios = $genVal.Scenarios

$generatedHelmValuesFilePath = Join-Path $pkg.Directory "generatedValues.yaml"
$generatedHelmValues = Get-Content $generatedHelmValuesFilePath -Raw | ConvertFrom-Yaml -Ordered
$releaseName = $pkg.ReleaseName
if ($RetryFailedTests) {
$releaseName, $generatedHelmValues = generateRetryTestsHelmValues $pkg $releaseName $generatedHelmValues
}

if (Test-Path $generatedHelmValuesFilePath) {
$scenarios = $generatedHelmValues.Scenarios
foreach ($scenario in $scenarios) {
if ("image" -in $scenario.keys) {
$dockerFilePath = Join-Path $pkg.Directory $scenario.image
Expand Down Expand Up @@ -286,7 +292,7 @@ function DeployStressPackage(
}
}
}
$genVal.scenarios = @( foreach ($scenario in $genVal.scenarios) {
$generatedHelmValues.scenarios = @( foreach ($scenario in $generatedHelmValues.scenarios) {
$dockerPath = if ("image" -notin $scenario) {
$dockerFilePath
} else {
Expand All @@ -298,15 +304,15 @@ function DeployStressPackage(
$scenario
} )

$genVal | ConvertTo-Yaml | Out-File -FilePath $genValFile
$generatedHelmValues | ConvertTo-Yaml | Out-File -FilePath $generatedHelmValuesFilePath
}

Write-Host "Installing or upgrading stress test $($pkg.ReleaseName) from $($pkg.Directory)"
Write-Host "Installing or upgrading stress test $releaseName from $($pkg.Directory)"

$generatedConfigPath = Join-Path $pkg.Directory generatedValues.yaml
$subCommand = $Template ? "template" : "upgrade"
$installFlag = $Template ? "" : "--install"
$helmCommandArg = "helm", $subCommand, $pkg.ReleaseName, $pkg.Directory, "-n", $pkg.Namespace, $installFlag, "--set", "stress-test-addons.env=$environment", "--values", $generatedConfigPath
$helmCommandArg = "helm", $subCommand, $releaseName, $pkg.Directory, "-n", $pkg.Namespace, $installFlag, "--set", "stress-test-addons.env=$environment", "--values", $generatedConfigPath

$result = (Run @helmCommandArg) 2>&1 | Write-Host

Expand All @@ -322,7 +328,7 @@ function DeployStressPackage(
# Issues like 'UPGRADE FAILED: another operation (install/upgrade/rollback) is in progress'
# can be the result of cancelled `upgrade` operations (e.g. ctrl-c).
# See https://github.com/helm/helm/issues/4558
Write-Warning "The issue may be fixable by first running 'helm rollback -n $($pkg.Namespace) $($pkg.ReleaseName)'"
Write-Warning "The issue may be fixable by first running 'helm rollback -n $($pkg.Namespace) $releaseName'"
return
}
}
Expand All @@ -333,7 +339,7 @@ function DeployStressPackage(
if(!$Template) {
$helmReleaseConfig = RunOrExitOnFailure kubectl get secrets `
-n $pkg.Namespace `
-l "status=deployed,name=$($pkg.ReleaseName)" `
-l "status=deployed,name=$releaseName" `
-o jsonpath='{.items[0].metadata.name}'
Run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId
}
Expand Down Expand Up @@ -375,3 +381,72 @@ function CheckDependencies()
}

}

# Builds the helm release name and helm values needed to re-run only the
# failed jobs of an existing stress test release.
#
# Parameters:
#   $pkg                 - package object; its Namespace and ReleaseName
#                          properties are read.
#   $releaseName         - accepted for call compatibility; the incoming value
#                          is not read and is replaced by the retry name.
#   $generatedHelmValues - parsed generated values; entries of its "scenarios"
#                          collection are matched on their "Scenario" field.
#
# Returns:
#   Two values: the retry release name "<ReleaseName>-<revision>-retry" and a
#   hashtable whose only key, "scenarios", holds the scenario entries of the
#   failed jobs. Returns nothing when no failed scenario is found.
#   NOTE(review): a caller that destructures the result into two variables
#   receives $null for both in that case - confirm callers handle it.
function generateRetryTestsHelmValues ($pkg, $releaseName, $generatedHelmValues) {
    # Get all jobs within this helm release
    $helmStatusOutput = RunOrExitOnFailure helm status -n $pkg.Namespace $pkg.ReleaseName --show-resources
    # -----Example output-----
    # NAME: <Release Name>
    # LAST DEPLOYED: Mon Jan 01 12:12:12 2020
    # NAMESPACE: <namespace>
    # STATUS: deployed
    # REVISION: 10
    # RESOURCES:
    # ==> v1alpha1/Schedule
    # NAME AGE
    # <schedule resource name 1> 5h5m
    # <schedule resource name 2> 5h5m
    #
    # ==> v1/SecretProviderClass
    # <secret provider name 1> 7d4h
    #
    # ==> v1/Job
    # NAME COMPLETIONS DURATION AGE
    # <job name 1> 0/1 5h5m 5h5m
    # <job name 2> 0/1 5h5m 5h5m

    # Collect the first column of every row in the "==> v1/Job" section,
    # stopping at the next "==>" section header.
    $discoveredJob = $False
    $jobs = @()
    foreach ($line in $helmStatusOutput) {
        if ($discoveredJob -and $line -match "==>") { break }
        if ($discoveredJob) {
            # Skip the column header row and blank lines.
            $jobs += ($line -split '\s+')[0] | Where-Object { ($_ -ne "NAME") -and ($_) }
        }
        if ($line -match "==> v1/Job") {
            $discoveredJob = $True
        }
    }

    $failedJobsScenario = @()
    $revision = 0
    foreach ($job in $jobs) {
        # The last '-'-separated token of the job name is parsed as the
        # revision number; the highest one seen names the retry release.
        $jobRevision = [int]$job.split('-')[-1]
        if ($jobRevision -gt $revision) {
            $revision = $jobRevision
        }

        $jobOutput = RunOrExitOnFailure kubectl describe jobs -n $pkg.Namespace $job
        # A job counts as failed when its description does not report exactly
        # "0 Failed". The lookbehind keeps counts such as "10 Failed" from
        # being mistaken for zero failures.
        $zeroFailedMatch = $jobOutput | Select-String -Pattern '(?<!\d)0 Failed'
        if ([System.String]::IsNullOrEmpty($zeroFailedMatch)) {
            # The scenario name is the part of the job name that precedes
            # "-<ReleaseName>".
            $failedJobsScenario += $job.split("-$($pkg.ReleaseName)")[0]
        }
    }

    $releaseName = "$($pkg.ReleaseName)-$revision-retry"

    $retryTestsHelmVal = @{"scenarios"=@()}
    foreach ($failedScenario in $failedJobsScenario) {
        # @() turns "no match" into an empty array so that no $null entry is
        # appended to the retry scenarios.
        $matchedScenarios = @($generatedHelmValues.scenarios | Where-Object { $_.Scenario -eq $failedScenario })
        $retryTestsHelmVal.scenarios += $matchedScenarios
    }

    if (!$retryTestsHelmVal.scenarios.length) {
        Write-Host "There are no failed pods to retry."
        return
    }

    return $releaseName, $retryTestsHelmVal
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ FROM mcr.microsoft.com/cbl-mariner/base/core:2.0
# Included packages: https://github.com/microsoft/CBL-Mariner/blob/1.0/SPECS/core-packages/core-packages.spec

ADD ./poll.sh /poll.sh
RUN tdnf -y install wget
RUN chmod +x /poll.sh

CMD bash /poll.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ FROM mcr.microsoft.com/cbl-mariner/base/core:2.0
# Included packages: https://github.com/microsoft/CBL-Mariner/blob/1.0/SPECS/core-packages/core-packages.spec

ADD ./poll.sh /poll.sh
RUN tdnf -y install wget
RUN chmod +x /poll.sh

CMD bash /poll.sh

0 comments on commit f6e7ccd

Please sign in to comment.