Skip to content

Commit

Permalink
Add Test Runs using Http Fault Injector (#35030)
Browse files Browse the repository at this point in the history
Add Test Runs using Http Fault Injector
  • Loading branch information
alzimmermsft authored Jun 1, 2023
1 parent d93e6cc commit 013901b
Show file tree
Hide file tree
Showing 7 changed files with 313 additions and 2 deletions.
11 changes: 11 additions & 0 deletions eng/pipelines/templates/jobs/live.tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ parameters:
TestOptions: $(TestOptions)
TestParallelization: '1'
TestVersionSupport: $(TestVersionSupport)
UseHttpFaultInjector: false

jobs:
- job:
Expand Down Expand Up @@ -54,6 +55,11 @@ jobs:
ServiceDirectory: ${{ parameters.ServiceDirectory }}
SDKType: ${{ parameters.SDKType }}

# macOS has known issues running HTTP fault injector, change this once
# https://github.com/Azure/azure-sdk-tools/pull/6216 is resolved
- ${{ if and(eq(parameters.UseHttpFaultInjector, 'true'), not(contains(variables['OSVmImage'], 'mac'))) }}:
- template: /eng/pipelines/templates/steps/http-fault-injector.yml

- template: ../steps/install-reporting-tools.yml
parameters:
JdkVersion: $(JavaTestVersion)
Expand Down Expand Up @@ -101,6 +107,11 @@ jobs:
${{ each var in parameters.EnvVars }}:
${{ var.key }}: ${{ var.value }}

# macOS has known issues running HTTP fault injector, change this once
# https://github.com/Azure/azure-sdk-tools/pull/6216 is resolved
- ${{ if and(eq(parameters.UseHttpFaultInjector, 'true'), not(contains(variables['OSVmImage'], 'mac'))) }}:
- template: /eng/pipelines/templates/steps/publish-http-fault-injector-logs.yml

- ${{ parameters.PostSteps }}

- ${{ if ne(parameters.DisableAzureResourceCreation, 'true') }}:
Expand Down
4 changes: 4 additions & 0 deletions eng/pipelines/templates/stages/archetype-sdk-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ parameters:
- name: LtsVersion
type: string
default: $(JavaTestVersion)
- name: UseHttpFaultInjector
type: boolean
default: false

stages:
- ${{ each cloud in parameters.CloudConfig }}:
Expand Down Expand Up @@ -145,6 +148,7 @@ stages:
DisableAzureResourceCreation: ${{ parameters.DisableAzureResourceCreation }}
TestGoals: ${{ parameters.TestGoals }}
TestOptions: ${{ parameters.TestOptions }}
UseHttpFaultInjector: ${{ parameters.UseHttpFaultInjector }}

MatrixConfigs:
# Enumerate platforms and additional platforms based on supported clouds (sparse platform<-->cloud matrix).
Expand Down
51 changes: 51 additions & 0 deletions eng/pipelines/templates/steps/http-fault-injector.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Steps that install http-fault-injector, trust its HTTPS certificate in the JDK
# found under JAVA_HOME, and start the tool with its output captured in a log file.
steps:
# Install http-fault-injector as a global, prerelease .NET tool from the
# azure-sdk-for-net public package feed.
- powershell: |
dotnet tool install azure.sdk.tools.httpfaultinjector --global --prerelease --add-source https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-net/nuget/v3/index.json
displayName: 'Install http-fault-injector'
# Set pipeline variables for later steps: the Kestrel default certificate path
# and password, and PROXY_MANUAL_START=true.
- pwsh: |
Write-Host "##vso[task.setvariable variable=ASPNETCORE_Kestrel__Certificates__Default__Path]$(Build.SourcesDirectory)/eng/common/testproxy/dotnet-devcert.pfx"
Write-Host "##vso[task.setvariable variable=ASPNETCORE_Kestrel__Certificates__Default__Password]password"
Write-Host "##vso[task.setvariable variable=PROXY_MANUAL_START]true"
displayName: 'Configure Kestrel and PROXY_MANUAL_START Variables'
# Windows agents only: change into the JDK security directory (jre/lib/security
# or lib/security under JAVA_HOME, failing if neither exists), export the .NET
# HTTPS dev certificate there, and import it into the cacerts keystore under the
# alias HttpFaultInject.
- powershell: |
if (Test-Path $env:JAVA_HOME\jre\lib\security) {
cd $env:JAVA_HOME\jre\lib\security
} elseif (Test-Path $env:JAVA_HOME\lib\security) {
cd $env:JAVA_HOME\lib\security
} else {
Write-Error "JDK directory structure is unknown and unsupported. JAVA_HOME is '$env:JAVA_HOME'"
exit 1
}
dotnet dev-certs https --export-path http-fault-injector.pfx
keytool -keystore cacerts -importcert -noprompt -trustcacerts -alias HttpFaultInject -file http-fault-injector.pfx -storepass changeit
displayName: 'Trust http-fault-injector self-signed certificate - windows'
condition: and(succeeded(), eq(variables['Agent.OS'],'Windows_NT'))
# Non-Windows agents: same certificate export and keystore import as the Windows
# step above, written in bash; keytool is invoked through sudo.
- bash: |
if [ -d ${JAVA_HOME}/jre/lib/security ]; then
cd ${JAVA_HOME}/jre/lib/security
elif [ -d ${JAVA_HOME}/lib/security ]; then
cd ${JAVA_HOME}/lib/security
else
echo "JDK directory structure is unknown and unsupported. JAVA_HOME is '${JAVA_HOME}'"
exit 1
fi
dotnet dev-certs https --export-path http-fault-injector.pfx
sudo keytool -keystore cacerts -importcert -noprompt -trustcacerts -alias HttpFaultInject -file http-fault-injector.pfx -storepass changeit
displayName: 'Trust http-fault-injector self-signed certificate - linux/mac'
condition: and(succeeded(), ne(variables['Agent.OS'],'Windows_NT'))
# Windows agents only: start http-fault-injector with Start-Process and redirect
# its standard output to http-fault-injector.log in the sources directory.
- pwsh: |
Start-Process http-fault-injector -NoNewWindow -PassThru -RedirectStandardOutput "$(Build.SourcesDirectory)/http-fault-injector.log"
displayName: 'Start http-fault-injector - windows'
condition: and(succeeded(), eq(variables['Agent.OS'],'Windows_NT'))
# nohup does NOT continue beyond the current session if you use it within powershell
# Non-Windows agents: start http-fault-injector in the background with nohup,
# writing standard output to the same log file path as the Windows step.
- bash: |
nohup http-fault-injector > "$(Build.SourcesDirectory)/http-fault-injector.log" &
displayName: "Start http-fault-injector - linux/mac"
condition: and(succeeded(), ne(variables['Agent.OS'],'Windows_NT'))
24 changes: 24 additions & 0 deletions eng/pipelines/templates/steps/publish-http-fault-injector-logs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Steps that publish the http-fault-injector log as a pipeline artifact.
# Every step is conditioned on failed(), so nothing here runs for a passing job.
steps:
# Record whether http-fault-injector.log exists in the sources directory by
# setting HAS_HTTP_FAULT_INJECTOR_LOGS to true or false.
- pwsh: |
if (Test-Path -Path "$(Build.SourcesDirectory)/http-fault-injector.log") {
Write-Host "##vso[task.setvariable variable=HAS_HTTP_FAULT_INJECTOR_LOGS]true"
} else {
Write-Host "##vso[task.setvariable variable=HAS_HTTP_FAULT_INJECTOR_LOGS]false"
}
displayName: 'Check if http-fault-injector log file exists'
condition: failed()
# Copy the log to fault.log; the publish step below uploads the copy rather
# than the original log file.
- pwsh: |
Copy-Item -Path "$(Build.SourcesDirectory)/http-fault-injector.log" -Destination "$(Build.SourcesDirectory)/fault.log"
condition: and(failed(), eq(variables['HAS_HTTP_FAULT_INJECTOR_LOGS'], 'true'))
# Publish fault.log as an artifact named after the job, only when the job
# failed and the log was found.
- template: /eng/common/pipelines/templates/steps/publish-artifact.yml
parameters:
ArtifactName: "$(System.JobName)-http-fault-injector-logs"
ArtifactPath: "$(Build.SourcesDirectory)/fault.log"
CustomCondition: and(failed(), eq(variables['HAS_HTTP_FAULT_INJECTOR_LOGS'], 'true'))

# Delete the fault.log copy once it has been published.
- pwsh: |
Remove-Item -Force $(Build.SourcesDirectory)/fault.log
displayName: 'Cleanup http-fault-injector log file'
condition: and(failed(), eq(variables['HAS_HTTP_FAULT_INJECTOR_LOGS'], 'true'))
12 changes: 10 additions & 2 deletions eng/pipelines/templates/steps/retain-heap-dump-hprofs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ parameters:
default: 'oom-hprofs'

steps:
- pwsh: |
if ((Get-ChildItem -Path . -Recurse -Filter *.hprof -File).Count -eq 0) {
Write-Host "##vso[task.setvariable variable=HAS_OOM_PROFS]false"
} else {
Write-Host "##vso[task.setvariable variable=HAS_OOM_PROFS]true"
}
condition: always()
- pwsh: |
New-Item $(Build.ArtifactStagingDirectory)/${{ parameters.OomArtifactName }} -ItemType directory
foreach($hprofFile in (Get-ChildItem -Path . -Recurse -Filter *.hprof -File))
Expand All @@ -14,9 +22,9 @@ steps:
}
[System.IO.Compression.ZipFile]::CreateFromDirectory("$(Build.ArtifactStagingDirectory)/${{ parameters.OomArtifactName }}","$(Build.ArtifactStagingDirectory)/${{ parameters.OomArtifactName }}.zip")
displayName: 'Copy OOM hprofs'
condition: always()
condition: eq(variables['HAS_OOM_PROFS'], 'true')
- publish: $(Build.ArtifactStagingDirectory)/${{ parameters.OomArtifactName }}.zip
displayName: 'Publish OOM hprofs'
artifact: ${{ parameters.OomArtifactName }}-$(System.StageName)-$(System.JobName)-$(System.JobAttempt)
condition: always()
condition: eq(variables['HAS_OOM_PROFS'], 'true')
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
package com.azure.storage.blob.specialized

import com.azure.core.http.HttpClient
import com.azure.core.http.HttpClientProvider
import com.azure.core.http.HttpHeaderName
import com.azure.core.http.HttpRequest
import com.azure.core.http.HttpResponse
import com.azure.core.test.utils.TestUtils
import com.azure.core.util.BinaryData
import com.azure.core.util.Context
import com.azure.core.util.HttpClientOptions
import com.azure.core.util.UrlBuilder
import com.azure.core.util.logging.ClientLogger
import com.azure.storage.blob.APISpec
import com.azure.storage.blob.BlobClientBuilder
import com.azure.storage.common.implementation.Constants
import com.azure.storage.common.test.shared.TestHttpClientType
import com.azure.storage.common.test.shared.extensions.LiveOnly
import reactor.core.publisher.Mono
import spock.lang.IgnoreIf
import spock.util.environment.OperatingSystem

import java.nio.file.Files
import java.time.Duration
import java.util.concurrent.ThreadLocalRandom
import java.util.concurrent.atomic.AtomicInteger
/**
* Set of tests that use HTTP fault injection to simulate scenarios where the network has random errors.
*/
@LiveOnly
// macOS has known issues running HTTP fault injector, change this once
// https://github.com/Azure/azure-sdk-tools/pull/6216 is resolved
@IgnoreIf({ OperatingSystem.current.family == OperatingSystem.Family.MAC_OS })
class HttpFaultInjectingTests extends APISpec {
private static final def LOGGER = new ClientLogger(HttpFaultInjectingTests.class)
private static final def UPSTREAM_URI_HEADER = HttpHeaderName.fromString("X-Upstream-Base-Uri")
private static final def HTTP_FAULT_INJECTOR_RESPONSE_HEADER
= HttpHeaderName.fromString("x-ms-faultinjector-response-option")

/**
 * Verifies that downloadToFile writes the correct contents when requests go through the fault injecting client.
 *
 * One blob of 9 MB minus one byte of random data is uploaded, then downloaded into 500 temporary files using a
 * parallel stream. A download that throws is logged and skipped; every download that completes has its file
 * contents compared with the uploaded bytes. At least 450 of the 500 downloads (90%) must complete, so a run
 * where most downloads failed cannot pass and hide a true issue.
 */
def "download to file with fault injection"() {
    setup:
    def expectedBytes = new byte[9 * Constants.MB - 1]
    ThreadLocalRandom.current().nextBytes(expectedBytes)

    def blobName = generateBlobName()
    cc.getBlobClient(blobName).upload(BinaryData.fromBytes(expectedBytes), true)

    def downloadClient = new BlobClientBuilder()
        .connectionString(environment.primaryAccount.connectionString)
        .containerName(cc.getBlobContainerName())
        .blobName(blobName)
        .httpClient(new HttpFaultInjectingHttpClient(getFaultInjectingWrappedHttpClient()))
        .buildClient()

    // One temporary target file per download attempt.
    def files = new ArrayList<File>(500)
    500.times { index ->
        def tempFile = File.createTempFile(UUID.randomUUID().toString() + index, ".txt")
        tempFile.deleteOnExit()
        files.add(tempFile)
    }
    def successCount = new AtomicInteger()

    when:
    files.stream().parallel().forEach { target ->
        def downloaded
        try {
            downloadClient.downloadToFile(target.getAbsolutePath(), true)
            downloaded = true
        } catch (Exception ex) {
            // A failed download is expected under fault injection; log it and skip validation.
            LOGGER.atWarning()
                .log(() -> "Failed to complete download, target download file: " + target.getAbsolutePath(), ex)
            downloaded = false
        }

        if (downloaded) {
            def actualBytes = Files.readAllBytes(target.toPath())
            TestUtils.assertArraysEqual(expectedBytes, actualBytes)
            successCount.incrementAndGet()
        }

        // Remove the file as soon as it has been checked.
        Files.deleteIfExists(target.toPath())
    }

    then:
    successCount.get() >= 450

    cleanup:
    for (leftover in files) {
        Files.deleteIfExists(leftover.toPath())
    }
}

/**
 * Creates the HttpClient that HttpFaultInjectingHttpClient wraps.
 *
 * The provider class is selected from environment.httpClientType (Netty or OkHttp) and the client is created with
 * read and response timeouts of 5 seconds.
 *
 * @return The HttpClient created for the configured client type.
 * @throws IllegalArgumentException If environment.httpClientType is neither NETTY nor OK_HTTP.
 */
def getFaultInjectingWrappedHttpClient() {
    def clientType = environment.httpClientType

    String providerClassName
    if (clientType == TestHttpClientType.NETTY) {
        providerClassName = "com.azure.core.http.netty.NettyAsyncHttpClientProvider"
    } else if (clientType == TestHttpClientType.OK_HTTP) {
        providerClassName = "com.azure.core.http.okhttp.OkHttpAsyncClientProvider"
    } else {
        throw new IllegalArgumentException("Unknown http client type: " + clientType)
    }

    def clientOptions = new HttpClientOptions()
        .readTimeout(Duration.ofSeconds(5))
        .responseTimeout(Duration.ofSeconds(5))
        .setHttpClientProvider(Class.forName(providerClassName) as Class<? extends HttpClientProvider>)

    return HttpClient.createDefault(clientOptions)
}

// For now a local implementation is here in azure-storage-blob until this is released in azure-core-test.
// Since this is a local definition with a clear set of configurations everything is simplified.
/**
 * HttpClient wrapper that redirects every request to http://localhost:7777.
 *
 * Before sending, the original request URL is stored in the X-Upstream-Base-Uri header, the request URL is
 * rewritten to use scheme http, host localhost and port 7777, and the x-ms-faultinjector-response-option header is
 * set to a randomly chosen fault type. Once a response is received the request's original URL is restored and the
 * upstream header is removed.
 */
private static final class HttpFaultInjectingHttpClient implements HttpClient {
    private final HttpClient wrappedHttpClient

    /**
     * @param wrappedHttpClient The HttpClient that performs the actual network call.
     */
    HttpFaultInjectingHttpClient(HttpClient wrappedHttpClient) {
        this.wrappedHttpClient = wrappedHttpClient
    }

    /**
     * Sends the request with Context.NONE.
     */
    @Override
    Mono<HttpResponse> send(HttpRequest request) {
        return send(request, Context.NONE)
    }

    /**
     * Asynchronously sends the request through the wrapped client after redirecting it to localhost:7777.
     */
    @Override
    Mono<HttpResponse> send(HttpRequest request, Context context) {
        URL originalUrl = request.getUrl()
        request.setHeader(UPSTREAM_URI_HEADER, originalUrl.toString()).setUrl(rewriteUrl(originalUrl))
        String faultType = faultInjectorHandling()
        request.setHeader(HTTP_FAULT_INJECTOR_RESPONSE_HEADER, faultType)

        return wrappedHttpClient.send(request, context)
            .map(response -> {
                // Undo the redirect on the request attached to the response.
                HttpRequest request1 = response.getRequest()
                request1.getHeaders().remove(UPSTREAM_URI_HEADER)
                request1.setUrl(originalUrl)

                return response
            })
    }

    /**
     * Synchronously sends the request through the wrapped client after redirecting it to localhost:7777.
     */
    @Override
    HttpResponse sendSync(HttpRequest request, Context context) {
        URL originalUrl = request.getUrl()
        request.setHeader(UPSTREAM_URI_HEADER, originalUrl.toString()).setUrl(rewriteUrl(originalUrl))
        String faultType = faultInjectorHandling()
        request.setHeader(HTTP_FAULT_INJECTOR_RESPONSE_HEADER, faultType)

        HttpResponse response = wrappedHttpClient.sendSync(request, context)
        // Undo the redirect on the request attached to the response.
        response.getRequest().setUrl(originalUrl)
        response.getRequest().getHeaders().remove(UPSTREAM_URI_HEADER)

        return response
    }

    /**
     * Returns a copy of the URL with scheme http, host localhost and port 7777; the other URL parts are kept.
     */
    private static URL rewriteUrl(URL originalUrl) {
        try {
            return UrlBuilder.parse(originalUrl)
                .setScheme("http")
                .setHost("localhost")
                .setPort(7777)
                .toUrl()
        } catch (MalformedURLException e) {
            throw new RuntimeException(e)
        }
    }

    /**
     * Randomly selects the value for the x-ms-faultinjector-response-option header.
     *
     * 75% of calls return "f", 24% return one of the no-response options and 1% return one of the partial
     * response options. The option within a category is picked with a second, independent random draw.
     * Previously the value that selected the category was reused, and because it was already confined to the
     * category's range only "n" and "p" could ever be returned from the two fault categories.
     *
     * @return One of "f", "n", "nc", "na", "p", "pc", "pa" or "pn".
     */
    private static String faultInjectorHandling() {
        // f: Full response
        // p: Partial Response (full headers, 50% of body), then wait indefinitely
        // pc: Partial Response (full headers, 50% of body), then close (TCP FIN)
        // pa: Partial Response (full headers, 50% of body), then abort (TCP RST)
        // pn: Partial Response (full headers, 50% of body), then finish normally
        // n: No response, then wait indefinitely
        // nc: No response, then close (TCP FIN)
        // na: No response, then abort (TCP RST)
        def random = ThreadLocalRandom.current()
        def choice = random.nextInt(100)

        if (choice >= 25) {
            // 75% of requests complete without error.
            return "f"
        }

        // Independent draw so that every option inside the selected category is reachable.
        def variant = random.nextDouble()

        if (choice >= 1) {
            // 24% of requests receive no response.
            if (variant <= 0.34D) {
                return "n"
            } else if (variant <= 0.67D) {
                return "nc"
            } else {
                return "na"
            }
        }

        // 1% of requests receive a partial response.
        if (variant <= 0.25D) {
            return "p"
        } else if (variant <= 0.50D) {
            return "pc"
        } else if (variant <= 0.75D) {
            return "pa"
        } else {
            return "pn"
        }
    }
}
}
1 change: 1 addition & 0 deletions sdk/storage/tests-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,4 @@ stages:
AZURE_CLIENT_SECRET: $(aad-azure-sdk-test-client-secret)
VERSIONED_STORAGE_ACCOUNT_NAME: $(java-storage-versioned-account-name)
VERSIONED_STORAGE_ACCOUNT_KEY: $(java-storage-versioned-account-key)
UseHttpFaultInjector: true

0 comments on commit 013901b

Please sign in to comment.