From 2735ec27f3139222f2fa9eb2ebcfbb4272b85482 Mon Sep 17 00:00:00 2001 From: Peter Somogyvari Date: Tue, 23 Mar 2021 16:16:12 -0700 Subject: [PATCH] feat(test-tooling): containers pull image retries exp. back-off #656 Potentially fixing #656. Definitely improves the situation but it is impossible to tell in advance if this will make all the other- wise non-reproducible issues go away. Fingers crossed. This change makes it so that the pullImage(...) method of the Containers utility class will now - by default - retry 6 times if the docker image pulling has failed. The internval between retries is increasing exponentially (power of two) starting from one second as the delay then proceeding to be 2^6 seconds for the final retry (which if also fails then an AbortError is thrown by the underlying pRetry library that is powering the retry mechanism.) For reference, here is a randomly failed CI test execution where the logs show that DockerHub is randomly in- accessible over the network and that's another thing that makes our tests flaky, hence this commit to fix this. https://github.com/hyperledger/cactus/runs/2178802580?check_suite_focus=true#step:8:2448 In case that link goes dead in the future, here's also the actual logs: not ok 60 - packages/cactus-test-cmd-api-server/src/test/typescript/integration/remote-plugin-imports.test.ts # time=25389.665ms --- env: TS_NODE_COMPILER_OPTIONS: '{"jsx":"react"}' file: packages/cactus-test-cmd-api-server/src/test/typescript/integration/remote-plugin-imports.test.ts timeout: 1800000 command: /opt/hostedtoolcache/node/12.13.0/x64/bin/node args: - -r - /home/runner/work/cactus/cactus/node_modules/ts-node/register/index.js - --max-old-space-size=4096 - packages/cactus-test-cmd-api-server/src/test/typescript/integration/remote-plugin-imports.test.ts stdio: - 0 - pipe - 2 cwd: /home/runner/work/cactus/cactus exitCode: 1 ... { # NodeJS API server + Rust plugin work together [2021-03-23T20:45:51.458Z] INFO (VaultTestServer): Created VaultTestServer OK. Image FQN: vault:1.6.1 not ok 1 Error: (HTTP code 500) server error - Get https://registry-1.docker.io/v2/: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers) --- operator: error at: bound (/home/runner/work/cactus/cactus/node_modules/onetime/index.js:30:12) stack: |- Error: (HTTP code 500) server error - Get https://registry-1.docker.io/v2/: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers) at /home/runner/work/cactus/cactus/packages/cactus-test-tooling/node_modules/docker-modem/lib/modem.js:301:17 at IncomingMessage. (/home/runner/work/cactus/cactus/packages/cactus-test-tooling/node_modules/docker-modem/lib/modem.js:328:9) at IncomingMessage.emit (events.js:215:7) at endReadableNT (_stream_readable.js:1183:12) at processTicksAndRejections (internal/process/task_queues.js:80:21) ... Bail out! Error: (HTTP code 500) server error - Get https://registry-1.docker.io/v2/: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers) } Bail out! Error: (HTTP code 500) server error - Get https://registry-1.docker.io/v2/: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers) Signed-off-by: Peter Somogyvari --- .../cactus-test-tooling/package-lock.json | 19 +++++ packages/cactus-test-tooling/package.json | 8 +- .../src/main/typescript/common/containers.ts | 85 +++++++++++++------ 3 files changed, 85 insertions(+), 27 deletions(-) diff --git a/packages/cactus-test-tooling/package-lock.json b/packages/cactus-test-tooling/package-lock.json index f42d35db2c..0621ec6258 100644 --- a/packages/cactus-test-tooling/package-lock.json +++ b/packages/cactus-test-tooling/package-lock.json @@ -93,6 +93,11 @@ "@types/node": "*" } }, + "@types/retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", + "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" + }, "@types/secp256k1": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/@types/secp256k1/-/secp256k1-4.0.1.tgz", @@ -3064,6 +3069,15 @@ "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", "integrity": "sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=" }, + "p-retry": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.4.0.tgz", + "integrity": "sha512-gVB/tBsG+3AHI1SyDHRrX6n9ZL0Bcbifps9W9/Bgu3Oyu4/OrAh8SvDzDsvpP0oxfCt3oWNT+0fQ9LyUGwBTLg==", + "requires": { + "@types/retry": "^0.12.0", + "retry": "^0.12.0" + } + }, "p-timeout": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-1.2.1.tgz", @@ -3316,6 +3330,11 @@ "lowercase-keys": "^1.0.0" } }, + "retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", + "integrity": "sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs=" + }, "rimraf": { "version": "2.7.1", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.7.1.tgz", diff --git a/packages/cactus-test-tooling/package.json b/packages/cactus-test-tooling/package.json index 7abec1e5cb..6c33b77794 100644 --- a/packages/cactus-test-tooling/package.json +++ b/packages/cactus-test-tooling/package.json @@ -31,7 +31,10 @@ "ignore": [ "src/**/generated/*" ], - "extensions": ["ts", "json"], + "extensions": [ + "ts", + "json" + ], "quiet": true, "verbose": false, "runOnChangeOnly": true @@ -91,6 +94,7 @@ "loglevel": "1.6.7", "loglevel-plugin-prefix": "0.8.4", "node-ssh": "11.1.1", + "p-retry": "4.4.0", "tar-stream": "2.1.2", "typescript-optional": "2.0.1", "web3": "1.2.7" @@ -100,8 +104,8 @@ "@types/extract-zip": "1.6.2", "@types/fs-extra": "8.1.0", "@types/joi": "14.3.4", - "@types/ssh2": "0.5.44", "@types/node-ssh": "7.0.0", + "@types/ssh2": "0.5.44", "@types/tar-stream": "2.1.0" } } diff --git a/packages/cactus-test-tooling/src/main/typescript/common/containers.ts b/packages/cactus-test-tooling/src/main/typescript/common/containers.ts index 78ded092aa..f3d14f917c 100644 --- a/packages/cactus-test-tooling/src/main/typescript/common/containers.ts +++ b/packages/cactus-test-tooling/src/main/typescript/common/containers.ts @@ -5,9 +5,15 @@ import { Container, ContainerInfo } from "dockerode"; import Dockerode from "dockerode"; import tar from "tar-stream"; import fs from "fs-extra"; +import pRetry from "p-retry"; import { Streams } from "../common/streams"; -import { Checks, LoggerProvider, Strings } from "@hyperledger/cactus-common"; -import { LogLevelDesc } from "loglevel"; +import { + Checks, + LogLevelDesc, + LoggerProvider, + Strings, + ILoggerOptions, +} from "@hyperledger/cactus-common"; export interface IPruneDockerResourcesRequest { logLevel?: LogLevelDesc; @@ -297,33 +303,62 @@ export class Containers { return NetworkSettings.Networks[networkNames[0]].IPAddress; } } - public static pullImage( - containerNameAndTag: string, - pullOptions: any = {}, + imageFqn: string, + options: any = {}, + logLevel?: LogLevelDesc, + ): Promise { + const defualtLoggerOptions: ILoggerOptions = { + label: "containers#pullImage()", + level: logLevel || "INFO", + }; + const log = LoggerProvider.getOrCreate(defualtLoggerOptions); + const task = () => Containers.tryPullImage(imageFqn, options, logLevel); + const retryOptions: pRetry.Options = { + retries: 6, + onFailedAttempt: async (ex) => { + log.debug(`Failed attempt at pulling container image ${imageFqn}`, ex); + }, + }; + return pRetry(task, retryOptions); + } + + public static tryPullImage( + imageFqn: string, + options: any = {}, + logLevel?: LogLevelDesc, ): Promise { return new Promise((resolve, reject) => { + const loggerOptions: ILoggerOptions = { + label: "containers#tryPullImage()", + level: logLevel || "INFO", + }; + const log = LoggerProvider.getOrCreate(loggerOptions); + const docker = new Dockerode(); - docker.pull( - containerNameAndTag, - pullOptions, - (pullError: any, stream: any) => { - if (pullError) { - reject(pullError); - } else { - docker.modem.followProgress( - stream, - (progressError: any, output: any[]) => { - if (progressError) { - reject(progressError); - } else { - resolve(output); - } - }, - ); - } - }, - ); + + const pullStreamStartedHandler = (pullError: any, stream: any) => { + if (pullError) { + log.error(`Could not even start ${imageFqn} pull:`, pullError); + reject(pullError); + } else { + log.debug(`Started ${imageFqn} pull progress stream OK`); + docker.modem.followProgress( + stream, + (progressError: any, output: any[]) => { + if (progressError) { + log.error(`Failed to finish ${imageFqn} pull:`, progressError); + reject(progressError); + } else { + log.debug(`Finished ${imageFqn} pull completely OK`); + resolve(output); + } + }, + ); + } + }; + + docker.pull(imageFqn, options, pullStreamStartedHandler); }); }