feat: expose pod check attempts and delays as environment controlled variables (#812)

Signed-off-by: Jeffrey Tang <[email protected]>
JeffreyDallas authored Nov 8, 2024
1 parent 5ac04bc commit a7b3279
Showing 8 changed files with 50 additions and 29 deletions.
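Because each new constant reads process.env before applying its fallback, the pod-check behaviour can now be tuned per environment with no code change. A minimal sketch of overriding two of the knobs from Node (the launcher script and relative import path are hypothetical; only the variable names and defaults come from this commit):

```ts
// Hypothetical launcher sketch: the constants are evaluated when
// src/core/constants.ts is first imported, so the overrides must land on
// process.env before that import runs.
process.env.PODS_READY_MAX_ATTEMPTS = '600' // default: 300 attempts
process.env.PODS_READY_DELAY = '1000'       // default: 2_000 ms between checks

const constants = await import('./src/core/constants.ts')
console.log(constants.PODS_READY_MAX_ATTEMPTS) // 600
```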
12 changes: 6 additions & 6 deletions src/commands/mirror_node.ts
@@ -238,43 +238,43 @@ export class MirrorNodeCommand extends BaseCommand {
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=postgresql',
'app.kubernetes.io/name=postgres'
- ], 1, 300, 2000)
+ ], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check REST API',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=rest',
'app.kubernetes.io/name=rest'
- ], 1, 300, 2000)
+ ], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check GRPC',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=grpc',
'app.kubernetes.io/name=grpc'
- ], 1, 300, 2000)
+ ], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check Monitor',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=monitor',
'app.kubernetes.io/name=monitor'
- ], 1, 300, 2000)
+ ], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check Importer',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=importer',
'app.kubernetes.io/name=importer'
- ], 1, 300, 2000)
+ ], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check Hedera Explorer',
skip: (ctx) => !ctx.config.deployHederaExplorer,
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=hedera-explorer',
'app.kubernetes.io/name=hedera-explorer'
- ], 1, 300, 2000)
+ ], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
}
], {
concurrent: true,
8 changes: 4 additions & 4 deletions src/commands/network.ts
@@ -336,7 +336,7 @@ export class NetworkCommand extends BaseCommand {
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=network-node',
`solo.hedera.com/node-name=${nodeAlias}`
- ], 1, 60 * 15, 1000) // timeout 15 minutes
+ ], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})
}

@@ -363,7 +363,7 @@
task: async () =>
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=haproxy'
- ], 1, 60 * 15, 1000) // timeout 15 minutes
+ ], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})
}

@@ -374,7 +374,7 @@
task: async () =>
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=envoy-proxy'
- ], 1, 60 * 15, 1000) // timeout 15 minutes
+ ], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})
}

@@ -399,7 +399,7 @@
task: async () =>
await self.k8.waitForPodReady([
'v1.min.io/tenant=minio'
- ], 1, 60 * 5, 1000) // timeout 5 minutes
+ ], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})

// set up the sub-tasks
18 changes: 12 additions & 6 deletions src/commands/node/tasks.ts
@@ -29,8 +29,10 @@ import {
} from '../../core/index.ts'
import {
DEFAULT_NETWORK_NODE_NAME,
- FREEZE_ADMIN_ACCOUNT, HEDERA_NODE_DEFAULT_STAKE_AMOUNT,
- LOCAL_HOST, SECONDS,
+ FREEZE_ADMIN_ACCOUNT,
+ HEDERA_NODE_DEFAULT_STAKE_AMOUNT,
+ LOCAL_HOST,
+ SECONDS,
TREASURY_ACCOUNT_ID
} from '../../core/constants.ts'
import {
@@ -282,7 +284,9 @@ export class NodeCommandTasks {

async _checkNetworkNodeActiveness (namespace: string, nodeAlias: NodeAlias, task: ListrTaskWrapper<any, any, any>,
title: string, index: number, status = NodeStatusCodes.ACTIVE,
- maxAttempts = 120, delay = 1_000, timeout = 1_000
+ maxAttempts = constants.NETWORK_NODE_ACTIVE_MAX_ATTEMPTS,
+ delay = constants.NETWORK_NODE_ACTIVE_DELAY,
+ timeout = constants.NETWORK_NODE_ACTIVE_TIMEOUT
) {
nodeAlias = nodeAlias.trim() as NodeAlias
const podName = Templates.renderNetworkPodName(nodeAlias)
@@ -364,7 +368,7 @@
title: `Check proxy for node: ${chalk.yellow(nodeAlias)}`,
task: async () => await this.k8.waitForPodReady(
[`app=haproxy-${nodeAlias}`, 'solo.hedera.com/type=haproxy'],
- 1, 300, 2000)
+ 1, constants.NETWORK_PROXY_MAX_ATTEMPTS, constants.NETWORK_PROXY_DELAY)
})
}

@@ -626,7 +630,9 @@
}

/** Check if the network node pod is running */
- async checkNetworkNodePod (namespace: string, nodeAlias: NodeAlias, maxAttempts = 60, delay = 2000) {
+ async checkNetworkNodePod (namespace: string, nodeAlias: NodeAlias,
+ maxAttempts = constants.PODS_RUNNING_MAX_ATTEMPTS,
+ delay = constants.PODS_RUNNING_DELAY) {
nodeAlias = nodeAlias.trim() as NodeAlias
const podName = Templates.renderNetworkPodName(nodeAlias)

@@ -1221,7 +1227,7 @@
await this.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=network-node',
`solo.hedera.com/node-name=${nodeAlias}`
- ], 1, 60 * 15, 1000) // timeout 15 minutes
+ ], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY) // timeout 15 minutes
})
}

4 changes: 2 additions & 2 deletions src/commands/relay.ts
@@ -234,7 +234,7 @@ export class RelayCommand extends BaseCommand {
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'app=hedera-json-rpc-relay',
`app.kubernetes.io/instance=${config.releaseName}`
- ], 1, 900, 1000)
+ ], 1, constants.RELAY_PODS_RUNNING_MAX_ATTEMPTS, constants.RELAY_PODS_RUNNING_DELAY)

// reset nodeAlias
self.configManager.setFlag(flags.nodeAliasesUnparsed, '')
@@ -248,7 +248,7 @@
await self.k8.waitForPodReady([
'app=hedera-json-rpc-relay',
`app.kubernetes.io/instance=${config.releaseName}`
- ], 1, 100, 2000)
+ ], 1, constants.RELAY_PODS_READY_MAX_ATTEMPTS, constants.RELAY_PODS_READY_DELAY)
} catch (e: Error | any) {
throw new SoloError(`Relay ${config.releaseName} is not ready: ${e.message}`, e)
}
21 changes: 18 additions & 3 deletions src/core/constants.ts
@@ -162,6 +162,21 @@ export const JVM_DEBUG_PORT = 5005
export const SECONDS = 1000
export const MINUTES = 60 * SECONDS

- export const LEASE_AQUIRE_RETRY_TIMEOUT = 20 * SECONDS
- export const MAX_LEASE_ACQUIRE_ATTEMPTS = 10
- export const LEASE_RENEW_TIMEOUT = 10 * SECONDS
+ export const LEASE_ACQUIRE_RETRY_TIMEOUT = +process.env.LEASE_ACQUIRE_RETRY_TIMEOUT || 20 * SECONDS
+ export const MAX_LEASE_ACQUIRE_ATTEMPTS = +process.env.MAX_LEASE_ACQUIRE_ATTEMPTS || 10
+ export const LEASE_RENEW_TIMEOUT = +process.env.LEASE_RENEW_TIMEOUT || 10 * SECONDS
+
+ export const PODS_RUNNING_MAX_ATTEMPTS = +process.env.PODS_RUNNING_MAX_ATTEMPTS || 60 * 15
+ export const PODS_RUNNING_DELAY = +process.env.PODS_RUNNING_DELAY || 1000
+ export const NETWORK_NODE_ACTIVE_MAX_ATTEMPTS = +process.env.NETWORK_NODE_ACTIVE_MAX_ATTEMPTS || 120
+ export const NETWORK_NODE_ACTIVE_DELAY = +process.env.NETWORK_NODE_ACTIVE_DELAY || 1_000
+ export const NETWORK_NODE_ACTIVE_TIMEOUT = +process.env.NETWORK_NODE_ACTIVE_TIMEOUT || 1_000
+ export const NETWORK_PROXY_MAX_ATTEMPTS = +process.env.NETWORK_PROXY_MAX_ATTEMPTS || 300
+ export const NETWORK_PROXY_DELAY = +process.env.NETWORK_PROXY_DELAY || 2000
+ export const PODS_READY_MAX_ATTEMPTS = +process.env.PODS_READY_MAX_ATTEMPTS || 300
+ export const PODS_READY_DELAY = +process.env.PODS_READY_DELAY || 2_000
+ export const RELAY_PODS_RUNNING_MAX_ATTEMPTS = +process.env.RELAY_PODS_RUNNING_MAX_ATTEMPTS || 900
+ export const RELAY_PODS_RUNNING_DELAY = +process.env.RELAY_PODS_RUNNING_DELAY || 1_000
+ export const RELAY_PODS_READY_MAX_ATTEMPTS = +process.env.RELAY_PODS_READY_MAX_ATTEMPTS || 100
+ export const RELAY_PODS_READY_DELAY = +process.env.RELAY_PODS_READY_DELAY || 1_000
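The `+process.env.X || fallback` idiom above coerces the environment string to a number with unary plus, then substitutes the fallback for any falsy result. A small sketch of its edge cases (fromEnv and DEMO_ATTEMPTS are illustrative names, not part of the commit):

```ts
// Illustrative helper mirroring the idiom above. The non-null assertion
// satisfies the compiler; at runtime an unset variable gives NaN, which is
// falsy, so the fallback is used.
function fromEnv (name: string, fallback: number): number {
  return +process.env[name]! || fallback
}

process.env.DEMO_ATTEMPTS = '25'
console.log(fromEnv('DEMO_ATTEMPTS', 10)) // 25
delete process.env.DEMO_ATTEMPTS
console.log(fromEnv('DEMO_ATTEMPTS', 10)) // 10 (unset -> NaN -> fallback)
process.env.DEMO_ATTEMPTS = 'abc'
console.log(fromEnv('DEMO_ATTEMPTS', 10)) // 10 (bad input is silently ignored)
process.env.DEMO_ATTEMPTS = '0'
console.log(fromEnv('DEMO_ATTEMPTS', 10)) // 10 (0 is falsy, so an explicit 0 cannot be configured)
```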

4 changes: 2 additions & 2 deletions src/core/k8.ts
@@ -910,8 +910,8 @@
}
}

- async waitForPods (phases = [constants.POD_PHASE_RUNNING], labels: string[] = [], podCount = 1, maxAttempts = 10,
- delay = 500, podItemPredicate?: (items: k8s.V1Pod) => any): Promise<k8s.V1Pod[]> {
+ async waitForPods (phases = [constants.POD_PHASE_RUNNING], labels: string[] = [], podCount = 1, maxAttempts = constants.PODS_RUNNING_MAX_ATTEMPTS,
+ delay = constants.PODS_RUNNING_DELAY, podItemPredicate?: (items: k8s.V1Pod) => any): Promise<k8s.V1Pod[]> {
const ns = this._getNamespace()
const labelSelector = labels.join(',')

8 changes: 4 additions & 4 deletions src/core/lease_manager.ts
@@ -19,7 +19,7 @@ import { flags } from '../commands/index.ts'
import type { ConfigManager } from './config_manager.ts'
import type { K8 } from './k8.ts'
import type { SoloLogger } from './logging.ts'
- import { LEASE_RENEW_TIMEOUT, LEASE_AQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, OS_USERNAME } from './constants.ts'
+ import { LEASE_RENEW_TIMEOUT, LEASE_ACQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, OS_USERNAME } from './constants.ts'
import type { ListrTaskWrapper } from 'listr2'
import chalk from 'chalk'
import { sleep } from './helpers.ts'
@@ -131,12 +131,12 @@
throw new SoloError(`Failed to acquire lease, max attempt reached ${attempt}`)
}

- this.logger.info(`Lease is already taken retrying in ${LEASE_AQUIRE_RETRY_TIMEOUT}`)
+ this.logger.info(`Lease is already taken retrying in ${LEASE_ACQUIRE_RETRY_TIMEOUT}`)

- task.title = `${title} - ${chalk.gray(`lease exists, attempting again in ${LEASE_AQUIRE_RETRY_TIMEOUT} seconds`)}` +
+ task.title = `${title} - ${chalk.gray(`lease exists, attempting again in ${LEASE_ACQUIRE_RETRY_TIMEOUT} seconds`)}` +
`, attempt: ${chalk.cyan(attempt.toString())}/${chalk.cyan(maxAttempts.toString())}`

- await sleep(LEASE_AQUIRE_RETRY_TIMEOUT)
+ await sleep(LEASE_ACQUIRE_RETRY_TIMEOUT)

return this.acquireLeaseOrRetry(username, leaseName, namespace, task, title, attempt)
}
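The surrounding method retries by recursing with an incremented attempt counter, sleeping LEASE_ACQUIRE_RETRY_TIMEOUT between tries and giving up after MAX_LEASE_ACQUIRE_ATTEMPTS. A condensed sketch of that shape (tryAcquire is a hypothetical stand-in for the real Kubernetes lease call, not the actual acquireLeaseOrRetry method):

```ts
const MAX_LEASE_ACQUIRE_ATTEMPTS = +process.env.MAX_LEASE_ACQUIRE_ATTEMPTS || 10
const LEASE_ACQUIRE_RETRY_TIMEOUT = +process.env.LEASE_ACQUIRE_RETRY_TIMEOUT || 20_000
const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms))

// Hypothetical stand-in for the real Kubernetes lease-creation call.
async function tryAcquire (): Promise<boolean> {
  return false // always contended here, to exercise the retry path
}

async function acquireOrRetry (attempt = 1): Promise<void> {
  if (await tryAcquire()) return
  if (attempt >= MAX_LEASE_ACQUIRE_ATTEMPTS) {
    throw new Error(`Failed to acquire lease, max attempt reached ${attempt}`)
  }
  await sleep(LEASE_ACQUIRE_RETRY_TIMEOUT)
  return acquireOrRetry(attempt + 1)
}
```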
4 changes: 2 additions & 2 deletions test/e2e/integration/core/lease_manager.test.ts
@@ -20,7 +20,7 @@ import { expect } from 'chai'
import { flags } from '../../../../src/commands/index.ts'
import { e2eTestSuite, getDefaultArgv, TEST_CLUSTER } from '../../../test_util.ts'
import * as version from '../../../../version.ts'
- import { LEASE_AQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, MINUTES } from '../../../../src/core/constants.ts'
+ import { LEASE_ACQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, MINUTES } from '../../../../src/core/constants.ts'
import { sleep } from '../../../../src/core/helpers.js'

const namespace = 'lease-mngr-e2e'
@@ -68,7 +68,7 @@ e2eTestSuite(namespace, argv, undefined, undefined, undefined, undefined, undefi
// @ts-ignore to access private property
await blockedLease.acquireTask({ title }, title, MAX_LEASE_ACQUIRE_ATTEMPTS - 1)

- await sleep(LEASE_AQUIRE_RETRY_TIMEOUT * 2)
+ await sleep(LEASE_ACQUIRE_RETRY_TIMEOUT * 2)
} catch (e: Error | any) {
expect(e.message).to.contain('Failed to acquire lease, max attempt reached')
}
