Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: expose pod check attempts and delays as environment controlled variables #812

Merged
merged 1 commit
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/commands/mirror_node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -238,43 +238,43 @@ export class MirrorNodeCommand extends BaseCommand {
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=postgresql',
'app.kubernetes.io/name=postgres'
], 1, 300, 2000)
], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check REST API',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=rest',
'app.kubernetes.io/name=rest'
], 1, 300, 2000)
], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check GRPC',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=grpc',
'app.kubernetes.io/name=grpc'
], 1, 300, 2000)
], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check Monitor',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=monitor',
'app.kubernetes.io/name=monitor'
], 1, 300, 2000)
], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check Importer',
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=importer',
'app.kubernetes.io/name=importer'
], 1, 300, 2000)
], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
},
{
title: 'Check Hedera Explorer',
skip: (ctx) => !ctx.config.deployHederaExplorer,
task: async () => await self.k8.waitForPodReady([
'app.kubernetes.io/component=hedera-explorer',
'app.kubernetes.io/name=hedera-explorer'
], 1, 300, 2000)
], 1, constants.PODS_READY_MAX_ATTEMPTS, constants.PODS_READY_DELAY)
}
], {
concurrent: true,
Expand Down
8 changes: 4 additions & 4 deletions src/commands/network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ export class NetworkCommand extends BaseCommand {
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=network-node',
`solo.hedera.com/node-name=${nodeAlias}`
], 1, 60 * 15, 1000) // timeout 15 minutes
], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})
}

Expand All @@ -363,7 +363,7 @@ export class NetworkCommand extends BaseCommand {
task: async () =>
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=haproxy'
], 1, 60 * 15, 1000) // timeout 15 minutes
], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})
}

Expand All @@ -374,7 +374,7 @@ export class NetworkCommand extends BaseCommand {
task: async () =>
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=envoy-proxy'
], 1, 60 * 15, 1000) // timeout 15 minutes
], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})
}

Expand All @@ -399,7 +399,7 @@ export class NetworkCommand extends BaseCommand {
task: async () =>
await self.k8.waitForPodReady([
'v1.min.io/tenant=minio'
], 1, 60 * 5, 1000) // timeout 5 minutes
], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY)
})

// set up the sub-tasks
Expand Down
18 changes: 12 additions & 6 deletions src/commands/node/tasks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ import {
} from '../../core/index.ts'
import {
DEFAULT_NETWORK_NODE_NAME,
FREEZE_ADMIN_ACCOUNT, HEDERA_NODE_DEFAULT_STAKE_AMOUNT,
LOCAL_HOST, SECONDS,
FREEZE_ADMIN_ACCOUNT,
HEDERA_NODE_DEFAULT_STAKE_AMOUNT,
LOCAL_HOST,
SECONDS,
TREASURY_ACCOUNT_ID
} from '../../core/constants.ts'
import {
Expand Down Expand Up @@ -282,7 +284,9 @@ export class NodeCommandTasks {

async _checkNetworkNodeActiveness (namespace: string, nodeAlias: NodeAlias, task: ListrTaskWrapper<any, any, any>,
title: string, index: number, status = NodeStatusCodes.ACTIVE,
maxAttempts = 120, delay = 1_000, timeout = 1_000
maxAttempts = constants.NETWORK_NODE_ACTIVE_MAX_ATTEMPTS,
delay = constants.NETWORK_NODE_ACTIVE_DELAY,
timeout = constants.NETWORK_NODE_ACTIVE_TIMEOUT
) {
nodeAlias = nodeAlias.trim() as NodeAlias
const podName = Templates.renderNetworkPodName(nodeAlias)
Expand Down Expand Up @@ -364,7 +368,7 @@ export class NodeCommandTasks {
title: `Check proxy for node: ${chalk.yellow(nodeAlias)}`,
task: async () => await this.k8.waitForPodReady(
[`app=haproxy-${nodeAlias}`, 'solo.hedera.com/type=haproxy'],
1, 300, 2000)
1, constants.NETWORK_PROXY_MAX_ATTEMPTS, constants.NETWORK_PROXY_DELAY)
})
}

Expand Down Expand Up @@ -626,7 +630,9 @@ export class NodeCommandTasks {
}

/** Check if the network node pod is running */
async checkNetworkNodePod (namespace: string, nodeAlias: NodeAlias, maxAttempts = 60, delay = 2000) {
async checkNetworkNodePod (namespace: string, nodeAlias: NodeAlias,
maxAttempts = constants.PODS_RUNNING_MAX_ATTEMPTS,
delay = constants.PODS_RUNNING_DELAY) {
nodeAlias = nodeAlias.trim() as NodeAlias
const podName = Templates.renderNetworkPodName(nodeAlias)

Expand Down Expand Up @@ -1221,7 +1227,7 @@ export class NodeCommandTasks {
await this.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'solo.hedera.com/type=network-node',
`solo.hedera.com/node-name=${nodeAlias}`
], 1, 60 * 15, 1000) // timeout 15 minutes
], 1, constants.PODS_RUNNING_MAX_ATTEMPTS, constants.PODS_RUNNING_DELAY) // timeout 15 minutes
})
}

Expand Down
4 changes: 2 additions & 2 deletions src/commands/relay.ts
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ export class RelayCommand extends BaseCommand {
await self.k8.waitForPods([constants.POD_PHASE_RUNNING], [
'app=hedera-json-rpc-relay',
`app.kubernetes.io/instance=${config.releaseName}`
], 1, 900, 1000)
], 1, constants.RELAY_PODS_RUNNING_MAX_ATTEMPTS, constants.RELAY_PODS_RUNNING_DELAY)

// reset nodeAlias
self.configManager.setFlag(flags.nodeAliasesUnparsed, '')
Expand All @@ -248,7 +248,7 @@ export class RelayCommand extends BaseCommand {
await self.k8.waitForPodReady([
'app=hedera-json-rpc-relay',
`app.kubernetes.io/instance=${config.releaseName}`
], 1, 100, 2000)
], 1, constants.RELAY_PODS_READY_MAX_ATTEMPTS, constants.RELAY_PODS_READY_DELAY)
} catch (e: Error | any) {
throw new SoloError(`Relay ${config.releaseName} is not ready: ${e.message}`, e)
}
Expand Down
21 changes: 18 additions & 3 deletions src/core/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,21 @@ export const JVM_DEBUG_PORT = 5005
export const SECONDS = 1000
export const MINUTES = 60 * SECONDS

export const LEASE_AQUIRE_RETRY_TIMEOUT = 20 * SECONDS
export const MAX_LEASE_ACQUIRE_ATTEMPTS = 10
export const LEASE_RENEW_TIMEOUT = 10 * SECONDS
export const LEASE_ACQUIRE_RETRY_TIMEOUT = +process.env.LEASE_ACQUIRE_RETRY_TIMEOUT || 20 * SECONDS
export const MAX_LEASE_ACQUIRE_ATTEMPTS = +process.env.MAX_LEASE_ACQUIRE_ATTEMPTS || 10
export const LEASE_RENEW_TIMEOUT = +process.env.LEASE_RENEW_TIMEOUT || 10 * SECONDS

export const PODS_RUNNING_MAX_ATTEMPTS = +process.env.PODS_RUNNING_MAX_ATTEMPTS || 60 * 15
export const PODS_RUNNING_DELAY = +process.env.PODS_RUNNING_DELAY || 1000
export const NETWORK_NODE_ACTIVE_MAX_ATTEMPTS = +process.env.NETWORK_NODE_ACTIVE_MAX_ATTEMPTS || 120
export const NETWORK_NODE_ACTIVE_DELAY = +process.env.NETWORK_NODE_ACTIVE_DELAY || 1_000
export const NETWORK_NODE_ACTIVE_TIMEOUT = +process.env.NETWORK_NODE_ACTIVE_TIMEOUT || 1_000
export const NETWORK_PROXY_MAX_ATTEMPTS = +process.env.NETWORK_PROXY_MAX_ATTEMPTS || 300
export const NETWORK_PROXY_DELAY = +process.env.NETWORK_PROXY_DELAY || 2000
export const PODS_READY_MAX_ATTEMPTS = +process.env.PODS_READY_MAX_ATTEMPTS || 300
export const PODS_READY_DELAY = +process.env.PODS_READY_DELAY || 2_000
export const RELAY_PODS_RUNNING_MAX_ATTEMPTS = +process.env.RELAY_PODS_RUNNING_MAX_ATTEMPTS || 900
export const RELAY_PODS_RUNNING_DELAY = +process.env.RELAY_PODS_RUNNING_DELAY || 1_000
export const RELAY_PODS_READY_MAX_ATTEMPTS = +process.env.RELAY_PODS_READY_MAX_ATTEMPTS || 100
export const RELAY_PODS_READY_DELAY = +process.env.RELAY_PODS_READY_DELAY || 1_000

4 changes: 2 additions & 2 deletions src/core/k8.ts
Original file line number Diff line number Diff line change
Expand Up @@ -910,8 +910,8 @@ export class K8 {
}
}

async waitForPods (phases = [constants.POD_PHASE_RUNNING], labels: string[] = [], podCount = 1, maxAttempts = 10,
delay = 500, podItemPredicate?: (items: k8s.V1Pod) => any): Promise<k8s.V1Pod[]> {
async waitForPods (phases = [constants.POD_PHASE_RUNNING], labels: string[] = [], podCount = 1, maxAttempts = constants.PODS_RUNNING_MAX_ATTEMPTS,
delay = constants.PODS_RUNNING_DELAY, podItemPredicate?: (items: k8s.V1Pod) => any): Promise<k8s.V1Pod[]> {
const ns = this._getNamespace()
const labelSelector = labels.join(',')

Expand Down
8 changes: 4 additions & 4 deletions src/core/lease_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { flags } from '../commands/index.ts'
import type { ConfigManager } from './config_manager.ts'
import type { K8 } from './k8.ts'
import type { SoloLogger } from './logging.ts'
import { LEASE_RENEW_TIMEOUT, LEASE_AQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, OS_USERNAME } from './constants.ts'
import { LEASE_RENEW_TIMEOUT, LEASE_ACQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, OS_USERNAME } from './constants.ts'
import type { ListrTaskWrapper } from 'listr2'
import chalk from 'chalk'
import { sleep } from './helpers.ts'
Expand Down Expand Up @@ -131,12 +131,12 @@ export class LeaseManager {
throw new SoloError(`Failed to acquire lease, max attempt reached ${attempt}`)
}

this.logger.info(`Lease is already taken retrying in ${LEASE_AQUIRE_RETRY_TIMEOUT}`)
this.logger.info(`Lease is already taken retrying in ${LEASE_ACQUIRE_RETRY_TIMEOUT}`)

task.title = `${title} - ${chalk.gray(`lease exists, attempting again in ${LEASE_AQUIRE_RETRY_TIMEOUT} seconds`)}` +
task.title = `${title} - ${chalk.gray(`lease exists, attempting again in ${LEASE_ACQUIRE_RETRY_TIMEOUT} seconds`)}` +
`, attempt: ${chalk.cyan(attempt.toString())}/${chalk.cyan(maxAttempts.toString())}`

await sleep(LEASE_AQUIRE_RETRY_TIMEOUT)
await sleep(LEASE_ACQUIRE_RETRY_TIMEOUT)

return this.acquireLeaseOrRetry(username, leaseName, namespace, task, title, attempt)
}
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/integration/core/lease_manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { expect } from 'chai'
import { flags } from '../../../../src/commands/index.ts'
import { e2eTestSuite, getDefaultArgv, TEST_CLUSTER } from '../../../test_util.ts'
import * as version from '../../../../version.ts'
import { LEASE_AQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, MINUTES } from '../../../../src/core/constants.ts'
import { LEASE_ACQUIRE_RETRY_TIMEOUT, MAX_LEASE_ACQUIRE_ATTEMPTS, MINUTES } from '../../../../src/core/constants.ts'
import { sleep } from '../../../../src/core/helpers.js'

const namespace = 'lease-mngr-e2e'
Expand Down Expand Up @@ -68,7 +68,7 @@ e2eTestSuite(namespace, argv, undefined, undefined, undefined, undefined, undefi
// @ts-ignore to access private property
await blockedLease.acquireTask({ title }, title, MAX_LEASE_ACQUIRE_ATTEMPTS - 1)

await sleep(LEASE_AQUIRE_RETRY_TIMEOUT * 2)
await sleep(LEASE_ACQUIRE_RETRY_TIMEOUT * 2)
} catch (e: Error | any) {
expect(e.message).to.contain('Failed to acquire lease, max attempt reached')
}
Expand Down
Loading