Skip to content

Commit

Permalink
fix: performance improvements for node stop (#986)
Browse files Browse the repository at this point in the history
Signed-off-by: Jeromy Cannon <[email protected]>
  • Loading branch information
jeromy-cannon authored Dec 12, 2024
1 parent 83fb584 commit 437bbc6
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/commands/node/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ export class NodeCommandHandlers implements CommandHandlers {
this.validateAllNodeStates({
acceptedStates: [ConsensusNodeStates.STARTED, ConsensusNodeStates.SETUP],
}),
this.tasks.identifyNetworkPods(),
this.tasks.identifyNetworkPods(1),
this.tasks.stopNodes(),
this.changeAllNodeStates(ConsensusNodeStates.INITIALIZED),
],
Expand Down
37 changes: 26 additions & 11 deletions src/commands/node/tasks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,12 @@ export class NodeCommandTasks {
);
}

taskCheckNetworkNodePods(ctx: any, task: ListrTaskWrapper<any, any, any>, nodeAliases: NodeAliases): Listr {
taskCheckNetworkNodePods(
ctx: any,
task: ListrTaskWrapper<any, any, any>,
nodeAliases: NodeAliases,
maxAttempts = undefined,
): Listr {
if (!ctx.config) ctx.config = {};

ctx.config.podNames = {};
Expand All @@ -754,7 +759,15 @@ export class NodeCommandTasks {
subTasks.push({
title: `Check network pod: ${chalk.yellow(nodeAlias)}`,
task: async (ctx: any) => {
ctx.config.podNames[nodeAlias] = await self.checkNetworkNodePod(ctx.config.namespace, nodeAlias);
try {
ctx.config.podNames[nodeAlias] = await self.checkNetworkNodePod(
ctx.config.namespace,
nodeAlias,
maxAttempts,
);
} catch (_) {
ctx.config.skipStop = true;
}
},
});
}
Expand Down Expand Up @@ -842,10 +855,10 @@ export class NodeCommandTasks {
);
}

identifyNetworkPods() {
identifyNetworkPods(maxAttempts = undefined) {
const self = this;
return new Task('Identify network pods', (ctx: any, task: ListrTaskWrapper<any, any, any>) => {
return self.taskCheckNetworkNodePods(ctx, task, ctx.config.nodeAliases);
return self.taskCheckNetworkNodePods(ctx, task, ctx.config.nodeAliases, maxAttempts);
});
}

Expand Down Expand Up @@ -1068,13 +1081,15 @@ export class NodeCommandTasks {
stopNodes() {
return new Task('Stopping nodes', (ctx: any, task: ListrTaskWrapper<any, any, any>) => {
const subTasks = [];
for (const nodeAlias of ctx.config.nodeAliases) {
const podName = ctx.config.podNames[nodeAlias];
subTasks.push({
title: `Stop node: ${chalk.yellow(nodeAlias)}`,
task: async () =>
await this.k8.execContainer(podName, constants.ROOT_CONTAINER, 'systemctl stop network-node'),
});
if (!ctx.config.skipStop) {
for (const nodeAlias of ctx.config.nodeAliases) {
const podName = ctx.config.podNames[nodeAlias];
subTasks.push({
title: `Stop node: ${chalk.yellow(nodeAlias)}`,
task: async () =>
await this.k8.execContainer(podName, constants.ROOT_CONTAINER, 'systemctl stop network-node'),
});
}
}

// setup the sub-tasks
Expand Down
11 changes: 10 additions & 1 deletion src/core/k8.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1423,9 +1423,18 @@ export class K8 {
return body as k8s.V1Lease;
}

async readNamespacedLease(leaseName: string, namespace: string) {
async readNamespacedLease(leaseName: string, namespace: string, timesCalled = 0) {
const {response, body} = await this.coordinationApiClient.readNamespacedLease(leaseName, namespace).catch(e => e);

if (response?.statusCode === 500 && timesCalled < 4) {
// could be k8s control plane has no resources available
this.logger.debug(
`Retrying readNamespacedLease(${leaseName}, ${namespace}) in 5 seconds because of statusCode 500`,
);
await sleep(Duration.ofSeconds(5));
return await this.readNamespacedLease(leaseName, namespace, timesCalled + 1);
}

this.handleKubernetesClientError(response, body, 'Failed to read namespaced lease');

return body as k8s.V1Lease;
Expand Down

0 comments on commit 437bbc6

Please sign in to comment.