Skip to content

Commit

Permalink
fix(cli): deployment gets stuck deploying stacks with shared assets (#…
Browse files Browse the repository at this point in the history
…25846)

In #25536 we introduced the new work graph orchestration for the CLI which had a bug when the same asset was shared by multiple stacks. #25719 was an attempt at fixing this bug, and while it fixed it for some cases it didn't fix all of them.

The issue is that for cosmetic reasons, asset publishing steps inherit the dependencies of the stacks they are publishing for  (so that the printed output of asset publishing doesn't interfere with the in-place updated progress bar of stack deployment and make it lose track of the terminal state).

There were two bugs:

* These extra dependencies could cause cycles (#25719 addressed direct cycles, where `Publish <--> Stack`, but not indirect cycles, where `Publish -> StackA -> StackB -> Publish`).
* Publishing nodes would overwrite other nodes with the same ID, and the dependency set they would end up with would be somewhat nondeterministic, making this problem hard to isolate.

Fixes #25806.

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
rix0rrr authored Jun 5, 2023
1 parent 164a6bc commit 8b97bdf
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 90 deletions.
2 changes: 1 addition & 1 deletion packages/aws-cdk/lib/cdk-toolkit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ export class CdkToolkit {
const graphConcurrency: Concurrency = {
'stack': concurrency,
'asset-build': 1, // This will be CPU-bound/memory bound, mostly matters for Docker builds
'asset-publish': options.assetParallelism ? 8 : 1, // This will be I/O-bound, 8 in parallel seems reasonable
'asset-publish': (options.assetParallelism ?? true) ? 8 : 1, // This will be I/O-bound, 8 in parallel seems reasonable
};

await workGraph.doParallel(graphConcurrency, {
Expand Down
79 changes: 44 additions & 35 deletions packages/aws-cdk/lib/util/work-graph-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ export class WorkGraphBuilder {
id: buildId,
dependencies: new Set([
...this.getDepIds(assetArtifact.dependencies),
// If we disable prebuild, then assets inherit dependencies from their parent stack
...!this.prebuildAssets ? this.getDepIds(parentStack.dependencies) : [],
// If we disable prebuild, then assets inherit (stack) dependencies from their parent stack
...!this.prebuildAssets ? this.getDepIds(onlyStacks(parentStack.dependencies)) : [],
]),
parentStack,
assetManifestArtifact: assetArtifact,
Expand All @@ -66,27 +66,35 @@ export class WorkGraphBuilder {

// Always add the publish
const publishNodeId = `${this.idPrefix}${asset.id}-publish`;
this.graph.addNodes({
type: 'asset-publish',
id: publishNodeId,
dependencies: new Set([
buildId,
// The asset publish step also depends on the stacks that the parent depends on.
// This is purely cosmetic: if we don't do this, the progress printing of asset publishing
// is going to interfere with the progress bar of the stack deployment. We could remove this
// for overall faster deployments if we ever have a better method of progress displaying.
// Note: this may introduce a cycle if one of the parent's dependencies is another stack that
// depends on this asset. To workaround this we remove these cycles once all nodes have
// been added to the graph.
...this.getDepIds(parentStack.dependencies.filter(cxapi.CloudFormationStackArtifact.isCloudFormationStackArtifact)),
]),
parentStack,
assetManifestArtifact: assetArtifact,
assetManifest,
asset,
deploymentState: DeploymentState.PENDING,
priority: WorkGraphBuilder.PRIORITIES['asset-publish'],
});

const publishNode = this.graph.tryGetNode(publishNodeId);
if (!publishNode) {
this.graph.addNodes({
type: 'asset-publish',
id: publishNodeId,
dependencies: new Set([
buildId,
]),
parentStack,
assetManifestArtifact: assetArtifact,
assetManifest,
asset,
deploymentState: DeploymentState.PENDING,
priority: WorkGraphBuilder.PRIORITIES['asset-publish'],
});
}

for (const inheritedDep of this.getDepIds(onlyStacks(parentStack.dependencies))) {
// The asset publish step also depends on the stacks that the parent depends on.
// This is purely cosmetic: if we don't do this, the progress printing of asset publishing
// is going to interfere with the progress bar of the stack deployment. We could remove this
// for overall faster deployments if we ever have a better method of progress displaying.
// Note: this may introduce a cycle if one of the parent's dependencies is another stack that
// depends on this asset. To workaround this we remove these cycles once all nodes have
// been added to the graph.
this.graph.addDependency(publishNodeId, inheritedDep);
}

// This will work whether the stack node has been added yet or not
this.graph.addDependency(`${this.idPrefix}${parentStack.id}`, publishNodeId);
}
Expand Down Expand Up @@ -137,20 +145,17 @@ export class WorkGraphBuilder {
return ids;
}

/**
* We may have accidentally introduced cycles in an attempt to make the messages printed to the
* console not interfere with each other too much. Remove them again.
*/
private removeStackPublishCycles() {
const stacks = this.graph.nodesOfType('stack');
for (const stack of stacks) {
for (const dep of stack.dependencies) {
const node = this.graph.nodes[dep];

if (!node || node.type !== 'asset-publish' || !node.dependencies.has(stack.id)) {
continue;
const publishSteps = this.graph.nodesOfType('asset-publish');
for (const publishStep of publishSteps) {
for (const dep of publishStep.dependencies) {
if (this.graph.reachable(dep, publishStep.id)) {
publishStep.dependencies.delete(dep);
}

// Delete the dependency from the asset-publish onto the stack.
// The publish -> stack dependencies are purely cosmetic to prevent publish output
// from interfering with the progress bar of the stack deployment.
node.dependencies.delete(stack.id);
}
}
}
Expand All @@ -166,4 +171,8 @@ function stacksFromAssets(artifacts: cxapi.CloudArtifact[]) {
}

return ret;
}

function onlyStacks(artifacts: cxapi.CloudArtifact[]) {
return artifacts.filter(cxapi.CloudFormationStackArtifact.isCloudFormationStackArtifact);
}
100 changes: 73 additions & 27 deletions packages/aws-cdk/lib/util/work-graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ export class WorkGraph {

public addNodes(...nodes: WorkNode[]) {
for (const node of nodes) {
if (this.nodes[node.id]) {
throw new Error(`Duplicate use of node id: ${node.id}`);
}

const ld = this.lazyDependencies.get(node.id);
if (ld) {
for (const x of ld) {
Expand Down Expand Up @@ -72,6 +76,10 @@ export class WorkGraph {
lazyDeps.push(toId);
}

public tryGetNode(id: string): WorkNode | undefined {
return this.nodes[id];
}

public node(id: string) {
const ret = this.nodes[id];
if (!ret) {
Expand Down Expand Up @@ -198,9 +206,24 @@ export class WorkGraph {
}

public toString() {
return Object.entries(this.nodes).map(([id, node]) =>
`${id} := ${node.deploymentState} ${node.type} ${node.dependencies.size > 0 ? `(${Array.from(node.dependencies)})` : ''}`.trim(),
).join(', ');
return [
'digraph D {',
...Object.entries(this.nodes).flatMap(([id, node]) => renderNode(id, node)),
'}',
].join('\n');

function renderNode(id: string, node: WorkNode): string[] {
const ret = [];
if (node.deploymentState === DeploymentState.COMPLETED) {
ret.push(` "${id}" [style=filled,fillcolor=yellow];`);
} else {
ret.push(` "${id}";`);
}
for (const dep of node.dependencies) {
ret.push(` "${id}" -> "${dep}";`);
}
return ret;
}
}

/**
Expand Down Expand Up @@ -244,35 +267,28 @@ export class WorkGraph {
}

private updateReadyPool() {
let activeCount = 0;
let pendingCount = 0;
for (const node of Object.values(this.nodes)) {
switch (node.deploymentState) {
case DeploymentState.DEPLOYING:
activeCount += 1;
break;
case DeploymentState.PENDING:
pendingCount += 1;
if (Array.from(node.dependencies).every((id) => this.node(id).deploymentState === DeploymentState.COMPLETED)) {
node.deploymentState = DeploymentState.QUEUED;
this.readyPool.push(node);
}
break;
}
}
const activeCount = Object.values(this.nodes).filter((x) => x.deploymentState === DeploymentState.DEPLOYING).length;
const pendingCount = Object.values(this.nodes).filter((x) => x.deploymentState === DeploymentState.PENDING).length;

for (let i = 0; i < this.readyPool.length; i++) {
const node = this.readyPool[i];
if (node.deploymentState !== DeploymentState.QUEUED) {
this.readyPool.splice(i, 1);
}
const newlyReady = Object.values(this.nodes).filter((x) =>
x.deploymentState === DeploymentState.PENDING &&
Array.from(x.dependencies).every((id) => this.node(id).deploymentState === DeploymentState.COMPLETED));

// Add newly available nodes to the ready pool
for (const node of newlyReady) {
node.deploymentState = DeploymentState.QUEUED;
this.readyPool.push(node);
}

// Remove nodes from the ready pool that have already started deploying
retainOnly(this.readyPool, (node) => node.deploymentState === DeploymentState.QUEUED);

// Sort by reverse priority
this.readyPool.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));

if (this.readyPool.length === 0 && activeCount === 0 && pendingCount > 0) {
throw new Error(`Unable to make progress anymore, dependency cycle between remaining artifacts: ${this.findCycle().join(' -> ')}`);
const cycle = this.findCycle() ?? ['No cycle found!'];
throw new Error(`Unable to make progress anymore, dependency cycle between remaining artifacts: ${cycle.join(' -> ')}`);
}
}

Expand All @@ -289,14 +305,14 @@ export class WorkGraph {
*
* Not the fastest, but effective and should be rare
*/
private findCycle(): string[] {
public findCycle(): string[] | undefined {
const seen = new Set<string>();
const self = this;
for (const nodeId of Object.keys(this.nodes)) {
const cycle = recurse(nodeId, [nodeId]);
if (cycle) { return cycle; }
}
return ['No cycle found!'];
return undefined;

function recurse(nodeId: string, path: string[]): string[] | undefined {
if (seen.has(nodeId)) {
Expand All @@ -319,6 +335,32 @@ export class WorkGraph {
}
}
}

/**
* Whether the `end` node is reachable from the `start` node, following the dependency arrows
*/
public reachable(start: string, end: string): boolean {
const seen = new Set<string>();
const self = this;
return recurse(start);

function recurse(current: string) {
if (seen.has(current)) {
return false;
}
seen.add(current);

if (current === end) {
return true;
}
for (const dep of self.nodes[current].dependencies) {
if (recurse(dep)) {
return true;
}
}
return false;
}
}
}

export interface WorkGraphActions {
Expand All @@ -334,3 +376,7 @@ function sum(xs: number[]) {
}
return ret;
}

function retainOnly<A>(xs: A[], pred: (x: A) => boolean) {
xs.splice(0, xs.length, ...xs.filter(pred));
}
74 changes: 47 additions & 27 deletions packages/aws-cdk/test/work-graph-builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ test('dependencies on unselected artifacts are silently ignored', async () => {
}));
});

test('assets with shared contents between dependant stacks', async () => {
describe('tests that use assets', () => {
const files = {
// Referencing an existing file on disk is important here.
// It means these two assets will have the same AssetManifest
Expand All @@ -121,36 +121,56 @@ test('assets with shared contents between dependant stacks', async () => {
},
},
};
const environment = 'aws://11111/us-east-1';

addStack(rootBuilder, 'StackA', {
environment: 'aws://11111/us-east-1',
dependencies: ['StackA.assets'],
});
addAssets(rootBuilder, 'StackA.assets', { files });
test('assets with shared contents between dependant stacks', async () => {
addStack(rootBuilder, 'StackA', {
environment: 'aws://11111/us-east-1',
dependencies: ['StackA.assets'],
});
addAssets(rootBuilder, 'StackA.assets', { files });

addStack(rootBuilder, 'StackB', {
environment: 'aws://11111/us-east-1',
dependencies: ['StackB.assets', 'StackA'],
addStack(rootBuilder, 'StackB', {
environment: 'aws://11111/us-east-1',
dependencies: ['StackB.assets', 'StackA'],
});
addAssets(rootBuilder, 'StackB.assets', { files });

const assembly = rootBuilder.buildAssembly();

const traversal: string[] = [];
const graph = new WorkGraphBuilder(true).build(assembly.artifacts);
await graph.doParallel(1, {
deployStack: async (node) => { traversal.push(node.id); },
buildAsset: async (node) => { traversal.push(node.id); },
publishAsset: async (node) => { traversal.push(node.id); },
});

expect(traversal).toHaveLength(4); // 1 asset build, 1 asset publish, 2 stacks
expect(traversal).toEqual([
'work-graph-builder.test.js:D1-build',
'work-graph-builder.test.js:D1-publish',
'StackA',
'StackB',
]);
});
addAssets(rootBuilder, 'StackB.assets', { files });

const assembly = rootBuilder.buildAssembly();
test('a more complex way to make a cycle', async () => {
// A -> B -> C | A and C share an asset. The asset will have a dependency on B, that is not a *direct* reverse dependency, and will cause a cycle.
addStack(rootBuilder, 'StackA', { environment, dependencies: ['StackA.assets', 'StackB'] });
addAssets(rootBuilder, 'StackA.assets', { files });

const traversal: string[] = [];
const graph = new WorkGraphBuilder(true).build(assembly.artifacts);
await graph.doParallel(1, {
deployStack: async (node) => { traversal.push(node.id); },
buildAsset: async (node) => { traversal.push(node.id); },
publishAsset: async (node) => { traversal.push(node.id); },
});

expect(traversal).toHaveLength(4); // 1 asset build, 1 asset publish, 2 stacks
expect(traversal).toEqual([
'work-graph-builder.test.js:D1-build',
'work-graph-builder.test.js:D1-publish',
'StackA',
'StackB',
]);
addStack(rootBuilder, 'StackB', { environment, dependencies: ['StackC'] });

addStack(rootBuilder, 'StackC', { environment, dependencies: ['StackC.assets'] });
addAssets(rootBuilder, 'StackC.assets', { files });

const assembly = rootBuilder.buildAssembly();
const graph = new WorkGraphBuilder(true).build(assembly.artifacts);

// THEN
expect(graph.findCycle()).toBeUndefined();
});
});

/**
Expand Down Expand Up @@ -230,4 +250,4 @@ function assertableNode<A extends WorkNode>(x: A) {
...x,
dependencies: Array.from(x.dependencies),
};
}
}

0 comments on commit 8b97bdf

Please sign in to comment.