Skip to content

Commit

Permalink
fix(pacmak): EMFILE error when running jsii-pacmak (#1891)
Browse files Browse the repository at this point in the history
The `findLocalBuildDirs` function had no protection against
inspecting the same `package.json` file multiple times while it
asynchronously traversed the whole dependency tree under the built
package. In certain pathological cases, dependencies could be processed
many times over (think about how `@aws-cdk/aws-iam` is a direct or
transitive dependency of nearly every other `@aws-cdk/aws-*` module).
The asynchronous nature of the process means that *many* instances of
the same file could be open at the same time, occasionally inching
above the maximum file descriptor count limit and hence causing `EMFILE`
(the standard error code for "too many open files").

This change adds a `visitedDirectories` set to prevent re-visiting the
same dependency instance multiple times (based on the absolute path of
the package root). This incidentally also improves the performance of
the process: making promises incurs overhead, and not re-processing
directories multiple times cuts out a significant chunk of the promises
made in extreme cases.

Special thanks to @richardhboyd for having me look into this particular
problem.



---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
  • Loading branch information
RomainMuller authored Aug 18, 2020
1 parent 8f31b1a commit 7316b44
Show file tree
Hide file tree
Showing 3 changed files with 229 additions and 21 deletions.
101 changes: 101 additions & 0 deletions packages/jsii-pacmak/lib/dependency-graph.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import * as fs from 'fs-extra';
import { join } from 'path';

import * as util from './util';

/**
 * Walks the dependency graph rooted at `packageDir` and calls `callback` for
 * every package root directory found along the way, starting with
 * `packageDir` itself. Package roots are de-duplicated by their directory
 * path, so each directory is handed to the callback at most once.
 *
 * @param packageDir the root directory of the starting package (i.e. the
 *                   directory that contains its `package.json` file)
 * @param callback   the function to invoke with each package's information;
 *                   its result decides whether that package's own
 *                   dependencies are traversed
 * @param host       the dependency graph traversal host to use (this
 *                   parameter should typically be omitted, except when unit
 *                   testing this module)
 */
export async function traverseDependencyGraph(
  packageDir: string,
  callback: Callback,
  host: TraverseDependencyGraphHost = {
    readJson: fs.readJson,
    resolveDependencyDirectory: util.resolveDependencyDirectory,
  },
): Promise<void> {
  // Every top-level call starts from an empty set; the worker relies on that
  // emptiness to recognize the root package.
  const visited = new Set<string>();
  await real$traverseDependencyGraph(packageDir, callback, host, visited);
}

/**
 * A callback invoked for each node in an NPM module's dependency graph.
 *
 * The callback may answer synchronously or return a promise; the traversal
 * awaits the result before deciding whether to descend further.
 *
 * @param packageDir the directory where the current package is located.
 * @param meta       the contents of the `package.json` file for this package.
 * @param root       whether this package is the root that was provided to the
 *                   `traverseDependencyGraph` call.
 *
 * @returns `true` if this package's own dependencies should be processed,
 *          `false` otherwise.
 */
export type Callback = (
packageDir: string,
meta: PackageJson,
root: boolean,
) => boolean | Promise<boolean>;

/**
 * Host methods for traversing dependency graphs. These are the only
 * operations the traversal delegates to its host, which allows them to be
 * replaced with mocks in unit tests.
 */
export interface TraverseDependencyGraphHost {
/** Used by the traversal to load each visited package's `package.json`. */
readonly readJson: typeof fs.readJson;
/**
 * Called with a package directory and a dependency name; the directory it
 * returns is then traversed as that dependency's package root.
 */
readonly resolveDependencyDirectory: typeof util.resolveDependencyDirectory;
}

/**
 * Contents of the `package.json` file. Only the fields the traversal reads
 * are typed precisely; everything else is exposed as `unknown`.
 */
export interface PackageJson {
/** Regular dependencies, keyed by package name; followed by the traversal. */
readonly dependencies?: { readonly [name: string]: string };
/** Peer dependencies, keyed by package name; also followed by the traversal. */
readonly peerDependencies?: { readonly [name: string]: string };

/** Any other `package.json` entry; must be narrowed before use. */
readonly [key: string]: unknown;
}

/**
 * Recursive worker behind `traverseDependencyGraph`.
 *
 * Reads the `package.json` found in `packageDir`, hands it to `callback`, and
 * (when the callback asks for it) recurses concurrently into every regular
 * and peer dependency. Directories already present in `visited` are skipped.
 *
 * @param packageDir the package root directory to process.
 * @param callback   invoked once for each newly visited package root.
 * @param host       provides JSON reading and dependency resolution.
 * @param visited    the package root directories seen so far; shared by (and
 *                   mutated across) all recursive calls. It must be empty on
 *                   the initial call, since that is how the root is detected.
 */
async function real$traverseDependencyGraph(
  packageDir: string,
  callback: Callback,
  host: TraverseDependencyGraphHost,
  visited: Set<string>,
): Promise<void> {
  // Nothing has been recorded before the very first invocation, so an empty
  // set identifies the root package.
  const isRoot = visited.size === 0;
  if (visited.has(packageDir)) {
    return;
  }
  // Record the directory before the first `await`, so concurrent branches
  // that reach the same directory bail out instead of re-reading it.
  visited.add(packageDir);

  const manifestPath = join(packageDir, 'package.json');
  const meta: PackageJson = await host.readJson(manifestPath);

  const shouldDescend = await callback(packageDir, meta, isRoot);
  if (!shouldDescend) {
    return;
  }

  // A name listed both as a regular and as a peer dependency is followed once.
  const dependencyNames = new Set<string>([
    ...Object.keys(meta.dependencies ?? {}),
    ...Object.keys(meta.peerDependencies ?? {}),
  ]);

  // Start every branch before awaiting any of them, so they run concurrently.
  const branches: Array<Promise<void>> = [];
  for (const dependencyName of dependencyNames) {
    const dependencyDir = host.resolveDependencyDirectory(
      packageDir,
      dependencyName,
    );
    branches.push(
      real$traverseDependencyGraph(dependencyDir, callback, host, visited),
    );
  }
  await Promise.all(branches);
}
36 changes: 15 additions & 21 deletions packages/jsii-pacmak/lib/target.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import * as reflect from 'jsii-reflect';
import * as spec from '@jsii/spec';
import * as path from 'path';

import { traverseDependencyGraph } from './dependency-graph';
import { IGenerator } from './generator';
import * as logging from './logging';
import { resolveDependencyDirectory } from './util';
import { Rosetta } from 'jsii-rosetta';

export abstract class Target {
Expand Down Expand Up @@ -91,39 +91,33 @@ export async function findLocalBuildDirs(
targetName: string,
) {
const results = new Set<string>();
await recurse(rootPackageDir, true);
await traverseDependencyGraph(rootPackageDir, processPackage);
return Array.from(results);

async function recurse(packageDir: string, isRoot: boolean) {
const pkg = await fs.readJson(path.join(packageDir, 'package.json'));

async function processPackage(
packageDir: string,
pkg: any,
isRoot: boolean,
): Promise<boolean> {
// no jsii or jsii.outdir - either a misconfigured jsii package or a non-jsii dependency. either way, we are done here.
if (!pkg.jsii || !pkg.jsii.outdir) {
return;
return false;
}

if (isRoot) {
// This is the root package - no need to register it's outdir
return true;
}

// if an output directory exists for this module, then we add it to our
// list of results (unless it's the root package, which we are currently building)
const outdir = path.join(packageDir, pkg.jsii.outdir, targetName);
if (results.has(outdir)) {
return;
} // Already visited, don't recurse again

if (!isRoot && (await fs.pathExists(outdir))) {
if (await fs.pathExists(outdir)) {
logging.debug(`Found ${outdir} as a local dependency output`);
results.add(outdir);
}

// now descend to dependencies
await Promise.all(
Object.keys(pkg.dependencies ?? {}).map((dependencyName) => {
const dependencyDir = resolveDependencyDirectory(
packageDir,
dependencyName,
);
return recurse(dependencyDir, false);
}),
);
return true;
}
}

Expand Down
113 changes: 113 additions & 0 deletions packages/jsii-pacmak/test/dependency-graph.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { tmpdir } from 'os';
import { join } from 'path';
import { Callback, traverseDependencyGraph } from '../lib/dependency-graph';

// Mocked traversal host shared by every test in this file. Neither mock has
// an implementation here: each test installs its own via `mockImplementation`.
const mockHost = {
// Stands in for the host's `readJson` (takes a file path, returns a promise).
readJson: jest.fn<Promise<any>, [string]>().mockName('fs.readJson'),
// Stands in for the host's `resolveDependencyDirectory` (takes a package
// directory and a dependency name, returns a directory path).
resolveDependencyDirectory: jest
.fn<string, [string, string]>()
.mockName('resolveDependencyDirectory'),
};

// Reset all mocks after every test so that the shared `mockHost` does not
// leak call history or implementations from one test into the next. The hook
// is entirely synchronous, so it needs no `done` callback.
afterEach(() => {
  jest.resetAllMocks();
});

test('de-duplicates package root directories', async () => {
  // GIVEN a dependency graph in which C is reachable through two paths:
  //   A -> B -> C   (regular dependencies)
  //   A -> C        (peer dependency)
  const rootOf = (name: string) => join(tmpdir(), name);
  const packages: Record<string, { root: string; meta: any }> = {
    A: {
      root: rootOf('A'),
      meta: { dependencies: { B: '*' }, peerDependencies: { C: '*' } },
    },
    B: { root: rootOf('B'), meta: { dependencies: { C: '*' } } },
    C: { root: rootOf('C'), meta: {} },
  };
  const allPackages = Object.values(packages);

  // The callback always asks for the package's dependencies to be traversed.
  const cb: Callback = jest
    .fn()
    .mockName('callback')
    .mockImplementation(() => true);

  // Serve each package's metadata from memory; any other path is an error.
  mockHost.readJson.mockImplementation(async (file) => {
    const match = allPackages.find(
      ({ root }) => join(root, 'package.json') === file,
    );
    if (match?.meta == null) {
      throw new Error(`Unexpected file access: ${file}`);
    }
    return match.meta;
  });

  // Dependencies resolve to the fixture roots, whatever the requesting package.
  mockHost.resolveDependencyDirectory.mockImplementation((_dir, dep) => {
    const depRoot = packages[dep]?.root;
    if (depRoot != null) {
      return depRoot;
    }
    throw new Error(`Unknown dependency: ${dep}`);
  });

  // WHEN
  await expect(
    traverseDependencyGraph(packages.A.root, cb, mockHost),
  ).resolves.not.toThrow();

  // THEN every package is reported exactly once, and only A is the root
  expect(cb).toHaveBeenCalledTimes(3);

  allPackages.forEach(({ root, meta }) => {
    expect(cb).toHaveBeenCalledWith(root, meta, root === packages.A.root);
  });

  // One read per package; one resolution per edge (A->B, A->C, B->C)
  expect(mockHost.readJson).toHaveBeenCalledTimes(3);
  expect(mockHost.resolveDependencyDirectory).toHaveBeenCalledTimes(3);
});

test('stops traversing when callback returns false', async () => {
  // GIVEN a linear dependency chain:
  //   A -> B -> C   (B reaches C through a peer dependency)
  const rootOf = (name: string) => join(tmpdir(), name);
  const packages: Record<string, { root: string; meta: any }> = {
    A: { root: rootOf('A'), meta: { dependencies: { B: '*' } } },
    B: { root: rootOf('B'), meta: { peerDependencies: { C: '*' } } },
    C: { root: rootOf('C'), meta: {} },
  };
  const allPackages = Object.values(packages);

  // The callback declines to descend any further once it reaches B
  const cb: Callback = jest
    .fn()
    .mockName('callback')
    .mockImplementation((dir) => dir !== packages.B.root);

  // Serve each package's metadata from memory; any other path is an error.
  mockHost.readJson.mockImplementation(async (file) => {
    const match = allPackages.find(
      ({ root }) => join(root, 'package.json') === file,
    );
    if (match?.meta == null) {
      throw new Error(`Unexpected file access: ${file}`);
    }
    return match.meta;
  });

  // Dependencies resolve to the fixture roots, whatever the requesting package.
  mockHost.resolveDependencyDirectory.mockImplementation((_dir, dep) => {
    const depRoot = packages[dep]?.root;
    if (depRoot != null) {
      return depRoot;
    }
    throw new Error(`Unknown dependency: ${dep}`);
  });

  // WHEN
  await expect(
    traverseDependencyGraph(packages.A.root, cb, mockHost),
  ).resolves.not.toThrow();

  // THEN only A and B are reported; C is never reached
  expect(cb).toHaveBeenCalledTimes(2);

  expect(cb).toHaveBeenCalledWith(packages.A.root, packages.A.meta, true);
  expect(cb).toHaveBeenCalledWith(packages.B.root, packages.B.meta, false);

  // Two reads (A, B) and a single resolution (A->B)
  expect(mockHost.readJson).toHaveBeenCalledTimes(2);
  expect(mockHost.resolveDependencyDirectory).toHaveBeenCalledTimes(1);
});

0 comments on commit 7316b44

Please sign in to comment.