From 331de0f797ee12eab9310369836e16daf7ad1929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=91=A8=F0=9F=8F=BC=E2=80=8D=F0=9F=92=BB=20Romain=20M?= =?UTF-8?q?arcadier-Muller?= Date: Fri, 14 Aug 2020 17:13:52 +0200 Subject: [PATCH 1/2] fix(pacmak): `EMFILE` error when running `jsii-pacmak` The `findLocalBuildDirs` function did not have a protection against inspecting the same `package.json` file multiple times, and asynchronously traverses the whole dependency tree under the built package. In certain pathological cases, dependencies could be processed many times around (think about how `@aws-cdk/aws-iam` is a direct or transitive dependency of nearly every other `@aws-cdk/aws-*` module). The asynchronous nature of the process means that *many* instances of the same file could be opened at the same time, occasionally inching above the maximum file descriptor count limit, hence causing `EMFILE` ( which is the standard error code for "too many files open"). This change adds a `visitedDirectories` set to prevent re-visiting the same dependency instance multiple times (based on the absolute path of the package root). This incidentally also improves the performance of the process, since making promises incurs overhead, and not re-processing directories multiple times cut out a significant chunk of the promises made in extreme cases. Special thanks to @richardhboyd for having me look into this particular problem. --- packages/jsii-pacmak/lib/target.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/jsii-pacmak/lib/target.ts b/packages/jsii-pacmak/lib/target.ts index d04aedafcf..bf2524abb2 100644 --- a/packages/jsii-pacmak/lib/target.ts +++ b/packages/jsii-pacmak/lib/target.ts @@ -90,11 +90,19 @@ export async function findLocalBuildDirs( rootPackageDir: string, targetName: string, ) { + const visitedDirectories = new Set(); + const results = new Set(); await recurse(rootPackageDir, true); return Array.from(results); async function recurse(packageDir: string, isRoot: boolean) { + // Prevent re-processing the same `package.json` over and over again. + if (visitedDirectories.has(packageDir)) { + return; + } + visitedDirectories.add(packageDir); + const pkg = await fs.readJson(path.join(packageDir, 'package.json')); // no jsii or jsii.outdir - either a misconfigured jsii package or a non-jsii dependency. either way, we are done here. From 0adfe536d5ef2262c706c894550a969848f452da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=91=A8=F0=9F=8F=BC=E2=80=8D=F0=9F=92=BB=20Romain=20M?= =?UTF-8?q?arcadier-Muller?= Date: Tue, 18 Aug 2020 15:49:58 +0200 Subject: [PATCH 2/2] extract dependency graph traversal to separate module & unit test it --- packages/jsii-pacmak/lib/dependency-graph.ts | 101 ++++++++++++++++ packages/jsii-pacmak/lib/target.ts | 44 +++---- .../jsii-pacmak/test/dependency-graph.test.ts | 113 ++++++++++++++++++ 3 files changed, 229 insertions(+), 29 deletions(-) create mode 100644 packages/jsii-pacmak/lib/dependency-graph.ts create mode 100644 packages/jsii-pacmak/test/dependency-graph.test.ts diff --git a/packages/jsii-pacmak/lib/dependency-graph.ts b/packages/jsii-pacmak/lib/dependency-graph.ts new file mode 100644 index 0000000000..b4f80e6f18 --- /dev/null +++ b/packages/jsii-pacmak/lib/dependency-graph.ts @@ -0,0 +1,101 @@ +import * as fs from 'fs-extra'; +import { join } from 'path'; + +import * as util from './util'; + +/** + * Traverses the dependency graph and invokes the provided callback method for + * each individual dependency root directory (including the current package). + * The dependency roots are de-duplicated based on their absolute path on the + * file system. + * + * @param packageDir the current package's root directory (i.e: where the + * `package.json` file is located) + * @param callback the function to invoke with each package's informations + * @param host the dependency graph traversal host to use (this parameter + * should typically not be provided unless this module is + * being unit tested) + */ +export async function traverseDependencyGraph( + packageDir: string, + callback: Callback, + host: TraverseDependencyGraphHost = { + readJson: fs.readJson, + resolveDependencyDirectory: util.resolveDependencyDirectory, + }, +): Promise { + return real$traverseDependencyGraph(packageDir, callback, host, new Set()); +} + +/** + * A callback invoked for each node in a NPM module's dependency graph. + * + * @param packageDir the directory where the current package is located. + * @param meta the contents of the `package.json` file for this package. + * @param root whether this package is the root that was provided to the + * `traverseDependencyGraph` call. + * + * @returns `true` if this package's own dependencies should be processed, + * `false` otherwise. + */ +export type Callback = ( + packageDir: string, + meta: PackageJson, + root: boolean, +) => boolean | Promise; + +/** + * Host methods for traversing dependency graphs. + */ +export interface TraverseDependencyGraphHost { + readonly readJson: typeof fs.readJson; + readonly resolveDependencyDirectory: typeof util.resolveDependencyDirectory; +} + +/** + * Contents of the `package.json` file. + */ +export interface PackageJson { + readonly dependencies?: { readonly [name: string]: string }; + readonly peerDependencies?: { readonly [name: string]: string }; + + readonly [key: string]: unknown; +} + +async function real$traverseDependencyGraph( + packageDir: string, + callback: Callback, + host: TraverseDependencyGraphHost, + visited: Set, +): Promise { + // We're at the root if we have not visited anything yet. How convenient! + const isRoot = visited.size === 0; + if (visited.has(packageDir)) { + return void 0; + } + visited.add(packageDir); + + const meta: PackageJson = await host.readJson( + join(packageDir, 'package.json'), + ); + if (!(await callback(packageDir, meta, isRoot))) { + return void 0; + } + + const deps = new Set([ + ...Object.keys(meta.dependencies ?? {}), + ...Object.keys(meta.peerDependencies ?? {}), + ]); + return Promise.all( + Array.from(deps).map((dep) => { + const dependencyDir = host.resolveDependencyDirectory(packageDir, dep); + return real$traverseDependencyGraph( + dependencyDir, + callback, + host, + visited, + ); + }), + // The following ".then" literally just turns a `Promise` into a `Promise`. Convenient! + ).then(); +} diff --git a/packages/jsii-pacmak/lib/target.ts b/packages/jsii-pacmak/lib/target.ts index bf2524abb2..8130b91bc0 100644 --- a/packages/jsii-pacmak/lib/target.ts +++ b/packages/jsii-pacmak/lib/target.ts @@ -3,9 +3,9 @@ import * as reflect from 'jsii-reflect'; import * as spec from '@jsii/spec'; import * as path from 'path'; +import { traverseDependencyGraph } from './dependency-graph'; import { IGenerator } from './generator'; import * as logging from './logging'; -import { resolveDependencyDirectory } from './util'; import { Rosetta } from 'jsii-rosetta'; export abstract class Target { @@ -90,48 +90,34 @@ export async function findLocalBuildDirs( rootPackageDir: string, targetName: string, ) { - const visitedDirectories = new Set(); - const results = new Set(); - await recurse(rootPackageDir, true); + await traverseDependencyGraph(rootPackageDir, processPackage); return Array.from(results); - async function recurse(packageDir: string, isRoot: boolean) { - // Prevent re-processing the same `package.json` over and over again. - if (visitedDirectories.has(packageDir)) { - return; - } - visitedDirectories.add(packageDir); - - const pkg = await fs.readJson(path.join(packageDir, 'package.json')); - + async function processPackage( + packageDir: string, + pkg: any, + isRoot: boolean, + ): Promise { // no jsii or jsii.outdir - either a misconfigured jsii package or a non-jsii dependency. either way, we are done here. if (!pkg.jsii || !pkg.jsii.outdir) { - return; + return false; + } + + if (isRoot) { + // This is the root package - no need to register it's outdir + return true; } // if an output directory exists for this module, then we add it to our // list of results (unless it's the root package, which we are currently building) const outdir = path.join(packageDir, pkg.jsii.outdir, targetName); - if (results.has(outdir)) { - return; - } // Already visited, don't recurse again - - if (!isRoot && (await fs.pathExists(outdir))) { + if (await fs.pathExists(outdir)) { logging.debug(`Found ${outdir} as a local dependency output`); results.add(outdir); } - // now descend to dependencies - await Promise.all( - Object.keys(pkg.dependencies ?? {}).map((dependencyName) => { - const dependencyDir = resolveDependencyDirectory( - packageDir, - dependencyName, - ); - return recurse(dependencyDir, false); - }), - ); + return true; } } diff --git a/packages/jsii-pacmak/test/dependency-graph.test.ts b/packages/jsii-pacmak/test/dependency-graph.test.ts new file mode 100644 index 0000000000..1a18618666 --- /dev/null +++ b/packages/jsii-pacmak/test/dependency-graph.test.ts @@ -0,0 +1,113 @@ +import { tmpdir } from 'os'; +import { join } from 'path'; +import { Callback, traverseDependencyGraph } from '../lib/dependency-graph'; + +const mockHost = { + readJson: jest.fn, [string]>().mockName('fs.readJson'), + resolveDependencyDirectory: jest + .fn() + .mockName('resolveDependencyDirectory'), +}; + +afterEach((done) => { + jest.resetAllMocks(); + done(); +}); + +test('de-duplicates package root directories', async () => { + // GIVEN the following package dependency graph: + // A -> B -> C + // A -> C + const packages: Record = { + A: { + root: join(tmpdir(), 'A'), + meta: { dependencies: { B: '*' }, peerDependencies: { C: '*' } }, + }, + B: { root: join(tmpdir(), 'B'), meta: { dependencies: { C: '*' } } }, + C: { root: join(tmpdir(), 'C'), meta: {} }, + }; + + const cb: Callback = jest + .fn() + .mockName('callback') + .mockImplementation(() => true); + + mockHost.readJson.mockImplementation((file) => { + const result = Object.values(packages).find( + ({ root }) => file === join(root, 'package.json'), + )?.meta; + return result != null + ? Promise.resolve(result) + : Promise.reject(new Error(`Unexpected file access: ${file}`)); + }); + + mockHost.resolveDependencyDirectory.mockImplementation((_dir, dep) => { + const result = packages[dep]?.root; + if (result == null) { + throw new Error(`Unknown dependency: ${dep}`); + } + return result; + }); + + // WHEN + await expect( + traverseDependencyGraph(packages.A.root, cb, mockHost), + ).resolves.not.toThrow(); + + // THEN + expect(cb).toHaveBeenCalledTimes(3); + + for (const { root, meta } of Object.values(packages)) { + expect(cb).toHaveBeenCalledWith(root, meta, root === packages.A.root); + } + + expect(mockHost.readJson).toHaveBeenCalledTimes(3); + expect(mockHost.resolveDependencyDirectory).toHaveBeenCalledTimes(3); +}); + +test('stops traversing when callback returns false', async () => { + // GIVEN the following package dependency graph: + // A -> B -> C + const packages: Record = { + A: { root: join(tmpdir(), 'A'), meta: { dependencies: { B: '*' } } }, + B: { root: join(tmpdir(), 'B'), meta: { peerDependencies: { C: '*' } } }, + C: { root: join(tmpdir(), 'C'), meta: {} }, + }; + + // The callback requests aborting once it reached B + const cb: Callback = jest + .fn() + .mockName('callback') + .mockImplementation((root) => root !== packages.B.root); + + mockHost.readJson.mockImplementation((file) => { + const result = Object.values(packages).find( + ({ root }) => file === join(root, 'package.json'), + )?.meta; + return result != null + ? Promise.resolve(result) + : Promise.reject(new Error(`Unexpected file access: ${file}`)); + }); + + mockHost.resolveDependencyDirectory.mockImplementation((_dir, dep) => { + const result = packages[dep]?.root; + if (result == null) { + throw new Error(`Unknown dependency: ${dep}`); + } + return result; + }); + + // WHEN + await expect( + traverseDependencyGraph(packages.A.root, cb, mockHost), + ).resolves.not.toThrow(); + + // THEN + expect(cb).toHaveBeenCalledTimes(2); + + expect(cb).toHaveBeenCalledWith(packages.A.root, packages.A.meta, true); + expect(cb).toHaveBeenCalledWith(packages.B.root, packages.B.meta, false); + + expect(mockHost.readJson).toHaveBeenCalledTimes(2); + expect(mockHost.resolveDependencyDirectory).toHaveBeenCalledTimes(1); +});