From 7316b44690eed46ce7e5d5089af77ee1f49a6a5e Mon Sep 17 00:00:00 2001 From: Romain Marcadier Date: Tue, 18 Aug 2020 23:12:20 +0200 Subject: [PATCH] fix(pacmak): `EMFILE` error when running `jsii-pacmak` (#1891) The `findLocalBuildDirs` function did not have a protection against inspecting the same `package.json` file multiple times, and asynchronously traverses the whole dependency tree under the built package. In certain pathological cases, dependencies could be processed many times around (think about how `@aws-cdk/aws-iam` is a direct or transitive dependency of nearly every other `@aws-cdk/aws-*` module). The asynchronous nature of the process means that *many* instances of the same file could be opened at the same time, occasionally inching above the maximum file descriptor count limit, hence causing `EMFILE` ( which is the standard error code for "too many files open"). This change adds a `visitedDirectories` set to prevent re-visiting the same dependency instance multiple times (based on the absolute path of the package root). This incidentally also improves the performance of the process, since making promises incurs overhead, and not re-processing directories multiple times cut out a significant chunk of the promises made in extreme cases. Special thanks to @richardhboyd for having me look into this particular problem. --- By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license]. [Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0 --- packages/jsii-pacmak/lib/dependency-graph.ts | 101 ++++++++++++++++ packages/jsii-pacmak/lib/target.ts | 36 +++--- .../jsii-pacmak/test/dependency-graph.test.ts | 113 ++++++++++++++++++ 3 files changed, 229 insertions(+), 21 deletions(-) create mode 100644 packages/jsii-pacmak/lib/dependency-graph.ts create mode 100644 packages/jsii-pacmak/test/dependency-graph.test.ts diff --git a/packages/jsii-pacmak/lib/dependency-graph.ts b/packages/jsii-pacmak/lib/dependency-graph.ts new file mode 100644 index 0000000000..b4f80e6f18 --- /dev/null +++ b/packages/jsii-pacmak/lib/dependency-graph.ts @@ -0,0 +1,101 @@ +import * as fs from 'fs-extra'; +import { join } from 'path'; + +import * as util from './util'; + +/** + * Traverses the dependency graph and invokes the provided callback method for + * each individual dependency root directory (including the current package). + * The dependency roots are de-duplicated based on their absolute path on the + * file system. + * + * @param packageDir the current package's root directory (i.e: where the + * `package.json` file is located) + * @param callback the function to invoke with each package's informations + * @param host the dependency graph traversal host to use (this parameter + * should typically not be provided unless this module is + * being unit tested) + */ +export async function traverseDependencyGraph( + packageDir: string, + callback: Callback, + host: TraverseDependencyGraphHost = { + readJson: fs.readJson, + resolveDependencyDirectory: util.resolveDependencyDirectory, + }, +): Promise { + return real$traverseDependencyGraph(packageDir, callback, host, new Set()); +} + +/** + * A callback invoked for each node in a NPM module's dependency graph. + * + * @param packageDir the directory where the current package is located. + * @param meta the contents of the `package.json` file for this package. + * @param root whether this package is the root that was provided to the + * `traverseDependencyGraph` call. + * + * @returns `true` if this package's own dependencies should be processed, + * `false` otherwise. + */ +export type Callback = ( + packageDir: string, + meta: PackageJson, + root: boolean, +) => boolean | Promise; + +/** + * Host methods for traversing dependency graphs. + */ +export interface TraverseDependencyGraphHost { + readonly readJson: typeof fs.readJson; + readonly resolveDependencyDirectory: typeof util.resolveDependencyDirectory; +} + +/** + * Contents of the `package.json` file. + */ +export interface PackageJson { + readonly dependencies?: { readonly [name: string]: string }; + readonly peerDependencies?: { readonly [name: string]: string }; + + readonly [key: string]: unknown; +} + +async function real$traverseDependencyGraph( + packageDir: string, + callback: Callback, + host: TraverseDependencyGraphHost, + visited: Set, +): Promise { + // We're at the root if we have not visited anything yet. How convenient! + const isRoot = visited.size === 0; + if (visited.has(packageDir)) { + return void 0; + } + visited.add(packageDir); + + const meta: PackageJson = await host.readJson( + join(packageDir, 'package.json'), + ); + if (!(await callback(packageDir, meta, isRoot))) { + return void 0; + } + + const deps = new Set([ + ...Object.keys(meta.dependencies ?? {}), + ...Object.keys(meta.peerDependencies ?? {}), + ]); + return Promise.all( + Array.from(deps).map((dep) => { + const dependencyDir = host.resolveDependencyDirectory(packageDir, dep); + return real$traverseDependencyGraph( + dependencyDir, + callback, + host, + visited, + ); + }), + // The following ".then" literally just turns a `Promise` into a `Promise`. Convenient! + ).then(); +} diff --git a/packages/jsii-pacmak/lib/target.ts b/packages/jsii-pacmak/lib/target.ts index d04aedafcf..8130b91bc0 100644 --- a/packages/jsii-pacmak/lib/target.ts +++ b/packages/jsii-pacmak/lib/target.ts @@ -3,9 +3,9 @@ import * as reflect from 'jsii-reflect'; import * as spec from '@jsii/spec'; import * as path from 'path'; +import { traverseDependencyGraph } from './dependency-graph'; import { IGenerator } from './generator'; import * as logging from './logging'; -import { resolveDependencyDirectory } from './util'; import { Rosetta } from 'jsii-rosetta'; export abstract class Target { @@ -91,39 +91,33 @@ export async function findLocalBuildDirs( targetName: string, ) { const results = new Set(); - await recurse(rootPackageDir, true); + await traverseDependencyGraph(rootPackageDir, processPackage); return Array.from(results); - async function recurse(packageDir: string, isRoot: boolean) { - const pkg = await fs.readJson(path.join(packageDir, 'package.json')); - + async function processPackage( + packageDir: string, + pkg: any, + isRoot: boolean, + ): Promise { // no jsii or jsii.outdir - either a misconfigured jsii package or a non-jsii dependency. either way, we are done here. if (!pkg.jsii || !pkg.jsii.outdir) { - return; + return false; + } + + if (isRoot) { + // This is the root package - no need to register it's outdir + return true; } // if an output directory exists for this module, then we add it to our // list of results (unless it's the root package, which we are currently building) const outdir = path.join(packageDir, pkg.jsii.outdir, targetName); - if (results.has(outdir)) { - return; - } // Already visited, don't recurse again - - if (!isRoot && (await fs.pathExists(outdir))) { + if (await fs.pathExists(outdir)) { logging.debug(`Found ${outdir} as a local dependency output`); results.add(outdir); } - // now descend to dependencies - await Promise.all( - Object.keys(pkg.dependencies ?? {}).map((dependencyName) => { - const dependencyDir = resolveDependencyDirectory( - packageDir, - dependencyName, - ); - return recurse(dependencyDir, false); - }), - ); + return true; } } diff --git a/packages/jsii-pacmak/test/dependency-graph.test.ts b/packages/jsii-pacmak/test/dependency-graph.test.ts new file mode 100644 index 0000000000..1a18618666 --- /dev/null +++ b/packages/jsii-pacmak/test/dependency-graph.test.ts @@ -0,0 +1,113 @@ +import { tmpdir } from 'os'; +import { join } from 'path'; +import { Callback, traverseDependencyGraph } from '../lib/dependency-graph'; + +const mockHost = { + readJson: jest.fn, [string]>().mockName('fs.readJson'), + resolveDependencyDirectory: jest + .fn() + .mockName('resolveDependencyDirectory'), +}; + +afterEach((done) => { + jest.resetAllMocks(); + done(); +}); + +test('de-duplicates package root directories', async () => { + // GIVEN the following package dependency graph: + // A -> B -> C + // A -> C + const packages: Record = { + A: { + root: join(tmpdir(), 'A'), + meta: { dependencies: { B: '*' }, peerDependencies: { C: '*' } }, + }, + B: { root: join(tmpdir(), 'B'), meta: { dependencies: { C: '*' } } }, + C: { root: join(tmpdir(), 'C'), meta: {} }, + }; + + const cb: Callback = jest + .fn() + .mockName('callback') + .mockImplementation(() => true); + + mockHost.readJson.mockImplementation((file) => { + const result = Object.values(packages).find( + ({ root }) => file === join(root, 'package.json'), + )?.meta; + return result != null + ? Promise.resolve(result) + : Promise.reject(new Error(`Unexpected file access: ${file}`)); + }); + + mockHost.resolveDependencyDirectory.mockImplementation((_dir, dep) => { + const result = packages[dep]?.root; + if (result == null) { + throw new Error(`Unknown dependency: ${dep}`); + } + return result; + }); + + // WHEN + await expect( + traverseDependencyGraph(packages.A.root, cb, mockHost), + ).resolves.not.toThrow(); + + // THEN + expect(cb).toHaveBeenCalledTimes(3); + + for (const { root, meta } of Object.values(packages)) { + expect(cb).toHaveBeenCalledWith(root, meta, root === packages.A.root); + } + + expect(mockHost.readJson).toHaveBeenCalledTimes(3); + expect(mockHost.resolveDependencyDirectory).toHaveBeenCalledTimes(3); +}); + +test('stops traversing when callback returns false', async () => { + // GIVEN the following package dependency graph: + // A -> B -> C + const packages: Record = { + A: { root: join(tmpdir(), 'A'), meta: { dependencies: { B: '*' } } }, + B: { root: join(tmpdir(), 'B'), meta: { peerDependencies: { C: '*' } } }, + C: { root: join(tmpdir(), 'C'), meta: {} }, + }; + + // The callback requests aborting once it reached B + const cb: Callback = jest + .fn() + .mockName('callback') + .mockImplementation((root) => root !== packages.B.root); + + mockHost.readJson.mockImplementation((file) => { + const result = Object.values(packages).find( + ({ root }) => file === join(root, 'package.json'), + )?.meta; + return result != null + ? Promise.resolve(result) + : Promise.reject(new Error(`Unexpected file access: ${file}`)); + }); + + mockHost.resolveDependencyDirectory.mockImplementation((_dir, dep) => { + const result = packages[dep]?.root; + if (result == null) { + throw new Error(`Unknown dependency: ${dep}`); + } + return result; + }); + + // WHEN + await expect( + traverseDependencyGraph(packages.A.root, cb, mockHost), + ).resolves.not.toThrow(); + + // THEN + expect(cb).toHaveBeenCalledTimes(2); + + expect(cb).toHaveBeenCalledWith(packages.A.root, packages.A.meta, true); + expect(cb).toHaveBeenCalledWith(packages.B.root, packages.B.meta, false); + + expect(mockHost.readJson).toHaveBeenCalledTimes(2); + expect(mockHost.resolveDependencyDirectory).toHaveBeenCalledTimes(1); +});