Skip to content

Commit

Permalink
replace globby with much faster code when gitignore enabled (#426)
Browse files Browse the repository at this point in the history
  • Loading branch information
phiresky authored and webpro committed Jan 11, 2024
1 parent 6b59ea0 commit 08381ba
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 11 deletions.
57 changes: 56 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions packages/knip/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,29 +54,31 @@
],
"dependencies": {
"@ericcornelissen/bash-parser": "0.5.2",
"@nodelib/fs.walk": "2.0.0",
"@npmcli/map-workspaces": "3.0.4",
"@npmcli/package-json": "5.0.0",
"@pkgjs/parseargs": "0.11.0",
"@pnpm/logger": "5.0.0",
"@pnpm/workspace.pkgs-graph": "^2.0.12",
"@snyk/github-codeowners": "1.1.0",
"@types/picomatch": "2.3.3",
"chalk": "^5.3.0",
"easy-table": "1.2.0",
"fast-glob": "3.3.2",
"globby": "^14.0.0",
"jiti": "1.21.0",
"js-yaml": "4.1.0",
"micromatch": "4.0.5",
"minimist": "1.2.8",
"picomatch": "3.0.1",
"pretty-ms": "8.0.0",
"strip-json-comments": "5.0.1",
"summary": "2.1.0",
"zod": "3.22.4",
"zod-validation-error": "2.1.0"
},
"peerDependencies": {
"typescript": ">=5.0.4",
"@types/node": ">=18"
"@types/node": ">=18",
"typescript": ">=5.0.4"
},
"devDependencies": {
"@jest/types": "29.6.3",
Expand Down
3 changes: 1 addition & 2 deletions packages/knip/src/PrincipalFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import { ProjectPrincipal } from './ProjectPrincipal.js';
import { debugLog } from './util/debug.js';
import { toAbsolute } from './util/path.js';
import type { SyncCompilers, AsyncCompilers } from './compilers/types.js';
import type { GlobbyFilterFunction } from 'globby';

type Paths = ts.CompilerOptions['paths'];

Expand All @@ -16,7 +15,7 @@ export type PrincipalOptions = {
paths: Paths;
compilers: [SyncCompilers, AsyncCompilers];
pkgName: string;
isGitIgnored: GlobbyFilterFunction;
isGitIgnored: (path: string) => boolean;
isIsolateWorkspaces: boolean;
};

Expand Down
3 changes: 1 addition & 2 deletions packages/knip/src/ProjectPrincipal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import type { SerializableExportMember } from './types/exports.js';
import type { UnresolvedImport } from './types/imports.js';
import type { BoundSourceFile, GetResolvedModule, ProgramMaybe53 } from './typescript/SourceFile.js';
import type { ReferencedDependencies } from './WorkspaceWorker.js';
import type { GlobbyFilterFunction } from 'globby';

// These compiler options override local options
const baseCompilerOptions = {
Expand Down Expand Up @@ -65,7 +64,7 @@ export class ProjectPrincipal {
// We don't want to report unused exports of config/plugin entry files
skipExportsAnalysis = new Set<string>();

isGitIgnored: GlobbyFilterFunction;
isGitIgnored: (path: string) => boolean;
cwd: string;
compilerOptions: ts.CompilerOptions;
extensions: Set<string>;
Expand Down
4 changes: 2 additions & 2 deletions packages/knip/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { isGitIgnoredSync } from 'globby';
import micromatch from 'micromatch';
import { _getDependenciesFromScripts } from './binaries/index.js';
import { getExtensions, mergeCompilers } from './compilers/index.js';
Expand All @@ -11,6 +10,7 @@ import { PrincipalFactory } from './PrincipalFactory.js';
import { ProjectPrincipal } from './ProjectPrincipal.js';
import { debugLogObject, debugLogArray, debugLog, exportLookupLog } from './util/debug.js';
import { _glob, negate } from './util/glob.js';
import { isGitIgnoredFn } from './util/globby.js';
import {
getEntryPathFromManifest,
getPackageNameFromFilePath,
Expand Down Expand Up @@ -62,7 +62,7 @@ export const main = async (unresolvedConfiguration: CommandLineOptions) => {

// Central function, to prevent `Path is not in cwd` errors from `globby`
// Provide `cwd`, otherwise defaults to `process.cwd()` w/ incompatible slashes in Windows
const isGitIgnored = gitignore ? isGitIgnoredSync({ cwd }) : () => false;
const isGitIgnored = gitignore ? await isGitIgnoredFn({ cwd }) : () => false;

streamer.cast('Reading workspace configuration(s)...');

Expand Down
2 changes: 1 addition & 1 deletion packages/knip/src/util/glob.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import fg from 'fast-glob';
import { globby } from 'globby';
import { GLOBAL_IGNORE_PATTERNS, ROOT_WORKSPACE_NAME } from '../constants.js';
import { compact } from './array.js';
import { debugLogObject } from './debug.js';
import { globby } from './globby.js';
import { join, relative } from './path.js';
import { timerify } from './Performance.js';

Expand Down
131 changes: 131 additions & 0 deletions packages/knip/src/util/globby.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import * as fs from 'fs';
import { promisify } from 'node:util';
import { walk as _walk } from '@nodelib/fs.walk';
import { type Options as FastGlobOptions } from 'fast-glob';
import fastGlob from 'fast-glob';
import picomatch from "picomatch"; // TODO: this should potentially be from "picomatch/posix" for windows compat
import { debugLogObject } from './debug.js';
import * as path from './path.js';
import { timerify } from './Performance.js';
import type { Entry } from '@nodelib/fs.walk'

// Promise-returning wrapper around the callback-style `walk` exported by @nodelib/fs.walk.
const walk = promisify(_walk);
/**
* Options accepted by the helpers in this module: the two fields declared here plus
* the fast-glob options picked in `FastGlobOptionsWithoutCwd`.
*/
type Options = {
/** Respect ignore patterns in `.gitignore` files that apply to the globbed files. */
readonly gitignore?: boolean;

/** The current working directory in which to search. */
readonly cwd: string;
} & FastGlobOptionsWithoutCwd;

/**
* The subset of fast-glob options that is passed through. `cwd` is deliberately not picked
* here: `Options` declares it itself as a required string.
*/
type FastGlobOptionsWithoutCwd = Pick<FastGlobOptions, 'onlyDirectories' | 'ignore' | 'absolute' | 'dot'>;

/**
 * Convert one gitignore rule into the equivalent micromatch/picomatch globs.
 *
 * micromatch and gitignore use slightly different syntax. We can't use the `ignore` npm library
 * because (a) it doesn't support multiple gitignore files and (b) we want to pass the resulting
 * globs to fast-glob, which uses micromatch internally.
 *
 * @param pattern A single non-empty, non-comment line of a `.gitignore` file.
 * @param base Directory of that `.gitignore` file, relative to the search root; every
 *   returned glob is prefixed with it.
 * @returns `negated` is true when the rule started with `!`. `patterns` holds two globs:
 *   one for the entry itself and one for everything below it.
 */
function convertGitignoreToMicromatch(pattern: string, base: string) {
  const negated = pattern[0] === '!';
  let glob = negated ? pattern.slice(1) : pattern;

  // gitignore: only a separator at the beginning or in the middle anchors a rule to the
  // directory of its .gitignore. A trailing slash merely restricts the rule to directories,
  // so `node_modules/` must still match at any depth.
  const withoutTrailingSlash = glob.endsWith('/') ? glob.slice(0, -1) : glob;
  const matchesAtAnyDepth = glob !== '/' && !withoutTrailingSlash.includes('/');

  if (matchesAtAnyDepth) glob = '**/' + glob;
  // leading slash on git is equivalent to no leading slash in micromatch
  else if (glob.startsWith('/')) glob = glob.slice(1);

  // micromatch does not interpret dirs as matching their children, git does
  const children = glob.endsWith('/') ? glob + '**' : glob + '/**';

  return { negated, patterns: [glob, children].map(entry => path.join(base, entry)) };
}

/**
 * Read one `.gitignore` file and convert every rule in it to micromatch globs.
 *
 * This function needs to be synchronous currently because the fs.walk library takes a
 * synchronous callback for filtering.
 *
 * @param filePath Path of the `.gitignore` file to read.
 * @param cwd Search root; the globs are made relative to it.
 * @returns One `{ negated, patterns }` record per rule, in file order.
 */
function parseGitignoreFile(filePath: string, cwd: string) {
  const file = fs.readFileSync(filePath, 'utf8');
  const base = path.relative(cwd, path.dirname(filePath));

  return (
    file
      .split(/\r?\n/)
      // git ignores trailing spaces unless they are escaped with a backslash
      .map(line => line.replace(/(?<!\\) +$/, ''))
      // skip blank lines and comments
      .filter(line => line && !line.startsWith('#'))
      .map(pattern => convertGitignoreToMicromatch(pattern, base))
  );
}
/**
* contains parsed individual gitignore rules from potentially multiple gitignore files:
* `ignores` collects the globs of plain rules, `unignores` the globs of negated (`!`) rules
*/
type Gitignores = { ignores: string[]; unignores: string[] };

/**
 * Walks `options.cwd`, parsing every `.gitignore` found and applying the rules collected so far
 * to prune the walk (early pruning).
 *
 * Nothing is ignored until the first `.gitignore` has been parsed. Before deciding whether to
 * descend into a directory, the `.gitignore` sitting next to it is loaded, so the outcome does
 * not depend on the order in which the file system lists entries.
 *
 * @param options Only `options.cwd` is read: it is the walk root and the base for relative paths.
 * @returns The accumulated ignore and unignore globs of all visited `.gitignore` files.
 */
async function _parseFindGitignores(options: Options): Promise<Gitignores> {
  const ignores: string[] = [];
  const unignores: string[] = [];
  // insertion-ordered, so it doubles as the list of considered files for the debug log
  const loadedFiles = new Set<string>();
  const checkedDirs = new Set<string>();
  // whenever a new gitignore file is found, this matcher is recompiled.
  // It must start out matching nothing: deepFilter negates it, so a match-all default would
  // prune every directory until some .gitignore happened to be seen.
  let matcher: picomatch.Matcher = () => false;

  /** Parse a .gitignore once, add its rules and recompile the matcher. */
  const addGitignoreFile = (filePath: string) => {
    if (loadedFiles.has(filePath)) return;
    loadedFiles.add(filePath);
    for (const rule of parseGitignoreFile(filePath, options.cwd)) {
      if (rule.negated) unignores.push(...rule.patterns);
      else ignores.push(...rule.patterns);
    }
    matcher = picomatch(ignores, { ignore: unignores });
  };

  /** Make sure the .gitignore in the same directory as `entry` (if any) has been applied. */
  const loadSiblingGitignore = (entry: Entry) => {
    // NOTE(review): assumes entry.path ends with entry.name, so the prefix is the parent
    // directory including its trailing separator. Confirm against @nodelib/fs.walk.
    const dirPrefix = entry.path.slice(0, entry.path.length - entry.name.length);
    if (checkedDirs.has(dirPrefix)) return;
    checkedDirs.add(dirPrefix);
    const candidate = dirPrefix + '.gitignore';
    // lstat rather than stat, so a symlink is not reported as a regular file
    if (fs.lstatSync(candidate, { throwIfNoEntry: false })?.isFile()) addGitignoreFile(candidate);
  };

  const entryFilter = (entry: Entry) => {
    if (entry.dirent.isFile() && entry.name === '.gitignore') {
      addGitignoreFile(entry.path);
      return true;
    }
    return false;
  };

  const deepFilter = (entry: Entry) => {
    // git keeps no ignore rules inside its own metadata directory; skipping it avoids a costly walk
    if (entry.name === '.git') return false;
    loadSiblingGitignore(entry);
    return !matcher(path.relative(options.cwd, entry.path));
  };

  // we don't actually care about the result of the walk since we incrementally add the results in entryFilter
  await walk(options.cwd, {
    // when we see a .gitignore, parse and add it
    entryFilter: timerify(entryFilter),
    // early pruning: don't recurse into directories that are ignored (important!)
    deepFilter: timerify(deepFilter),
  });
  debugLogObject(options.cwd, 'parsed gitignore files', {
    consideredFiles: [...loadedFiles],
    ignores,
    unignores,
  });
  return { ignores, unignores };
}

// `_parseFindGitignores` wrapped by `timerify` (presumably for timing measurements — see ./Performance.js)
const parseFindGitignores = timerify(_parseFindGitignores);
// since knip parses gitignores only a limited number of times and mostly purely for the repo root, permanent caching should be fine
// keyed by `options.cwd`; entries are never evicted
const cachedIgnores = new Map<string, Gitignores>();

/**
 * Return the gitignore rules for `options.cwd`, parsing them on first use and serving
 * every later request for the same `cwd` from the in-memory cache.
 */
async function loadGitignores(options: Options): Promise<Gitignores> {
  const { cwd } = options;

  const cached = cachedIgnores.get(cwd);
  if (cached) return cached;

  const parsed = await parseFindGitignores(options);
  cachedIgnores.set(cwd, parsed);
  return parsed;
}
/**
 * Simpler and faster replacement for the globby npm library.
 *
 * @param patterns Glob pattern(s) handed to fast-glob unchanged.
 * @param options fast-glob options plus `gitignore`. When `gitignore` is truthy, the rules of
 *   the `.gitignore` files under `options.cwd` are appended to the ignore list.
 * @returns The paths matched by fast-glob.
 */
export async function globby(patterns: string | string[], options: Options): Promise<string[]> {
  // Work on a copy: pushing onto `options.ignore` itself would leak gitignore rules into the
  // caller's array and pile them up on every call that reuses it.
  const ignore = [...(options.ignore ?? [])];

  if (options.gitignore) {
    const gitignores = await loadGitignores(options);
    // Loops instead of `push(...list)`: spreading a very large list as call arguments can
    // exceed the engine's argument limit.
    // add git ignores to knip explicit ignores
    for (const glob of gitignores.ignores) ignore.push(glob);
    // add git unignores (!foo/bar).
    // I'm not sure 100% what the behaviour of fast-glob is here. Potentially this will cause it
    // to have git unignores to take precedence over knip ignores.
    for (const glob of gitignores.unignores) ignore.push('!' + glob);
  }

  debugLogObject(options.cwd, `fastGlobOptions`, { patterns, ...options, ignore });

  return fastGlob(patterns, {
    ...options,
    ignore,
  });
}

/**
 * Build a predicate that should be equivalent to `git check-ignore`: it reports whether a
 * path is covered by the `.gitignore` rules found under `options.cwd`.
 *
 * The rules are loaded and the matcher compiled once, up front; the returned function
 * only makes the given path relative to `options.cwd` and tests it.
 */
export async function isGitIgnoredFn(options: Options): Promise<(path: string) => boolean> {
  const { ignores, unignores } = await loadGitignores(options);
  const matchesIgnoreRules = picomatch(ignores, { ignore: unignores });

  // For debugging, log `path.relative(options.cwd, filePath)` together with the loaded rules here.
  const isGitIgnored = (filePath: string) => matchesIgnoreRules(path.relative(options.cwd, filePath));

  return timerify(isGitIgnored);
}

1 comment on commit 08381ba

@vercel
Copy link

@vercel vercel bot commented on 08381ba Jan 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

knip-dev-v4 – ./packages/docs

knip-dev-v4.vercel.app
knip-dev-v4-webpro.vercel.app
knip-dev-v4-git-v4-webpro.vercel.app

Please sign in to comment.