Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace globby with much faster code when gitignore enabled #426

Merged
merged 6 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions packages/knip/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,29 +54,31 @@
],
"dependencies": {
"@ericcornelissen/bash-parser": "0.5.2",
"@nodelib/fs.walk": "2.0.0",
"@npmcli/map-workspaces": "3.0.4",
"@npmcli/package-json": "5.0.0",
"@pkgjs/parseargs": "0.11.0",
"@pnpm/logger": "5.0.0",
"@pnpm/workspace.pkgs-graph": "^2.0.13",
"@snyk/github-codeowners": "1.1.0",
"@types/picomatch": "2.3.3",
"chalk": "^5.3.0",
"easy-table": "1.2.0",
"fast-glob": "3.3.2",
"globby": "^14.0.0",
"jiti": "1.21.0",
"js-yaml": "4.1.0",
"micromatch": "4.0.5",
"minimist": "1.2.8",
"picomatch": "3.0.1",
"pretty-ms": "8.0.0",
"strip-json-comments": "5.0.1",
"summary": "2.1.0",
"zod": "3.22.4",
"zod-validation-error": "2.1.0"
},
"peerDependencies": {
"typescript": ">=5.0.4",
"@types/node": ">=18"
"@types/node": ">=18",
"typescript": ">=5.0.4"
},
"devDependencies": {
"@jest/types": "29.6.3",
Expand Down
3 changes: 1 addition & 2 deletions packages/knip/src/PrincipalFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import ts from 'typescript';
import { ProjectPrincipal } from './ProjectPrincipal.js';
import { toAbsolute } from './util/path.js';
import type { SyncCompilers, AsyncCompilers } from './types/compilers.js';
import type { GlobbyFilterFunction } from 'globby';

type Paths = ts.CompilerOptions['paths'];

Expand All @@ -15,7 +14,7 @@ export type PrincipalOptions = {
paths: Paths;
compilers: [SyncCompilers, AsyncCompilers];
pkgName: string;
isGitIgnored: GlobbyFilterFunction;
isGitIgnored: (path: string) => boolean;
isIsolateWorkspaces: boolean;
};

Expand Down
3 changes: 1 addition & 2 deletions packages/knip/src/ProjectPrincipal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import type { SyncCompilers, AsyncCompilers } from './types/compilers.js';
import type { ExportItem, ExportItemMember } from './types/exports.js';
import type { UnresolvedImport } from './types/imports.js';
import type { BoundSourceFile, GetResolvedModule, ProgramMaybe53 } from './typescript/SourceFile.js';
import type { GlobbyFilterFunction } from 'globby';

// These compiler options override local options
const baseCompilerOptions = {
Expand Down Expand Up @@ -57,7 +56,7 @@ export class ProjectPrincipal {
// We don't want to report unused exports of config/plugin entry files
skipExportsAnalysis = new Set<string>();

isGitIgnored: GlobbyFilterFunction;
isGitIgnored: (path: string) => boolean;
cwd: string;
compilerOptions: ts.CompilerOptions;
extensions: Set<string>;
Expand Down
4 changes: 2 additions & 2 deletions packages/knip/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { isGitIgnoredSync } from 'globby';
import micromatch from 'micromatch';
import { _getDependenciesFromScripts } from './binaries/index.js';
import { ConfigurationChief } from './ConfigurationChief.js';
Expand All @@ -10,6 +9,7 @@ import { PrincipalFactory } from './PrincipalFactory.js';
import { ProjectPrincipal } from './ProjectPrincipal.js';
import { debugLogObject, debugLogArray, debugLog } from './util/debug.js';
import { _glob, negate } from './util/glob.js';
import { isGitIgnoredFn } from './util/globby.js';
import {
getEntryPathFromManifest,
getPackageNameFromFilePath,
Expand Down Expand Up @@ -59,7 +59,7 @@ export const main = async (unresolvedConfiguration: CommandLineOptions) => {

// Central function, to prevent `Path is not in cwd` errors from `globby`
// Provide `cwd`, otherwise defaults to `process.cwd()` w/ incompatible slashes in Windows
const isGitIgnored = gitignore ? isGitIgnoredSync({ cwd }) : () => false;
const isGitIgnored = gitignore ? await isGitIgnoredFn({ cwd }) : () => false;

streamer.cast('Reading workspace configuration(s)...');

Expand Down
2 changes: 1 addition & 1 deletion packages/knip/src/util/glob.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import fg from 'fast-glob';
import { globby } from 'globby';
import { GLOBAL_IGNORE_PATTERNS, ROOT_WORKSPACE_NAME } from '../constants.js';
import { compact } from './array.js';
import { debugLogObject } from './debug.js';
import { globby } from './globby.js';
import { join, relative } from './path.js';
import { timerify } from './Performance.js';

Expand Down
131 changes: 131 additions & 0 deletions packages/knip/src/util/globby.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import * as fs from 'fs';
import { promisify } from 'node:util';
import { walk as _walk } from '@nodelib/fs.walk';
import { type Options as FastGlobOptions } from 'fast-glob';
import fastGlob from 'fast-glob';
import picomatch from "picomatch"; // TODO: this should potentially be from "picomatch/posix" for windows compat
import { debugLogObject } from './debug.js';
import * as path from './path.js';
import { timerify } from './Performance.js';
import type { Entry } from '@nodelib/fs.walk'

/** Promise-returning wrapper around the callback-style `walk` exported by `@nodelib/fs.walk`. */
const walk = promisify(_walk);
/**
 * Options accepted by the helpers in this module: the gitignore toggle and the
 * working directory, combined with the subset of fast-glob options picked in
 * `FastGlobOptionsWithoutCwd`.
 */
type Options = {
/** Respect ignore patterns in `.gitignore` files that apply to the globbed files. */
readonly gitignore?: boolean;

/** The current working directory in which to search. */
readonly cwd: string;
} & FastGlobOptionsWithoutCwd;

/** The fast-glob options that are accepted here; `cwd` is declared on `Options` itself, where it is required. */
type FastGlobOptionsWithoutCwd = Pick<FastGlobOptions, 'onlyDirectories' | 'ignore' | 'absolute' | 'dot'>;

/**
 * Converts a single `.gitignore` rule into equivalent micromatch/picomatch globs.
 *
 * micromatch and gitignore use slightly different syntax, so the rule is rewritten.
 * We can't use the `ignore` npm library because (a) it doesn't support multiple gitignore files and
 * (b) we want to pass the resulting globs to fast-glob, which uses micromatch internally.
 *
 * @param pattern One non-empty, non-comment line of a `.gitignore` file.
 * @param base Directory of that `.gitignore` file relative to the cwd; every glob is prefixed with it.
 * @returns `negated` is true when the rule started with `!`. `patterns` holds two globs:
 *   one for the path itself and one for everything below it.
 */
function convertGitignoreToMicromatch(pattern: string, base: string) {
  const negated = pattern.startsWith('!');
  let glob = negated ? pattern.slice(1) : pattern;

  // Only a separator at the beginning or in the middle anchors a rule to the
  // directory of its .gitignore file. A trailing slash merely means "directories
  // only", so `node_modules/` must still match at any depth.
  const withoutTrailingSlash = glob.endsWith('/') ? glob.slice(0, -1) : glob;
  const isAnchored = glob.startsWith('/') || withoutTrailingSlash.includes('/');

  if (!isAnchored) {
    // gitignore matches by basename at any level when the rule is not anchored
    glob = '**/' + glob;
  } else if (glob.startsWith('/')) {
    // leading slash on git is equivalent to no leading slash in micromatch
    glob = glob.slice(1);
  }

  // micromatch does not interpret dirs as matching their children, git does
  const descendants = glob.endsWith('/') ? glob + '**' : glob + '/**';

  const patterns: string[] = [glob, descendants].map(p => path.join(base, p));
  return { negated, patterns };
}

/**
 * Reads one `.gitignore` file and converts each of its rules into micromatch globs.
 *
 * Kept synchronous on purpose: it is invoked from the fs.walk filter callback,
 * which has to answer synchronously.
 *
 * @param filePath Path of the `.gitignore` file to read.
 * @param cwd Directory the resulting globs are made relative to.
 * @returns One converted rule per non-empty line that does not start with `#`.
 */
function parseGitignoreFile(filePath: string, cwd: string) {
  const contents = fs.readFileSync(filePath, 'utf8');
  const relativeDir = path.relative(cwd, path.dirname(filePath));

  const rules: ReturnType<typeof convertGitignoreToMicromatch>[] = [];
  for (const line of contents.split(/\r?\n/)) {
    // skip blank lines and comments
    if (line === '' || line.startsWith('#')) continue;
    rules.push(convertGitignoreToMicromatch(line, relativeDir));
  }
  return rules;
}
/**
 * Contains parsed individual gitignore rules from potentially multiple gitignore files.
 * `ignores` collects the globs of regular rules, `unignores` the globs of negated (`!`) rules.
 */
type Gitignores = { ignores: string[]; unignores: string[] };

/**
 * Walks `options.cwd`, parsing every `.gitignore` file it encounters and applying
 * the rules collected so far to prune the walk (early pruning).
 *
 * @param options Only `cwd` is read: it is the walk root and the base that all
 *   resulting globs are relative to.
 * @returns The accumulated ignore and unignore globs of all `.gitignore` files found.
 */
async function _parseFindGitignores(options: Options): Promise<Gitignores> {
  const ignores: string[] = [];
  const unignores: string[] = [];
  const gitignoreFiles: string[] = [];

  // Recompiled whenever a new gitignore file is found. Until the first one has been
  // parsed nothing is ignored, so the matcher must start out returning `false`.
  // Starting with `true` made `deepFilter` prune every directory that is visited
  // before the first `.gitignore` entry (all of them if the root has none).
  let matcher: picomatch.Matcher = () => false;

  const entryFilter = (entry: Entry) => {
    if (!entry.dirent.isFile() || entry.name !== '.gitignore') return false;

    gitignoreFiles.push(entry.path);
    for (const rule of parseGitignoreFile(entry.path, options.cwd)) {
      if (rule.negated) unignores.push(...rule.patterns);
      else ignores.push(...rule.patterns);
    }
    matcher = picomatch(ignores, { ignore: unignores });
    return true;
  };

  // Never descend into git's own metadata directory: it holds no `.gitignore` files
  // that apply to the work tree. All other directories are entered unless the
  // rules collected so far ignore them.
  const deepFilter = (entry: Entry) =>
    entry.name !== '.git' && !matcher(path.relative(options.cwd, entry.path));

  // we don't actually care about the result of the walk since we incrementally add the results in entryFilter
  await walk(options.cwd, {
    // when we see a .gitignore, parse and add it
    entryFilter: timerify(entryFilter),
    // early pruning: don't recurse into directories that are ignored (important!)
    deepFilter: timerify(deepFilter),
  });

  debugLogObject(options.cwd, 'parsed gitignore files', { consideredFiles: gitignoreFiles, ignores, unignores });
  return { ignores, unignores };
}

/** `_parseFindGitignores` wrapped with `timerify`. */
const parseFindGitignores = timerify(_parseFindGitignores);
// since knip parses gitignores only a limited number of times and mostly purely for the repo root, permanent caching should be fine
/** Parsed gitignore rules keyed by the `cwd` they were collected for; entries are never evicted. */
const cachedIgnores = new Map<string, Gitignores>();

/**
 * Returns the parsed gitignore rules for `options.cwd`.
 * The first request for a given cwd walks the directory; the result is then
 * kept in `cachedIgnores` and reused for later requests.
 */
async function loadGitignores(options: Options): Promise<Gitignores> {
  const cached = cachedIgnores.get(options.cwd);
  if (cached) return cached;

  const parsed = await parseFindGitignores(options);
  cachedIgnores.set(options.cwd, parsed);
  return parsed;
}
/**
 * Simpler and faster replacement for the globby npm library.
 *
 * @param patterns Glob pattern(s) handed to fast-glob.
 * @param options fast-glob options plus `gitignore`; when `gitignore` is set, the
 *   rules of all `.gitignore` files below `options.cwd` are added to `ignore`.
 * @returns The paths matched by fast-glob.
 */
export async function globby(patterns: string | string[], options: Options): Promise<string[]> {
  // Work on a copy: pushing onto `options.ignore` directly would leak the gitignore
  // patterns into the caller's array and make it grow on every call.
  const ignore = [...(options.ignore ?? [])];

  if (options.gitignore) {
    const gitignores = await loadGitignores(options);
    // add git ignores to knip explicit ignores
    for (const pattern of gitignores.ignores) ignore.push(pattern);
    // add git unignores (!foo/bar).
    // I'm not sure 100% what the behaviour of fast-glob is here. Potentially this will cause it
    // to have git unignores to take precedence over knip ignores.
    for (const pattern of gitignores.unignores) ignore.push('!' + pattern);
  }

  debugLogObject(options.cwd, `fastGlobOptions`, { patterns, ...options, ignore });

  return fastGlob(patterns, {
    ...options,
    ignore,
  });
}

/**
 * Builds a predicate that is meant to be equivalent to `git check-ignore`.
 *
 * @param options `cwd` selects which gitignore rules are loaded and is the base
 *   that tested paths are made relative to.
 * @returns A timerified function that tells whether the given path matches the loaded rules.
 */
export async function isGitIgnoredFn(options: Options): Promise<(path: string) => boolean> {
  const { ignores, unignores } = await loadGitignores(options);
  const matchesRelative = picomatch(ignores, { ignore: unignores });

  // rules are expressed relative to cwd, so the incoming path is relativized first
  const isGitIgnored = (filePath: string) => matchesRelative(path.relative(options.cwd, filePath));

  return timerify(isGitIgnored);
}