Skip to content

Commit

Permalink
optimize common patterns, correct matching of .*
Browse files Browse the repository at this point in the history
Previously, `.*` would match `.` and `..` in some cases where it
shouldn't, which deviates from the Bash 5 behavior this library targets.

Also, this adds fast-path optimizations for:
- `*`
- `*.<some extension>`
- `.*`

which are by far the most common patterns in use, and ironically also
some of the least-performant regular expressions generated by this
library.

While the regular expression generated by makeRE will always have to
take the slower approach (lots of lookahead/lookbehind tests,
conditional anchors, etc.), in the case of `minimatch()` or
`Minimatch.test()`, or manually walking the MMRegExp objects in the set,
it will be *much* faster.
  • Loading branch information
isaacs committed Feb 11, 2023
1 parent 4890d45 commit 763a256
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 21 deletions.
108 changes: 94 additions & 14 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,29 @@ export const minimatch = (

export default minimatch

// Optimized checking for the most common glob patterns.
//
// Each `*RE` below recognizes one simple pattern shape, and the `*Test`
// functions next to it decide whether a string `f` matches that shape using
// plain string operations instead of a generated regular expression.
// The `Dot` variants are the ones to use when `options.dot` is set, and the
// `Nocase` variants are the ones to use when `options.nocase` is set.
//
// Invariant shared by every test here: a star never matches the special
// entries `.` and `..`, regardless of the `dot` option.

// `*.<ext>`: one or more stars followed by a literal extension, captured in
// group 1 (including its leading dot). The extension is later compared
// verbatim with endsWith(), so it must not contain glob magic
// (`!`, `?`, `*`, `[`, `(`) nor a backslash escape; such patterns do not
// match this RE and therefore do not get a fast test.
const starDotExtRE = /^\*+(\.[^!?\*\[\(\\]*)$/
// dot:false, case-sensitive: hidden files never match a leading star.
const starDotExtTest = (ext: string) => (f: string) =>
  !f.startsWith('.') && f.endsWith(ext)
// dot:true, case-sensitive: a leading dot is fine, but `.` and `..` are
// still rejected. This only matters when `ext` is itself all dots (`*.`,
// `*..`), where endsWith() alone would accept them.
const starDotExtTestDot = (ext: string) => (f: string) =>
  f !== '.' && f !== '..' && f.endsWith(ext)
// dot:false, case-insensitive: the extension is lower-cased once up front so
// each call only has to lower-case the candidate.
const starDotExtTestNocase = (ext: string) => {
  ext = ext.toLowerCase()
  return (f: string) => !f.startsWith('.') && f.toLowerCase().endsWith(ext)
}
// dot:true, case-insensitive: same `.` / `..` guard as starDotExtTestDot.
const starDotExtTestNocaseDot = (ext: string) => {
  ext = ext.toLowerCase()
  return (f: string) =>
    f !== '.' && f !== '..' && f.toLowerCase().endsWith(ext)
}

// `*.*`: anything containing a dot.
const starDotStarRE = /^\*+\.\*+$/
const starDotStarTest = (f: string) => !f.startsWith('.') && f.includes('.')
const starDotStarTestDot = (f: string) =>
  f !== '.' && f !== '..' && f.includes('.')

// `.*`: anything starting with a dot, except `.` and `..` themselves.
const dotStarRE = /^\.\*+$/
const dotStarTest = (f: string) => f !== '.' && f !== '..' && f.startsWith('.')

// `*`: any non-empty string (not starting with a dot unless dot:true).
const starRE = /^\*+$/
const starTest = (f: string) => f.length !== 0 && !f.startsWith('.')
const starTestDot = (f: string) => f.length !== 0 && f !== '.' && f !== '..'

/* c8 ignore start */
const platform =
typeof process === 'object' && process
Expand Down Expand Up @@ -228,10 +251,11 @@ interface NegativePatternListEntry extends PatternListEntry {
reEnd: number
}

type MMRegExp = RegExp & {
export type MMRegExp = RegExp & {
_src?: string
_glob?: string
}

type SubparseReturn = [string, boolean]
type ParseReturnFiltered = string | MMRegExp | typeof GLOBSTAR
type ParseReturn = ParseReturnFiltered | false
Expand Down Expand Up @@ -316,16 +340,41 @@ export class Minimatch {
const rawGlobParts = this.globSet.map(s => this.slashSplit(s))

// consecutive globstars are an unnecessary perf killer
this.globParts = this.options.noglobstar
? rawGlobParts
: rawGlobParts.map(parts =>
parts.reduce((set: string[], part) => {
if (part !== '**' || set[set.length - 1] !== '**') {
set.push(part)
// also, **/*/... is equivalent to */**/..., so swap all of those
// this turns a pattern like **/*/**/*/x into */*/**/x
// and a pattern like **/x/**/*/y becomes **/x/*/**/y
// the *later* we can push the **, the more efficient it is,
// because we can avoid having to do a recursive walk until
// the walked tree is as shallow as possible.
// Note that this is only true up to the last pattern, though, because
// a/*/** will only match a/b if b is a dir, but a/**/* will match a/b
// regardless, since it's "0 or more path segments" if it's not final.
if (this.options.noglobstar) {
// ** is * anyway
this.globParts = rawGlobParts
} else {
for (const parts of rawGlobParts) {
let swapped: boolean
do {
swapped = false
for (let i = 0; i < parts.length - 1; i++) {
if (parts[i] === '*' && parts[i - 1] === '**') {
parts[i] = '**'
parts[i - 1] = '*'
swapped = true
}
return set
}, [])
)
}
} while (swapped)
}
this.globParts = rawGlobParts.map(parts =>
parts.reduce((set: string[], part) => {
if (part !== '**' || set[set.length - 1] !== '**') {
set.push(part)
}
return set
}, [])
)
}

this.debug(this.pattern, this.globParts)

Expand Down Expand Up @@ -601,6 +650,30 @@ export class Minimatch {
}
if (pattern === '') return ''

// far and away, the most common glob pattern parts are
// *, *.*, and *.<ext>. Add a fast check method for those.
let m: RegExpMatchArray | null
let fastTest: null | ((f: string) => boolean) = null
if (isSub !== SUBPARSE) {
if ((m = pattern.match(starRE))) {
fastTest = options.dot ? starTestDot : starTest
} else if ((m = pattern.match(starDotExtRE))) {
fastTest = (
options.nocase
? options.dot
? starDotExtTestNocaseDot
: starDotExtTestNocase
: options.dot
? starDotExtTestDot
: starDotExtTest
)(m[1])
} else if ((m = pattern.match(starDotStarRE))) {
fastTest = options.dot ? starDotStarTestDot : starDotStarTest
} else if ((m = pattern.match(dotStarRE))) {
fastTest = dotStarTest
}
}

let re = ''
let hasMagic = false
let escaping = false
Expand Down Expand Up @@ -977,10 +1050,17 @@ export class Minimatch {

const flags = options.nocase ? 'i' : ''
try {
return Object.assign(new RegExp('^' + re + '$', flags), {
_glob: pattern,
_src: re,
})
const ext = fastTest
? {
_glob: pattern,
_src: re,
test: fastTest,
}
: {
_glob: pattern,
_src: re,
}
return Object.assign(new RegExp('^' + re + '$', flags), ext)
/* c8 ignore start */
} catch (er) {
// should be impossible
Expand Down
76 changes: 76 additions & 0 deletions tap-snapshots/test/basic.js.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ exports[`test/basic.js TAP basic tests > makeRe * 1`] = `
/^(?:(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe * 2`] = `
/^(?:(?!\\.)(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe * 3`] = `
/^(?:(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe *(a/b) 1`] = `
/^(?:(?=.)[^/]*?\\((?!\\.)a\\/b\\))$/
`
Expand Down Expand Up @@ -97,10 +105,34 @@ exports[`test/basic.js TAP basic tests > makeRe *.!(js) 1`] = `
/^(?:(?!\\.)(?=.)[^/]*?\\.(?:(?!(?:js)(?:$|\\/))[^/]*?))$/
`

exports[`test/basic.js TAP basic tests > makeRe *.* 1`] = `
/^(?:(?!\\.)(?=.)[^/]*?\\.[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe *.* 2`] = `
/^(?:(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?\\.[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe *.\\* 1`] = `
/^(?:(?!\\.)(?=.)[^/]*?\\.\\*)$/
`

exports[`test/basic.js TAP basic tests > makeRe *.js 1`] = `
/^(?:(?!\\.)(?=.)[^/]*?\\.js)$/
`

exports[`test/basic.js TAP basic tests > makeRe *.js 2`] = `
/^(?:(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?\\.js)$/
`

exports[`test/basic.js TAP basic tests > makeRe *.js 3`] = `
/^(?:(?!\\.)(?=.)[^/]*?\\.js)$/i
`

exports[`test/basic.js TAP basic tests > makeRe *.js 4`] = `
/^(?:(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?\\.js)$/i
`

exports[`test/basic.js TAP basic tests > makeRe */man*/bash.* 1`] = `
/^(?:(?!\\.)(?=.)[^/]*?\\/(?=.)man[^/]*?\\/(?=.)bash\\.[^/]*?)$/
`
Expand Down Expand Up @@ -129,6 +161,50 @@ exports[`test/basic.js TAP basic tests > makeRe .* 1`] = `
/^(?:(?=.)\\.[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .* 2`] = `
/^(?:(?=.)\\.[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/**/* 1`] = `
/^(?:\\.x(?:\\/|\\/(?:(?!(?:\\/|^)\\.).)*?\\/)(?!\\.)(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/**/* 2`] = `
/^(?:\\.x(?:\\/|\\/(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?\\/)(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/**/**/* 1`] = `
/^(?:\\.x(?:\\/|\\/(?:(?!(?:\\/|^)\\.).)*?\\/)(?!\\.)(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/**/**/* 2`] = `
/^(?:\\.x(?:\\/|\\/(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?\\/)(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/**/*/** 1`] = `
/^(?:\\.x\\/(?!\\.)(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)\\.).)*?)?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/**/*/** 2`] = `
/^(?:\\.x\\/(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?)?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/*/** 1`] = `
/^(?:\\.x\\/(?!\\.)(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)\\.).)*?)?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/*/** 2`] = `
/^(?:\\.x\\/(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?)?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/*/**/** 1`] = `
/^(?:\\.x\\/(?!\\.)(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)\\.).)*?)?)$/
`

exports[`test/basic.js TAP basic tests > makeRe .x/*/**/** 2`] = `
/^(?:\\.x\\/(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?)?)$/
`

exports[`test/basic.js TAP basic tests > makeRe /^root:/{s/^[^:]*:[^:]*:([^:]*).*$// 1`] = `
/^(?:\\/\\^root:\\/\\{s\\/(?=.)\\^[^:][^/]*?:[^:][^/]*?:\\([^:]\\)[^/]*?\\.[^/]*?\\$\\/\\/)$/
`
Expand Down
9 changes: 8 additions & 1 deletion test/basic.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,16 @@ t.test('basic tests', function (t) {
t.equal(String(r), String(r2), 'same results from both makeRe fns')
tapOpts.re = r
tapOpts.files = JSON.stringify(f)
tapOpts.pattern = pattern
tapOpts.glob = pattern
tapOpts.set = m.set
tapOpts.globSet = m.globSet
tapOpts.negated = m.negate
const o = Object.entries(options)
.filter(([_, v]) => v)
.map(([k]) => k)
if (o.length) {
tapOpts.flags = o
}

var actual = mm.match(f, pattern, options)
actual.sort(alpha)
Expand Down
61 changes: 55 additions & 6 deletions test/patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ module.exports = [
// even when options.dot is set.
() => (files = ['a/./b', 'a/../b', 'a/c/b', 'a/.d/b']),
['a/*/b', ['a/c/b', 'a/.d/b'], { dot: true }],
['a/.*/b', ['a/./b', 'a/../b', 'a/.d/b'], { dot: true }],
['a/.*/b', ['a/.d/b'], { dot: true }],
['a/*/b', ['a/c/b'], { dot: false }],
['a/.*/b', ['a/./b', 'a/../b', 'a/.d/b'], { dot: false }],
['a/.*/b', ['a/.d/b'], { dot: false }],

// this also tests that changing the options needs
// to change the cache key, even if the pattern is
Expand Down Expand Up @@ -266,16 +266,17 @@ module.exports = [
'a/.x/b',
'.x',
'.x/',
'.x/a',
'.x/a/',
'.x/a/b',
'a/.x/b/.x/c',
'.x/.x',
'.x/.x/',
'.x/.y',
]),
[
'**/.x/**',
[
'.x/',
'.x/a',
'.x/a/',
'.x/a/b',
'a/.x/b',
'a/b/.x/',
Expand All @@ -284,6 +285,17 @@ module.exports = [
'a/b/.x/c/d/e',
],
],
'test equivalence of **/* and */**',
['.x/**/*', ['.x/a/', '.x/a/b']],
['.x/*/**', ['.x/a/', '.x/a/b']],
['.x/**/**/*', ['.x/a/', '.x/a/b']],
['.x/**/*/**', ['.x/a/', '.x/a/b']],
['.x/*/**/**', ['.x/a/', '.x/a/b']],
['.x/**/*', ['.x/a/', '.x/a/b', '.x/.x/', '.x/.y'], { dot: true }],
['.x/*/**', ['.x/a/', '.x/a/b', '.x/.x/'], { dot: true }],
['.x/**/**/*', ['.x/a/', '.x/a/b', '.x/.x/', '.x/.y'], { dot: true }],
['.x/**/*/**', ['.x/a/', '.x/a/b', '.x/.x/'], { dot: true }],
['.x/*/**/**', ['.x/a/', '.x/a/b', '.x/.x/'], { dot: true }],

['**/.x/**', ['a/.x/b'], { noglobstar: true }],

Expand Down Expand Up @@ -336,12 +348,49 @@ module.exports = [
// doesn't start at 0, no dice
// neg extglobs don't trigger this behavior.
['!(.a|js)@(.*)', ['a.js'], { nonegate: true }],
() => files=['a(b', 'ab', 'a)b'],
() => (files = ['a(b', 'ab', 'a)b']),
['@(a|a[(])b', ['a(b', 'ab']],
['@(a|a[)])b', ['a)b', 'ab']],

// TODO: recursive descent parser for extglobs, to do this properly
// ['@(+(.*))', ['.a', '.a.js', '.js']],

'optimized checking for some common patterns',
() =>
(files = [
'.a',
'.a.js',
'.js',
'a',
'a.js',
'js',
'a.JS',
'.a.JS',
'.JS',
'.',
'..',
]),
['*.js', ['a.js']],
['*.js', ['a.js', '.a.js', '.js'], { dot: true }],
['*.js', ['a.js', 'a.JS'], { nocase: true }],
[
'*.js',
['a.js', 'a.JS', '.a.js', '.a.JS', '.js', '.JS'],
{ dot: true, nocase: true },
],
['*.*', ['a.js', 'a.JS']],
[
'*.*',
['.a', '.a.js', '.js', 'a.js', 'a.JS', '.a.JS', '.JS'],
{ dot: true },
],
['.*', ['.a', '.a.js', '.js', '.a.JS', '.JS']],
['*', ['a', 'a.js', 'js', 'a.JS']],
[
'*',
['.a', '.a.js', '.js', 'a', 'a.js', 'js', 'a.JS', '.a.JS', '.JS'],
{ dot: true },
],
]

Object.defineProperty(module.exports, 'files', {
Expand Down

0 comments on commit 763a256

Please sign in to comment.