Skip to content

Commit

Permalink
fix: perf regression on hot string munging path
Browse files Browse the repository at this point in the history
This doesn't change any functionality, but it optimizes a few extremely
hot code paths for the input typically encountered during a large npm
project installation.

`String.normalize()` is cached, and trailing-slash removal is done with
a single `String.slice()`, rather than multiple slices and
`String.length` comparisons.

It is extremely rare that any code path is ever hot enough for this kind
of thing to be relevant enough to justify this sort of
microoptimization, but these two issues resulted in a 25-50% install
time increase in some cases, which is fairly dramatic.

Fix: npm/cli#3676

PR-URL: #286
Credit: @isaacs
Close: #286
Reviewed-by: @wraithgar
  • Loading branch information
isaacs authored and lukekarrys committed Oct 30, 2021
1 parent c4569d9 commit c5b5427
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 28 deletions.
11 changes: 11 additions & 0 deletions lib/normalize-unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// warning: extremely hot code path.
// This has been meticulously optimized for use
// within npm install on large package trees.
// Do not edit without careful benchmarking.
const normalizeCache = Object.create(null)
const {hasOwnProperty} = Object.prototype
module.exports = s => {
if (!hasOwnProperty.call(normalizeCache, s))
normalizeCache[s] = s.normalize('NFKD')
return normalizeCache[s]
}
9 changes: 4 additions & 5 deletions lib/path-reservations.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// while still allowing maximal safe parallelization.

const assert = require('assert')
const normPath = require('./normalize-windows-path.js')
const normalize = require('./normalize-unicode.js')
const stripSlashes = require('./strip-trailing-slashes.js')
const { join } = require('path')

Expand All @@ -28,7 +28,7 @@ module.exports = () => {
const getDirs = path => {
const dirs = path.split('/').slice(0, -1).reduce((set, path) => {
if (set.length)
path = normPath(join(set[set.length - 1], path))
path = join(set[set.length - 1], path)
set.push(path || '/')
return set
}, [])
Expand Down Expand Up @@ -116,9 +116,8 @@ module.exports = () => {
// So, we just pretend that every path matches every other path here,
// effectively removing all parallelization on windows.
paths = isWindows ? ['win32 parallelization disabled'] : paths.map(p => {
return stripSlashes(normPath(join(p)))
.normalize('NFKD')
.toLowerCase()
// don't need normPath, because we skip this entirely for windows
return normalize(stripSlashes(join(p))).toLowerCase()
})

const dirs = new Set(
Expand Down
31 changes: 10 additions & 21 deletions lib/strip-trailing-slashes.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,13 @@
// this is the only approach that was significantly faster than using
// str.replace(/\/+$/, '') for strings ending with a lot of / chars and
// containing multiple / chars.
const batchStrings = [
'/'.repeat(1024),
'/'.repeat(512),
'/'.repeat(256),
'/'.repeat(128),
'/'.repeat(64),
'/'.repeat(32),
'/'.repeat(16),
'/'.repeat(8),
'/'.repeat(4),
'/'.repeat(2),
'/',
]

// warning: extremely hot code path.
// This has been meticulously optimized for use
// within npm install on large package trees.
// Do not edit without careful benchmarking.
module.exports = str => {
for (const s of batchStrings) {
while (str.length >= s.length && str.slice(-1 * s.length) === s)
str = str.slice(0, -1 * s.length)
let i = str.length - 1
let slashesStart = -1
while (i > -1 && str.charAt(i) === '/') {
slashesStart = i
i--
}
return str
return slashesStart === -1 ? str : str.slice(0, slashesStart)
}
4 changes: 2 additions & 2 deletions lib/unpack.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const stripAbsolutePath = require('./strip-absolute-path.js')
const pathReservations = require('./path-reservations.js')
const normPath = require('./normalize-windows-path.js')
const stripSlash = require('./strip-trailing-slashes.js')
const normalize = require('./normalize-unicode.js')

const ONENTRY = Symbol('onEntry')
const CHECKFS = Symbol('checkFs')
Expand Down Expand Up @@ -103,8 +104,7 @@ const uint32 = (a, b, c) =>
// Note that on windows, we always drop the entire cache whenever a
// symbolic link is encountered, because 8.3 filenames are impossible
// to reason about, and collisions are hazards rather than just failures.
const cacheKeyNormalize = path => stripSlash(normPath(path))
.normalize('NFKD')
const cacheKeyNormalize = path => normalize(stripSlash(normPath(path)))
.toLowerCase()

const pruneCache = (cache, abs) => {
Expand Down
12 changes: 12 additions & 0 deletions test/normalize-unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
const t = require('tap')
const normalize = require('../lib/normalize-unicode.js')

// café
const cafe1 = Buffer.from([0x63, 0x61, 0x66, 0xc3, 0xa9]).toString()

// cafe with a `
const cafe2 = Buffer.from([0x63, 0x61, 0x66, 0x65, 0xcc, 0x81]).toString()

t.equal(normalize(cafe1), normalize(cafe2), 'matching unicodes')
t.equal(normalize(cafe1), normalize(cafe2), 'cached')
t.equal(normalize('foo'), 'foo', 'non-unicdoe string')
1 change: 1 addition & 0 deletions test/strip-trailing-slashes.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ const stripSlash = require('../lib/strip-trailing-slashes.js')
const short = '///a///b///c///'
const long = short.repeat(10) + '/'.repeat(1000000)

t.equal(stripSlash('no slash'), 'no slash')
t.equal(stripSlash(short), '///a///b///c')
t.equal(stripSlash(long), short.repeat(9) + '///a///b///c')

0 comments on commit c5b5427

Please sign in to comment.