Skip to content

Commit

Permalink
fix: Improve perf of camel case word splitter. (#6019)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S authored Aug 1, 2024
1 parent a6ea2c7 commit de29685
Show file tree
Hide file tree
Showing 26 changed files with 281 additions and 129 deletions.
2 changes: 1 addition & 1 deletion packages/cspell-lib/api/api.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/cspell-lib/api/rollup.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import dts from 'rollup-plugin-dts';

const config = [
{
input: './dist/esm/index.d.ts',
input: './dist/lib/index.d.ts',
output: [{ file: './api/api.d.ts', format: 'es' }],
plugins: [dts()],
},
Expand Down
18 changes: 9 additions & 9 deletions packages/cspell-lib/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
"description": "A library of useful functions used across various cspell tools.",
"type": "module",
"sideEffects": false,
"types": "dist/esm/index.d.ts",
"module": "dist/esm/index.js",
"types": "dist/lib/index.d.ts",
"module": "dist/lib/index.js",
"exports": {
".": {
"import": "./dist/esm/index.js"
"import": "./dist/lib/index.js"
}
},
"files": [
Expand All @@ -24,17 +24,16 @@
"scripts": {
"clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"",
"clean-build": "pnpm clean && pnpm build",
"build": "tsc -b . -f && pnpm run build:api",
"build": "tsc -p . && pnpm run build:api",
"build:api": "rollup -c api/rollup.config.mjs",
"build:esm": "tsc -b tsconfig.esm.json -f",
"build:lib": "tsc -b src/lib/tsconfig.json -f",
"watch": "tsc -b . --watch -f",
"watch": "tsc -p . --watch",
"coverage": "vitest run --coverage --pool=forks",
"test-watch": "vitest",
"prepublishOnly": "pnpm run clean-build",
"#test": "vitest run --reporter=hanging-process --reporter=default",
"test": "vitest run --pool=forks",
"test:update-snapshot": "vitest run -u"
"test:update-snapshot": "vitest run -u",
"test:perf": "NODE_ENV=production insight --register ts-node/esm --file \"**/*.perf.{mts,ts}\""
},
"repository": {
"type": "git",
Expand Down Expand Up @@ -99,6 +98,7 @@
"configstore": "^7.0.0",
"cspell-dict-nl-nl": "^1.1.2",
"leaked-handles": "^5.2.0",
"lorem-ipsum": "^2.0.8"
"lorem-ipsum": "^2.0.8",
"perf-insight": "^1.2.0"
}
}
6 changes: 0 additions & 6 deletions packages/cspell-lib/src/lib-cjs/index.cts

This file was deleted.

3 changes: 0 additions & 3 deletions packages/cspell-lib/src/lib-cjs/pkg-info.cts

This file was deleted.

12 changes: 0 additions & 12 deletions packages/cspell-lib/src/lib-cjs/tsconfig.cjs.json

This file was deleted.

3 changes: 0 additions & 3 deletions packages/cspell-lib/src/lib-cjs/tsconfig.json

This file was deleted.

14 changes: 0 additions & 14 deletions packages/cspell-lib/src/lib-cjs/tsconfig.test.json

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ import { createReaderWriter, CSpellConfigFileInMemory } from 'cspell-config-lib'
import { isUrlLike, toFileURL } from 'cspell-io';
import { URI, Utils as UriUtils } from 'vscode-uri';

import { srcDirectory } from '../../../../lib-cjs/index.cjs';
import { onClearCache } from '../../../events/index.js';
import type { VFileSystem } from '../../../fileSystem.js';
import { getVirtualFS } from '../../../fileSystem.js';
import { createCSpellSettingsInternal as csi } from '../../../Models/CSpellSettingsInternalDef.js';
import { srcDirectory } from '../../../pkg-info.mjs';
import { autoResolve, AutoResolveCache, autoResolveWeak } from '../../../util/AutoResolve.js';
import { logError, logWarning } from '../../../util/logger.js';
import { FileResolver } from '../../../util/resolveFile.js';
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-lib/src/lib/Settings/DefaultSettings.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import type { PredefinedPatterns, RegExpPatternDefinition } from '@cspell/cspell-types';
import { parsers } from 'cspell-grammar';

import { srcDirectory } from '../../lib-cjs/index.cjs';
import type { CSpellSettingsInternal } from '../Models/CSpellSettingsInternalDef.js';
import { createCSpellSettingsInternal } from '../Models/CSpellSettingsInternalDef.js';
import { PatternRegExp } from '../Models/PatternRegExp.js';
import { srcDirectory } from '../pkg-info.mjs';
import { resolveFile } from '../util/resolveFile.js';
import { defaultConfigFileModuleRef } from './constants.js';
import { readSettings } from './Controller/configLoader/index.js';
Expand Down
22 changes: 22 additions & 0 deletions packages/cspell-lib/src/lib/pkg-info.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { fileURLToPath } from 'node:url';

/**
* This is the url of the current file, but it might be undefined if the environment does not support it.
*/
const url = import.meta.url;

/**
* The is the CommonJS __dirname variable, but it might not be defined.
* ESBuild and some other bundlers do support it.
*/
declare const __dirname: string;

function calcSrcDirectory() {
try {
return __dirname;
} catch {
return url ? fileURLToPath(new URL('./', url)) : process.cwd();
}
}

export const srcDirectory = calcSrcDirectory();
10 changes: 6 additions & 4 deletions packages/cspell-lib/src/lib/textValidation/isWordValid.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import type { CachingDictionary } from 'cspell-dictionary';

import type { TextOffsetRO } from './ValidationTypes.js';

function hasWordCheck(dict: CachingDictionary, word: string): boolean {
interface Dict {
has(word: string): boolean;
}

function hasWordCheck(dict: Dict, word: string): boolean {
word = word.includes('\\') ? word.replaceAll('\\', '') : word;
return dict.has(word);
}

export function isWordValidWithEscapeRetry(dict: CachingDictionary, wo: TextOffsetRO, line: TextOffsetRO): boolean {
export function isWordValidWithEscapeRetry(dict: Dict, wo: TextOffsetRO, line: TextOffsetRO): boolean {
const firstTry = hasWordCheck(dict, wo.text);
return (
firstTry ||
Expand Down
35 changes: 25 additions & 10 deletions packages/cspell-lib/src/lib/textValidation/lineValidatorFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ import { createCachingDictionary } from 'cspell-dictionary';

import type { ValidationIssue } from '../Models/ValidationIssue.js';
import * as RxPat from '../Settings/RegExpPatterns.js';
import * as Text from '../util/text.js';
import {
extractPossibleWordsFromTextOffset,
extractText,
extractWordsFromCodeTextOffset,
extractWordsFromTextOffset,
} from '../util/text.js';
import { split } from '../util/wordSplitter.js';
import { defaultMinWordLength } from './defaultConstants.js';
import { isWordValidWithEscapeRetry } from './isWordValid.js';
Expand Down Expand Up @@ -64,6 +69,17 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return !setOfKnownSuccessfulWords.has(wo.text);
};

const hasDict = {
has(word: string): boolean {
const info = getWordInfo(word);
if (info.isFound !== undefined) return info.isFound;
if (info.isFlagged) return true;
if (info.isFlagged) return false;
info.isFound = dictCol.has(word);
return info.isFound;
},
};

function calcIgnored(info: WordStatusInfo): boolean {
info.isIgnored ??= dictCol.isNoSuggestWord(info.word);
return info.isIgnored;
Expand Down Expand Up @@ -116,25 +132,24 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
const { isFlagged: isForbidden, isFound, isIgnored } = info;
const isFlagged = issue.isFlagged ?? (!isIgnored && isForbidden);
issue.isFlagged = isFlagged;
issue.isFound = isFound;
issue.isFound = isFlagged ? undefined : isFound;
return issue;
}
const isIgnored = calcIgnored(info);
const isFlagged = issue.isFlagged ?? calcFlagged(info);
const isFound = isFlagged ? undefined : isIgnored || isWordValidWithEscapeRetry(dictCol, issue, issue.line);
info.isFound ??= isFlagged ? false : isIgnored || isWordValidWithEscapeRetry(hasDict, issue, issue.line);
info.isFlagged = !!isFlagged;
info.isFound = isFound;
info.fin = true;
issue.isFlagged = isFlagged;
issue.isFound = isFound;
issue.isFound = isFlagged ? undefined : info.isFound;
return issue;
}

const fn: LineValidatorFn = (lineSegment: LineSegment) => {
function splitterIsValid(word: TextOffsetRO): boolean {
return (
setOfKnownSuccessfulWords.has(word.text) ||
(!isWordFlagged(word) && isWordValidWithEscapeRetry(dictCol, word, lineSegment.line))
(!isWordFlagged(word) && isWordValidWithEscapeRetry(hasDict, word, lineSegment.line))
);
}

Expand All @@ -145,7 +160,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat

const codeWordResults: ValidationIssueRO[] = [];

for (const wo of Text.extractWordsFromCodeTextOffset(vr)) {
for (const wo of extractWordsFromCodeTextOffset(vr)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = vr.line;
Expand All @@ -155,7 +170,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
if (!isFlaggedOrMinLength(issue)) continue;
checkWord(issue);
if (!isFlaggedOrNotFound(issue) || !isNotRepeatingChar(issue)) continue;
issue.text = Text.extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
issue.text = extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
codeWordResults.push(issue);
}

Expand All @@ -178,7 +193,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
}

const mismatches: ValidationIssue[] = [];
for (const wo of Text.extractWordsFromTextOffset(possibleWord)) {
for (const wo of extractWordsFromTextOffset(possibleWord)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = lineSegment.line;
Expand All @@ -200,7 +215,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
}

const checkedPossibleWords: Iterable<ValidationIssue> = pipe(
Text.extractPossibleWordsFromTextOffset(lineSegment.segment),
extractPossibleWordsFromTextOffset(lineSegment.segment),
opFilter(filterAlreadyChecked),
opConcatMap(checkPossibleWords),
opMap(annotateIssue),
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-lib/src/lib/util/resolveFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import { importResolveModuleName } from '@cspell/dynamic-import';
import type { VFileSystem } from 'cspell-io';
import resolveFrom from 'resolve-from';

import { srcDirectory } from '../../lib-cjs/pkg-info.cjs';
import { getFileSystem } from '../fileSystem.js';
import { srcDirectory } from '../pkg-info.mjs';
import { envToTemplateVars, replaceTemplate } from './templates.js';
import {
fileURLOrPathToPath,
Expand Down
27 changes: 26 additions & 1 deletion packages/cspell-lib/src/lib/util/text.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { describe, expect, test } from 'vitest';

import * as Text from './text.js';
import { splitCamelCaseWord } from './text.js';
import { regExSplitWords, regExSplitWords2, regExUpperSOrIng } from './textRegex.js';

// cSpell:ignore Ápple DBAs ctrip γάμμα

Expand All @@ -28,6 +29,19 @@ describe('Util Text', () => {
expect(Text.stringToRegExp(pattern)).toEqual(expected);
});

test.each`
word | expected
${'hello'} | ${'hello'.split('|')}
${'helloThere'} | ${['hello', 'There']}
${'HelloThere'} | ${['Hello', 'There']}
${'BigÁpple'} | ${['Big', 'Ápple']}
${'ASCIIToUTF16'} | ${['ASCII', 'To', 'UTF16']}
${'URLsAndDBAs'} | ${['URLs', 'And', 'DBAs']}
${'WALKingRUNning'} | ${['WALKing', 'RUNning']}
`('splitCamelCaseWord $word', ({ word, expected }) => {
expect(splitCamelCaseWord(word)).toEqual(expected);
});

test.each`
word | expected
${'hello'} | ${'hello'.split('|')}
Expand All @@ -38,7 +52,7 @@ describe('Util Text', () => {
${'URLsAndDBAs'} | ${['Urls', 'And', 'Dbas']}
${'WALKingRUNning'} | ${['Walking', 'Running']}
`('splitCamelCaseWord $word', ({ word, expected }) => {
expect(splitCamelCaseWord(word)).toEqual(expected);
expect(splitCamelCaseWordOrig(word)).toEqual(expected);
});

test('extract word from text', () => {
Expand Down Expand Up @@ -473,3 +487,14 @@ SQL;
Not checked.
`;

/**
* Split camelCase words into an array of strings.
*/
function splitCamelCaseWordOrig(word: string): string[] {
const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
const separator = '_<^*_*^>_';
const pass1 = wPrime.replace(regExSplitWords, '$1' + separator + '$2');
const pass2 = pass1.replace(regExSplitWords2, '$1' + separator + '$2');
return pass2.split(separator);
}
28 changes: 8 additions & 20 deletions packages/cspell-lib/src/lib/util/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ import {
regExAllUpper,
regExFirstUpper,
regExIgnoreCharacters,
regExSplitWords,
regExSplitWords2,
regExUpperSOrIng,
regExpSplitWordBreaks,
regExWords,
regExWordsAndDigits,
} from './textRegex.js';
Expand All @@ -35,11 +33,7 @@ export function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset>
* Split camelCase words into an array of strings.
*/
export function splitCamelCaseWord(word: string): string[] {
const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
const separator = '_<^*_*^>_';
const pass1 = wPrime.replace(regExSplitWords, '$1' + separator + '$2');
const pass2 = pass1.replace(regExSplitWords2, '$1' + separator + '$2');
return pass2.split(separator);
return word.split(regExpSplitWordBreaks);
}

/**
Expand All @@ -55,12 +49,13 @@ export function matchStringToTextOffset(reg: RegExp, text: string): Iterable<Tex
return matchToTextOffset(reg, { text, offset: 0 });
}

export function matchToTextOffset(reg: RegExp, text: TextOffset): Iterable<TextOffset> {
const textOffset = text;
const fnOffsetMap = offsetMap(textOffset.offset);
export function matchToTextOffset(reg: RegExp, t: TextOffset): Iterable<TextOffset> {
const text = t.text;
const offset = t.offset;
// return opMap((m: RegExpExecArray) => ({ text: m[0], offset: offset + m.index }))(match(reg, text));
return pipe(
match(reg, textOffset.text),
opMap((m) => fnOffsetMap<TextOffset>({ text: m[0], offset: m.index || 0 })),
match(reg, text),
opMap((m) => ({ text: m[0], offset: offset + m.index })),
);
}

Expand Down Expand Up @@ -182,13 +177,6 @@ export function extractText(textOffset: TextOffset, startPos: number, endPos: nu
return text.slice(a, b);
}

interface OffsetMap {
offset: number;
}
function offsetMap(offset: number) {
return <T extends OffsetMap>(xo: T) => ({ ...xo, offset: xo.offset + offset }) as T;
}

export function calculateTextDocumentOffsets<T extends TextOffset>(
uri: string | Uri | URL,
doc: string,
Expand Down
Loading

0 comments on commit de29685

Please sign in to comment.