Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Improve perf of camel case word splitter. #6019

Merged
merged 3 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/cspell-lib/api/api.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/cspell-lib/api/rollup.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import dts from 'rollup-plugin-dts';

const config = [
{
input: './dist/esm/index.d.ts',
input: './dist/lib/index.d.ts',
output: [{ file: './api/api.d.ts', format: 'es' }],
plugins: [dts()],
},
Expand Down
18 changes: 9 additions & 9 deletions packages/cspell-lib/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
"description": "A library of useful functions used across various cspell tools.",
"type": "module",
"sideEffects": false,
"types": "dist/esm/index.d.ts",
"module": "dist/esm/index.js",
"types": "dist/lib/index.d.ts",
"module": "dist/lib/index.js",
"exports": {
".": {
"import": "./dist/esm/index.js"
"import": "./dist/lib/index.js"
}
},
"files": [
Expand All @@ -24,17 +24,16 @@
"scripts": {
"clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"",
"clean-build": "pnpm clean && pnpm build",
"build": "tsc -b . -f && pnpm run build:api",
"build": "tsc -p . && pnpm run build:api",
"build:api": "rollup -c api/rollup.config.mjs",
"build:esm": "tsc -b tsconfig.esm.json -f",
"build:lib": "tsc -b src/lib/tsconfig.json -f",
"watch": "tsc -b . --watch -f",
"watch": "tsc -p . --watch",
"coverage": "vitest run --coverage --pool=forks",
"test-watch": "vitest",
"prepublishOnly": "pnpm run clean-build",
"#test": "vitest run --reporter=hanging-process --reporter=default",
"test": "vitest run --pool=forks",
"test:update-snapshot": "vitest run -u"
"test:update-snapshot": "vitest run -u",
"test:perf": "NODE_ENV=production insight --register ts-node/esm --file \"**/*.perf.{mts,ts}\""
},
"repository": {
"type": "git",
Expand Down Expand Up @@ -99,6 +98,7 @@
"configstore": "^7.0.0",
"cspell-dict-nl-nl": "^1.1.2",
"leaked-handles": "^5.2.0",
"lorem-ipsum": "^2.0.8"
"lorem-ipsum": "^2.0.8",
"perf-insight": "^1.2.0"
}
}
6 changes: 0 additions & 6 deletions packages/cspell-lib/src/lib-cjs/index.cts

This file was deleted.

3 changes: 0 additions & 3 deletions packages/cspell-lib/src/lib-cjs/pkg-info.cts

This file was deleted.

12 changes: 0 additions & 12 deletions packages/cspell-lib/src/lib-cjs/tsconfig.cjs.json

This file was deleted.

3 changes: 0 additions & 3 deletions packages/cspell-lib/src/lib-cjs/tsconfig.json

This file was deleted.

14 changes: 0 additions & 14 deletions packages/cspell-lib/src/lib-cjs/tsconfig.test.json

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ import { createReaderWriter, CSpellConfigFileInMemory } from 'cspell-config-lib'
import { isUrlLike, toFileURL } from 'cspell-io';
import { URI, Utils as UriUtils } from 'vscode-uri';

import { srcDirectory } from '../../../../lib-cjs/index.cjs';
import { onClearCache } from '../../../events/index.js';
import type { VFileSystem } from '../../../fileSystem.js';
import { getVirtualFS } from '../../../fileSystem.js';
import { createCSpellSettingsInternal as csi } from '../../../Models/CSpellSettingsInternalDef.js';
import { srcDirectory } from '../../../pkg-info.mjs';
import { autoResolve, AutoResolveCache, autoResolveWeak } from '../../../util/AutoResolve.js';
import { logError, logWarning } from '../../../util/logger.js';
import { FileResolver } from '../../../util/resolveFile.js';
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-lib/src/lib/Settings/DefaultSettings.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import type { PredefinedPatterns, RegExpPatternDefinition } from '@cspell/cspell-types';
import { parsers } from 'cspell-grammar';

import { srcDirectory } from '../../lib-cjs/index.cjs';
import type { CSpellSettingsInternal } from '../Models/CSpellSettingsInternalDef.js';
import { createCSpellSettingsInternal } from '../Models/CSpellSettingsInternalDef.js';
import { PatternRegExp } from '../Models/PatternRegExp.js';
import { srcDirectory } from '../pkg-info.mjs';
import { resolveFile } from '../util/resolveFile.js';
import { defaultConfigFileModuleRef } from './constants.js';
import { readSettings } from './Controller/configLoader/index.js';
Expand Down
22 changes: 22 additions & 0 deletions packages/cspell-lib/src/lib/pkg-info.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { fileURLToPath } from 'node:url';

/**
* This is the url of the current file, but it might be undefined if the environment does not support it.
*/
const url = import.meta.url;

/**
* The is the CommonJS __dirname variable, but it might not be defined.
* ESBuild and some other bundlers do support it.
*/
declare const __dirname: string;

function calcSrcDirectory() {
try {
return __dirname;
} catch {
return url ? fileURLToPath(new URL('./', url)) : process.cwd();
}
}

export const srcDirectory = calcSrcDirectory();
10 changes: 6 additions & 4 deletions packages/cspell-lib/src/lib/textValidation/isWordValid.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import type { CachingDictionary } from 'cspell-dictionary';

import type { TextOffsetRO } from './ValidationTypes.js';

function hasWordCheck(dict: CachingDictionary, word: string): boolean {
interface Dict {
has(word: string): boolean;
}

function hasWordCheck(dict: Dict, word: string): boolean {
word = word.includes('\\') ? word.replaceAll('\\', '') : word;
return dict.has(word);
}

export function isWordValidWithEscapeRetry(dict: CachingDictionary, wo: TextOffsetRO, line: TextOffsetRO): boolean {
export function isWordValidWithEscapeRetry(dict: Dict, wo: TextOffsetRO, line: TextOffsetRO): boolean {
const firstTry = hasWordCheck(dict, wo.text);
return (
firstTry ||
Expand Down
35 changes: 25 additions & 10 deletions packages/cspell-lib/src/lib/textValidation/lineValidatorFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ import { createCachingDictionary } from 'cspell-dictionary';

import type { ValidationIssue } from '../Models/ValidationIssue.js';
import * as RxPat from '../Settings/RegExpPatterns.js';
import * as Text from '../util/text.js';
import {
extractPossibleWordsFromTextOffset,
extractText,
extractWordsFromCodeTextOffset,
extractWordsFromTextOffset,
} from '../util/text.js';
import { split } from '../util/wordSplitter.js';
import { defaultMinWordLength } from './defaultConstants.js';
import { isWordValidWithEscapeRetry } from './isWordValid.js';
Expand Down Expand Up @@ -64,6 +69,17 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return !setOfKnownSuccessfulWords.has(wo.text);
};

const hasDict = {
has(word: string): boolean {
const info = getWordInfo(word);
if (info.isFound !== undefined) return info.isFound;
if (info.isFlagged) return true;
if (info.isFlagged) return false;
info.isFound = dictCol.has(word);
return info.isFound;
},
};

function calcIgnored(info: WordStatusInfo): boolean {
info.isIgnored ??= dictCol.isNoSuggestWord(info.word);
return info.isIgnored;
Expand Down Expand Up @@ -116,25 +132,24 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
const { isFlagged: isForbidden, isFound, isIgnored } = info;
const isFlagged = issue.isFlagged ?? (!isIgnored && isForbidden);
issue.isFlagged = isFlagged;
issue.isFound = isFound;
issue.isFound = isFlagged ? undefined : isFound;
return issue;
}
const isIgnored = calcIgnored(info);
const isFlagged = issue.isFlagged ?? calcFlagged(info);
const isFound = isFlagged ? undefined : isIgnored || isWordValidWithEscapeRetry(dictCol, issue, issue.line);
info.isFound ??= isFlagged ? false : isIgnored || isWordValidWithEscapeRetry(hasDict, issue, issue.line);
info.isFlagged = !!isFlagged;
info.isFound = isFound;
info.fin = true;
issue.isFlagged = isFlagged;
issue.isFound = isFound;
issue.isFound = isFlagged ? undefined : info.isFound;
return issue;
}

const fn: LineValidatorFn = (lineSegment: LineSegment) => {
function splitterIsValid(word: TextOffsetRO): boolean {
return (
setOfKnownSuccessfulWords.has(word.text) ||
(!isWordFlagged(word) && isWordValidWithEscapeRetry(dictCol, word, lineSegment.line))
(!isWordFlagged(word) && isWordValidWithEscapeRetry(hasDict, word, lineSegment.line))
);
}

Expand All @@ -145,7 +160,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat

const codeWordResults: ValidationIssueRO[] = [];

for (const wo of Text.extractWordsFromCodeTextOffset(vr)) {
for (const wo of extractWordsFromCodeTextOffset(vr)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = vr.line;
Expand All @@ -155,7 +170,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
if (!isFlaggedOrMinLength(issue)) continue;
checkWord(issue);
if (!isFlaggedOrNotFound(issue) || !isNotRepeatingChar(issue)) continue;
issue.text = Text.extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
issue.text = extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
codeWordResults.push(issue);
}

Expand All @@ -178,7 +193,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
}

const mismatches: ValidationIssue[] = [];
for (const wo of Text.extractWordsFromTextOffset(possibleWord)) {
for (const wo of extractWordsFromTextOffset(possibleWord)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = lineSegment.line;
Expand All @@ -200,7 +215,7 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
}

const checkedPossibleWords: Iterable<ValidationIssue> = pipe(
Text.extractPossibleWordsFromTextOffset(lineSegment.segment),
extractPossibleWordsFromTextOffset(lineSegment.segment),
opFilter(filterAlreadyChecked),
opConcatMap(checkPossibleWords),
opMap(annotateIssue),
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-lib/src/lib/util/resolveFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import { importResolveModuleName } from '@cspell/dynamic-import';
import type { VFileSystem } from 'cspell-io';
import resolveFrom from 'resolve-from';

import { srcDirectory } from '../../lib-cjs/pkg-info.cjs';
import { getFileSystem } from '../fileSystem.js';
import { srcDirectory } from '../pkg-info.mjs';
import { envToTemplateVars, replaceTemplate } from './templates.js';
import {
fileURLOrPathToPath,
Expand Down
27 changes: 26 additions & 1 deletion packages/cspell-lib/src/lib/util/text.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { describe, expect, test } from 'vitest';

import * as Text from './text.js';
import { splitCamelCaseWord } from './text.js';
import { regExSplitWords, regExSplitWords2, regExUpperSOrIng } from './textRegex.js';

// cSpell:ignore Ápple DBAs ctrip γάμμα

Expand All @@ -28,6 +29,19 @@ describe('Util Text', () => {
expect(Text.stringToRegExp(pattern)).toEqual(expected);
});

test.each`
word | expected
${'hello'} | ${'hello'.split('|')}
${'helloThere'} | ${['hello', 'There']}
${'HelloThere'} | ${['Hello', 'There']}
${'BigÁpple'} | ${['Big', 'Ápple']}
${'ASCIIToUTF16'} | ${['ASCII', 'To', 'UTF16']}
${'URLsAndDBAs'} | ${['URLs', 'And', 'DBAs']}
${'WALKingRUNning'} | ${['WALKing', 'RUNning']}
`('splitCamelCaseWord $word', ({ word, expected }) => {
expect(splitCamelCaseWord(word)).toEqual(expected);
});

test.each`
word | expected
${'hello'} | ${'hello'.split('|')}
Expand All @@ -38,7 +52,7 @@ describe('Util Text', () => {
${'URLsAndDBAs'} | ${['Urls', 'And', 'Dbas']}
${'WALKingRUNning'} | ${['Walking', 'Running']}
`('splitCamelCaseWord $word', ({ word, expected }) => {
expect(splitCamelCaseWord(word)).toEqual(expected);
expect(splitCamelCaseWordOrig(word)).toEqual(expected);
});

test('extract word from text', () => {
Expand Down Expand Up @@ -473,3 +487,14 @@ SQL;
Not checked.

`;

/**
* Split camelCase words into an array of strings.
*/
function splitCamelCaseWordOrig(word: string): string[] {
const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
const separator = '_<^*_*^>_';
const pass1 = wPrime.replace(regExSplitWords, '$1' + separator + '$2');
const pass2 = pass1.replace(regExSplitWords2, '$1' + separator + '$2');
return pass2.split(separator);
}
28 changes: 8 additions & 20 deletions packages/cspell-lib/src/lib/util/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ import {
regExAllUpper,
regExFirstUpper,
regExIgnoreCharacters,
regExSplitWords,
regExSplitWords2,
regExUpperSOrIng,
regExpSplitWordBreaks,
regExWords,
regExWordsAndDigits,
} from './textRegex.js';
Expand All @@ -35,11 +33,7 @@ export function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset>
* Split camelCase words into an array of strings.
*/
export function splitCamelCaseWord(word: string): string[] {
const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
const separator = '_<^*_*^>_';
const pass1 = wPrime.replace(regExSplitWords, '$1' + separator + '$2');
const pass2 = pass1.replace(regExSplitWords2, '$1' + separator + '$2');
return pass2.split(separator);
return word.split(regExpSplitWordBreaks);
}

/**
Expand All @@ -55,12 +49,13 @@ export function matchStringToTextOffset(reg: RegExp, text: string): Iterable<Tex
return matchToTextOffset(reg, { text, offset: 0 });
}

export function matchToTextOffset(reg: RegExp, text: TextOffset): Iterable<TextOffset> {
const textOffset = text;
const fnOffsetMap = offsetMap(textOffset.offset);
export function matchToTextOffset(reg: RegExp, t: TextOffset): Iterable<TextOffset> {
const text = t.text;
const offset = t.offset;
// return opMap((m: RegExpExecArray) => ({ text: m[0], offset: offset + m.index }))(match(reg, text));
return pipe(
match(reg, textOffset.text),
opMap((m) => fnOffsetMap<TextOffset>({ text: m[0], offset: m.index || 0 })),
match(reg, text),
opMap((m) => ({ text: m[0], offset: offset + m.index })),
);
}

Expand Down Expand Up @@ -182,13 +177,6 @@ export function extractText(textOffset: TextOffset, startPos: number, endPos: nu
return text.slice(a, b);
}

interface OffsetMap {
offset: number;
}
function offsetMap(offset: number) {
return <T extends OffsetMap>(xo: T) => ({ ...xo, offset: xo.offset + offset }) as T;
}

export function calculateTextDocumentOffsets<T extends TextOffset>(
uri: string | Uri | URL,
doc: string,
Expand Down
Loading