Skip to content

Commit

Permalink
perf: Reduce the use of Generators in critical sections. (#6015)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S authored Jul 31, 2024
1 parent 8c45457 commit be8e3e4
Show file tree
Hide file tree
Showing 10 changed files with 157 additions and 103 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Args: '["--config=../../../cspell-power-shell-docs.config.yaml","**"]'
Summary:
files: 2683
filesWithIssues: 1239
issues: 5376
issues: 5379
errors: 0
Errors: []

Expand Down Expand Up @@ -1467,6 +1467,7 @@ issues:
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:806:87 redirections U when present, across redirections."
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:131:21 jdoe U User = 'jdoe'"
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:165:44 docspage U StreamWriter]::new('.\\docspage.html', $false, $Response"
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:236:26 jdoe U the image data for `jdoe.png` is submitted."
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:250:59 unkownhost U Uri \"www.microsoft.com/unkownhost\""
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:353:2 httpbin U [httpbin.org](https://httpbin"
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:796:32 Passthru U pipeline, use the **Passthru** parameter."
Expand Down Expand Up @@ -2198,6 +2199,7 @@ issues:
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:1529:38 Brotli U added support for the Brotli compression algorithm"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 jdoe U User = 'jdoe'"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 docspage U StreamWriter]::new('.\\docspage.html', $false, $Response"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 jdoe U the image data for `jdoe.png` is submitted."
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 unkownhost U Uri \"www.microsoft.com/unkownhost\""
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 httpbin U [httpbin.org](https://httpbin"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 Passthru U pipeline, use the **Passthru** parameter."
Expand Down Expand Up @@ -2932,6 +2934,7 @@ issues:
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:1529:38 Brotli U added support for the Brotli compression algorithm"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 jdoe U User = 'jdoe'"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 docspage U StreamWriter]::new('.\\docspage.html', $false, $Response"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 jdoe U the image data for `jdoe.png` is submitted."
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 unkownhost U Uri \"www.microsoft.com/unkownhost\""
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 httpbin U [httpbin.org](https://httpbin"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 Passthru U pipeline, use the **Passthru** parameter."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Repository: MicrosoftDocs/PowerShell-Docs
Url: "https://github.com/MicrosoftDocs/PowerShell-Docs.git"
Args: ["--config=../../../cspell-power-shell-docs.config.yaml","**"]
Lines:
CSpell: Files checked: 2683, Issues found: 5376 in 1239 files.
CSpell: Files checked: 2683, Issues found: 5379 in 1239 files.
exit code: 1
CODE_OF_CONDUCT.md:10:38 - Unknown word (opensource) -- reach out at [aka.ms/opensource/moderation-support]
LICENSE:139:15 - Unknown word (sublicensable) -- non-sublicensable, non-exclusive, irrevocable
Expand Down Expand Up @@ -1462,6 +1462,7 @@ reference/7.2/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:270:2 - Unkn
reference/7.2/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:806:87 - Unknown word (redirections) -- when present, across redirections.
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:131:21 - Unknown word (jdoe) -- User = 'jdoe'
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:165:44 - Unknown word (docspage) -- StreamWriter]::new('.\docspage.html', $false, $Response
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:236:26 - Unknown word (jdoe) -- the image data for `jdoe.png` is submitted.
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:250:59 - Unknown word (unkownhost) -- Uri "www.microsoft.com/unkownhost"
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:353:2 - Unknown word (httpbin) -- [httpbin.org](https://httpbin
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:796:32 - Unknown word (Passthru) -- pipeline, use the **Passthru** parameter.
Expand Down Expand Up @@ -2194,6 +2195,7 @@ reference/7.4/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:961:71 - Unkn
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 - Unknown word (jdoe) -- User = 'jdoe'
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:1496:38 - Unknown word (Brotli) -- added support for the Brotli compression algorithm
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 - Unknown word (docspage) -- StreamWriter]::new('.\docspage.html', $false, $Response
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 - Unknown word (jdoe) -- the image data for `jdoe.png` is submitted.
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 - Unknown word (unkownhost) -- Uri "www.microsoft.com/unkownhost"
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 - Unknown word (httpbin) -- [httpbin.org](https://httpbin
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 - Unknown word (Passthru) -- pipeline, use the **Passthru** parameter.
Expand Down Expand Up @@ -2928,6 +2930,7 @@ reference/7.5/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:961:71 - Unkn
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 - Unknown word (jdoe) -- User = 'jdoe'
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:1496:38 - Unknown word (Brotli) -- added support for the Brotli compression algorithm
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 - Unknown word (docspage) -- StreamWriter]::new('.\docspage.html', $false, $Response
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 - Unknown word (jdoe) -- the image data for `jdoe.png` is submitted.
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 - Unknown word (unkownhost) -- Uri "www.microsoft.com/unkownhost"
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 - Unknown word (httpbin) -- [httpbin.org](https://httpbin
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 - Unknown word (Passthru) -- pipeline, use the **Passthru** parameter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Args: '["**","--exclude=**/Backup/**"]'
Summary:
files: 489
filesWithIssues: 452
issues: 14311
issues: 14314
errors: 0
Errors: []

Expand Down Expand Up @@ -4806,9 +4806,12 @@ issues:
- "Scripts/Search_Script.sql:147:26 SSRS U WHEN 2 THEN 'SSRS Report'"
- "Scripts/Search_Script.sql:148:26 SSRS U WHEN 3 THEN 'SSRS Resource'"
- "Scripts/Search_Script.sql:156:10 reportserver U FROM reportserver.dbo.Catalog"
- "Scripts/Search_Script.sql:180:12 SSIS U IF @search_SSIS_MSDB = 1"
- "Scripts/Search_Script.sql:180:17 MSDB U IF @search_SSIS_MSDB = 1"
- "Scripts/Search_Script.sql:182:14 SSIS U WITH CTE_SSIS AS ("
- "Scripts/Search_Script.sql:185:44 packagedata U CONVERT(VARBINARY(MAX),packagedata)) AS package_details"
- "Scripts/Search_Script.sql:186:67 packagedata U CONVERT(VARBINARY(MAX),packagedata))) AS package_details"
- "Scripts/Search_Script.sql:187:10 SSIS U 'SSIS Package (MSDB)' AS object"
- "Scripts/Search_Script.sql:187:24 MSDB U 'SSIS Package (MSDB)' AS object_type"
- "Scripts/Search_Script.sql:188:10 msdb U FROM msdb.dbo.sysssispackages"
- "Scripts/Search_Script.sql:188:19 sysssispackages U FROM msdb.dbo.sysssispackages p"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Repository: ktaranov/sqlserver-kit
Url: "https://github.com/ktaranov/sqlserver-kit.git"
Args: ["**","--exclude=**/Backup/**"]
Lines:
CSpell: Files checked: 489, Issues found: 14311 in 452 files.
CSpell: Files checked: 489, Issues found: 14314 in 452 files.
exit code: 1
ADS/README.md:30:15 - Unknown word (Dacpac) -- | [SQL Server Dacpac]
ADS/README.md:30:181 - Unknown word (wizarding) -- Provides an easy-to-use wizarding experience to deploy
Expand Down Expand Up @@ -8112,9 +8112,12 @@ Scripts/Search_Script.sql:147:26 - Unknown word (SSRS) -- WHEN 2 THEN
Scripts/Search_Script.sql:148:26 - Unknown word (SSRS) -- WHEN 3 THEN 'SSRS Resource'
Scripts/Search_Script.sql:14:17 - Unknown word (SSIS) -- DECLARE @search_SSIS_disk BIT = 0;
Scripts/Search_Script.sql:156:10 - Unknown word (reportserver) -- FROM reportserver.dbo.Catalog
Scripts/Search_Script.sql:180:12 - Unknown word (SSIS) -- IF @search_SSIS_MSDB = 1
Scripts/Search_Script.sql:180:17 - Unknown word (MSDB) -- IF @search_SSIS_MSDB = 1
Scripts/Search_Script.sql:182:14 - Unknown word (SSIS) -- WITH CTE_SSIS AS (
Scripts/Search_Script.sql:185:44 - Unknown word (packagedata) -- CONVERT(VARBINARY(MAX),packagedata)) AS package_details
Scripts/Search_Script.sql:186:67 - Unknown word (packagedata) -- CONVERT(VARBINARY(MAX),packagedata))) AS package_details
Scripts/Search_Script.sql:187:10 - Unknown word (SSIS) -- 'SSIS Package (MSDB)' AS object
Scripts/Search_Script.sql:187:24 - Unknown word (MSDB) -- 'SSIS Package (MSDB)' AS object_type
Scripts/Search_Script.sql:188:10 - Unknown word (msdb) -- FROM msdb.dbo.sysssispackages
Scripts/Search_Script.sql:188:19 - Unknown word (sysssispackages) -- FROM msdb.dbo.sysssispackages p
Expand Down
133 changes: 85 additions & 48 deletions packages/cspell-lib/src/lib/textValidation/lineValidatorFactory.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import { opConcatMap, opFilter, opMap, pipe, toArray } from '@cspell/cspell-pipe/sync';
import { opConcatMap, opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
import type { ParsedText } from '@cspell/cspell-types';
import type { CachingDictionary, SearchOptions, SpellingDictionary } from 'cspell-dictionary';
import { createCachingDictionary } from 'cspell-dictionary';

import type { ValidationIssue } from '../Models/ValidationIssue.js';
import * as RxPat from '../Settings/RegExpPatterns.js';
import * as Text from '../util/text.js';
import { clean } from '../util/util.js';
import { split } from '../util/wordSplitter.js';
import { defaultMinWordLength } from './defaultConstants.js';
import { isWordValidWithEscapeRetry } from './isWordValid.js';
Expand All @@ -16,7 +15,6 @@ import type {
LineValidatorFn,
MappedTextValidationResult,
TextOffsetRO,
TextOffsetRW,
TextValidatorFn,
ValidationIssueRO,
ValidationOptions,
Expand All @@ -27,8 +25,12 @@ interface LineValidator {
dict: CachingDictionary;
}

interface TextOffsetWithLine extends TextOffsetRW {
line?: TextOffsetRO;
/**
 * Per-word status record cached in the `knownWords` map (keyed by the word text).
 * The three status fields start out as `undefined` and are filled in lazily.
 */
interface WordStatusInfo {
/** The word text this record describes. */
word: string;
/** Result of the dictionary check as stored by `checkWord`; `undefined` until computed (and left `undefined` for flagged words). */
isFound: boolean | undefined;
/** Whether the word is flagged; written by `calcFlagged` and `checkWord`, `undefined` until computed. */
isFlagged: boolean | undefined;
/** Whether the word is a no-suggest (ignored) word; written by `calcIgnored`, `undefined` until computed. */
isIgnored: boolean | undefined;
/** Set to `true` once `checkWord` has stored its results, so later calls can reuse them. */
fin: boolean;
}

export function lineValidatorFactory(sDict: SpellingDictionary, options: ValidationOptions): LineValidator {
Expand All @@ -45,6 +47,8 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat

const dictCol = createCachingDictionary(sDict, hasWordOptions);

const knownWords = new Map<string, WordStatusInfo>();

const setOfFlagWords = new Set(flagWords);
const setOfKnownSuccessfulWords = new Set<string>();
const rememberFilter =
Expand All @@ -60,26 +64,33 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return !setOfKnownSuccessfulWords.has(wo.text);
};

function testForFlaggedWord(wo: TextOffsetRO): boolean {
const text = wo.text;
return setOfFlagWords.has(text) || setOfFlagWords.has(text.toLowerCase()) || dictCol.isForbidden(text);
/**
 * Report whether the word in `info` is ignored (a "no suggest" word).
 *
 * The dictionary is consulted only when `info.isIgnored` has not been set yet;
 * the answer is then stored on `info` so subsequent calls reuse it.
 *
 * @param info - cached status record for the word; `isIgnored` may be updated.
 * @returns the (possibly freshly computed) value of `info.isIgnored`.
 */
function calcIgnored(info: WordStatusInfo): boolean {
    if (info.isIgnored == null) {
        info.isIgnored = dictCol.isNoSuggestWord(info.word);
    }
    return info.isIgnored;
}

/**
 * Report whether the word in `info` is flagged, computing the answer at most once.
 *
 * A word is flagged when it (or its lower-cased form) is in `setOfFlagWords`, or the
 * dictionary reports it as forbidden, and it is not an ignored word.
 *
 * @param info - cached status record for the word; `isFlagged` (and possibly
 *   `isIgnored`, via `calcIgnored`) may be updated.
 * @returns the cached or freshly computed flagged state.
 */
function calcFlagged(info: WordStatusInfo): boolean {
    const cached = info.isFlagged;
    if (cached !== undefined) return cached;

    const { word } = info;
    const isListedOrForbidden =
        setOfFlagWords.has(word) || setOfFlagWords.has(word.toLowerCase()) || dictCol.isForbidden(word);
    // The ignore lookup only runs when the word would otherwise be flagged.
    const flagged = isListedOrForbidden && !calcIgnored(info);
    info.isFlagged = flagged;
    return flagged;
}

function isWordIgnored(word: string): boolean {
return dictCol.isNoSuggestWord(word);
return calcIgnored(getWordInfo(word));
}

/**
 * Look up the preferred suggestions for `word` in the caching dictionary.
 *
 * @param word - the word to get suggestions for.
 * @returns whatever `dictCol.getPreferredSuggestions` returns for the word.
 */
function getSuggestions(word: string) {
    const preferred = dictCol.getPreferredSuggestions(word);
    return preferred;
}

function isWordFlagged(word: TextOffsetRO): boolean {
const isIgnored = isWordIgnored(word.text);
const isFlagged = !isIgnored && testForFlaggedWord(word);
return isFlagged;
function isWordFlagged(wo: TextOffsetRO): boolean {
return calcFlagged(getWordInfo(wo.text));
}

function annotateIsFlagged(word: ValidationIssue): ValidationIssueRO {
function annotateIsFlagged(word: ValidationIssue): ValidationIssue {
word.isFlagged = isWordFlagged(word);
return word;
}
Expand All @@ -92,18 +103,38 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return issue;
}

function checkWord(word: ValidationIssueRO): ValidationIssueRO {
const isIgnored = isWordIgnored(word.text);
const { isFlagged = !isIgnored && testForFlaggedWord(word) } = word;
const isFound = isFlagged ? undefined : isIgnored || isWordValidWithEscapeRetry(dictCol, word, word.line);
return clean({ ...word, isFlagged, isFound });
// Predicates shared by the validation loops. Each one is wrapped in `rememberFilter`,
// which is defined earlier in this factory.
// NOTE(review): the body of `rememberFilter` is not visible here; presumably it records
// words that fail the predicate so they can be skipped later. Confirm against its definition.

// True when the text is at least `minWordLength` characters long or the issue is flagged.
const isFlaggedOrMinLength = rememberFilter(
(wo: ValidationIssue) => wo.text.length >= minWordLength || !!wo.isFlagged,
);

// True when the issue is flagged or was not found.
const isFlaggedOrNotFound = rememberFilter((wo: ValidationIssue) => wo.isFlagged || !wo.isFound);
// True when the text does not match `RxPat.regExRepeatedChar`.
const isNotRepeatingChar = rememberFilter((wo: ValidationIssue) => !RxPat.regExRepeatedChar.test(wo.text));

/**
 * Fill in `isFlagged` and `isFound` on `issue`, reusing the per-word cache when possible.
 *
 * The first time a word text is seen, the ignored / flagged / found states are computed
 * and stored in its `WordStatusInfo` record, which is then marked as finished (`fin`).
 * Later calls for the same text copy the stored results instead of hitting the dictionary.
 * A value already present in `issue.isFlagged` always takes precedence over the cache.
 *
 * @param issue - the issue to annotate; it is mutated in place.
 * @returns the same `issue` object.
 */
function checkWord(issue: ValidationIssue): ValidationIssueRO {
    const info = getWordInfo(issue.text);

    if (!info.fin) {
        // Slow path: compute everything once and remember it for this word text.
        const ignored = calcIgnored(info);
        const flagged = issue.isFlagged ?? calcFlagged(info);
        // Flagged words are never looked up; ignored words count as found.
        const found = flagged ? undefined : ignored || isWordValidWithEscapeRetry(dictCol, issue, issue.line);
        info.isFlagged = !!flagged;
        info.isFound = found;
        info.fin = true;
        issue.isFlagged = flagged;
        issue.isFound = found;
        return issue;
    }

    // Fast path: reuse the stored results.
    issue.isFlagged = issue.isFlagged ?? (!info.isIgnored && info.isFlagged);
    issue.isFound = info.isFound;
    return issue;
}

const fn: LineValidatorFn = (lineSegment: LineSegment) => {
function splitterIsValid(word: TextOffsetRO): boolean {
return (
setOfKnownSuccessfulWords.has(word.text) ||
(!testForFlaggedWord(word) && isWordValidWithEscapeRetry(dictCol, word, lineSegment.line))
(!isWordFlagged(word) && isWordValidWithEscapeRetry(dictCol, word, lineSegment.line))
);
}

Expand All @@ -112,24 +143,21 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return [vr];
}

const codeWordResults = toArray(
pipe(
Text.extractWordsFromCodeTextOffset(vr),
opFilter(filterAlreadyChecked),
opMap((t) => ({ ...t, line: vr.line })),
opMap(annotateIsFlagged),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opMap((wo) => (wo.isFlagged ? wo : checkWord(wo))),
opFilter(rememberFilter((wo) => wo.isFlagged || !wo.isFound)),
opFilter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))),

// get back the original text.
opMap((wo) => ({
...wo,
text: Text.extractText(lineSegment.segment, wo.offset, wo.offset + wo.text.length),
})),
),
);
const codeWordResults: ValidationIssueRO[] = [];

for (const wo of Text.extractWordsFromCodeTextOffset(vr)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = vr.line;
issue.isFlagged = undefined;
issue.isFound = undefined;
annotateIsFlagged(issue);
if (!isFlaggedOrMinLength(issue)) continue;
checkWord(issue);
if (!isFlaggedOrNotFound(issue) || !isNotRepeatingChar(issue)) continue;
issue.text = Text.extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
codeWordResults.push(issue);
}

if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr).isFound) {
rememberFilter((_) => false)(vr);
Expand All @@ -149,16 +177,17 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return [vr];
}

const mismatches: ValidationIssue[] = toArray(
pipe(
Text.extractWordsFromTextOffset(possibleWord),
opFilter((wo: TextOffsetWithLine) => filterAlreadyChecked(wo)),
opMap((wo: TextOffsetWithLine) => ((wo.line = lineSegment.line), wo as ValidationIssue)),
opMap(annotateIsFlagged),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opConcatMap(checkFullWord),
),
);
const mismatches: ValidationIssue[] = [];
for (const wo of Text.extractWordsFromTextOffset(possibleWord)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = lineSegment.line;
annotateIsFlagged(issue);
if (!isFlaggedOrMinLength(issue)) continue;
for (const w of checkFullWord(issue)) {
mismatches.push(w);
}
}
if (mismatches.length) {
// Try the more expensive word splitter
const splitResult = split(lineSegment.segment, possibleWord.offset, splitterIsValid);
Expand All @@ -179,6 +208,14 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return checkedPossibleWords;
};

/**
 * Fetch the cached status record for `word`, creating and registering a blank one
 * (all statuses `undefined`, `fin` false) the first time the word is requested.
 *
 * @param word - the word text used as the cache key.
 * @returns the shared, mutable `WordStatusInfo` record for the word.
 */
function getWordInfo(word: string): WordStatusInfo {
    let info = knownWords.get(word);
    if (!info) {
        info = { word, isFound: undefined, isFlagged: undefined, isIgnored: undefined, fin: false };
        knownWords.set(word, info);
    }
    return info;
}

return { fn, dict: dictCol };
}

Expand Down
Loading

0 comments on commit be8e3e4

Please sign in to comment.