From c9a5635aa57047e563c14df1507a1e478d725cc8 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Sun, 31 Jul 2022 23:15:16 +0200 Subject: [PATCH] Add tests for `getNodeFromOffset` Includes refactoring and fixes --- .../server-common/src/ast/nodeManager.ts | 4 + .../src/parser/yamlDocument.ts | 20 +- .../src/utils/indentationGuesser.ts | 235 ++++++++++++++++++ .../src/utils/textBuffer.ts | 18 +- .../tests/unit/yamlParser.test.ts | 52 ++++ 5 files changed, 318 insertions(+), 11 deletions(-) create mode 100644 server/packages/yaml-language-service/src/utils/indentationGuesser.ts diff --git a/server/packages/server-common/src/ast/nodeManager.ts b/server/packages/server-common/src/ast/nodeManager.ts index 1a9f16b..545e970 100644 --- a/server/packages/server-common/src/ast/nodeManager.ts +++ b/server/packages/server-common/src/ast/nodeManager.ts @@ -54,6 +54,10 @@ export class ASTNodeManager { return node === lastNode; } + public isRoot(node: ASTNode): boolean { + return node.parent === undefined; + } + public getPreviousSiblingNode(node: ASTNode): ASTNode | null { const parent = node.parent; if (!parent || !parent.children) { diff --git a/server/packages/yaml-language-service/src/parser/yamlDocument.ts b/server/packages/yaml-language-service/src/parser/yamlDocument.ts index af1cb2d..591d6be 100644 --- a/server/packages/yaml-language-service/src/parser/yamlDocument.ts +++ b/server/packages/yaml-language-service/src/parser/yamlDocument.ts @@ -2,12 +2,14 @@ import { ParsedDocument } from "@gxwf/server-common/src/ast/types"; import { TextDocument } from "vscode-languageserver-textdocument"; import { Diagnostic, DiagnosticSeverity, Position } from "vscode-languageserver-types"; import { Document, Node, visit, YAMLError, YAMLWarning } from "yaml"; +import { guessIndentation } from "../utils/indentationGuesser"; import { TextBuffer } from "../utils/textBuffer"; import { ASTNode, ObjectASTNodeImpl } from "./astTypes"; const FULL_LINE_ERROR = true; const YAML_SOURCE = "YAML"; const YAML_COMMENT_SYMBOL = "#"; +const DEFAULT_INDENTATION = 2; export class LineComment { constructor(public readonly text: string) {} @@ -20,11 +22,12 @@ export class LineComment { export class YAMLDocument implements ParsedDocument { private readonly _textBuffer: TextBuffer; private _diagnostics: Diagnostic[] | undefined; - private _configuredIndentation = 2; // TODO read this value from config + private _indentation: number; constructor(public readonly subDocuments: YAMLSubDocument[], public readonly textDocument: TextDocument) { this._textBuffer = new TextBuffer(textDocument); this._diagnostics = undefined; + this._indentation = guessIndentation(this._textBuffer, DEFAULT_INDENTATION, true).tabSize; } public get root(): ASTNode | undefined { @@ -64,20 +67,21 @@ export class YAMLDocument implements ParsedDocument { const rootNode = this.root as ObjectASTNodeImpl; if (!rootNode) return undefined; if (this.isComment(offset)) return undefined; + const position = this._textBuffer.getPosition(offset); + if (position.character === 0 && !this._textBuffer.hasTextAfterPosition(position)) return rootNode; const indentation = this._textBuffer.getLineIndentationAtOffset(offset); let result = rootNode.getNodeFromOffsetEndInclusive(offset); - if (!result || (result === rootNode && indentation != 0)) { - result = this.findParentNodeByIndentation(offset, indentation); + const parent = this.findParentNodeByIndentation(offset, indentation); + if (!result || (parent && result.offset < parent.offset && result.length > parent.length)) { + result = parent; } return result; } private findParentNodeByIndentation(offset: number, indentation: number): ASTNode | undefined { - if (indentation == 0) { - return this.root; - } - const parentIndentation = indentation - this._configuredIndentation; - const parentLine = this._textBuffer.getPreviousLineNumberWithIndentation(offset, parentIndentation); + if (indentation === 0) return this.root; + const parentIndentation = Math.max(0, indentation - this._indentation); + const parentLine = this._textBuffer.findPreviousLineWithSameIndentation(offset, parentIndentation); const parentOffset = this._textBuffer.getOffsetAt(Position.create(parentLine, parentIndentation)); const rootNode = this.root as ObjectASTNodeImpl; diff --git a/server/packages/yaml-language-service/src/utils/indentationGuesser.ts b/server/packages/yaml-language-service/src/utils/indentationGuesser.ts new file mode 100644 index 0000000..06d53ea --- /dev/null +++ b/server/packages/yaml-language-service/src/utils/indentationGuesser.ts @@ -0,0 +1,235 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +// Copied from: https://github.com/Microsoft/vscode/blob/main/src/vs/editor/common/model/indentationGuesser.ts + +import { CharCode } from "../parser/charCode"; +import { ITextBuffer } from "./textBuffer"; + +class SpacesDiffResult { + public spacesDiff = 0; + public looksLikeAlignment = false; +} + +/** + * Compute the diff in spaces between two line's indentation. + */ +function spacesDiff(a: string, aLength: number, b: string, bLength: number, result: SpacesDiffResult): void { + result.spacesDiff = 0; + result.looksLikeAlignment = false; + + // This can go both ways (e.g.): + // - a: "\t" + // - b: "\t " + // => This should count 1 tab and 4 spaces + + let i: number; + + for (i = 0; i < aLength && i < bLength; i++) { + const aCharCode = a.charCodeAt(i); + const bCharCode = b.charCodeAt(i); + + if (aCharCode !== bCharCode) { + break; + } + } + + let aSpacesCnt = 0, + aTabsCount = 0; + for (let j = i; j < aLength; j++) { + const aCharCode = a.charCodeAt(j); + if (aCharCode === CharCode.Space) { + aSpacesCnt++; + } else { + aTabsCount++; + } + } + + let bSpacesCnt = 0, + bTabsCount = 0; + for (let j = i; j < bLength; j++) { + const bCharCode = b.charCodeAt(j); + if (bCharCode === CharCode.Space) { + bSpacesCnt++; + } else { + bTabsCount++; + } + } + + if (aSpacesCnt > 0 && aTabsCount > 0) { + return; + } + if (bSpacesCnt > 0 && bTabsCount > 0) { + return; + } + + const tabsDiff = Math.abs(aTabsCount - bTabsCount); + const spacesDiff = Math.abs(aSpacesCnt - bSpacesCnt); + + if (tabsDiff === 0) { + // check if the indentation difference might be caused by alignment reasons + // sometime folks like to align their code, but this should not be used as a hint + result.spacesDiff = spacesDiff; + + if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length && bSpacesCnt < b.length) { + if (b.charCodeAt(bSpacesCnt) !== CharCode.Space && a.charCodeAt(bSpacesCnt - 1) === CharCode.Space) { + if (a.charCodeAt(a.length - 1) === CharCode.Comma) { + // This looks like an alignment desire: e.g. + // const a = b + c, + // d = b - c; + result.looksLikeAlignment = true; + } + } + } + return; + } + if (spacesDiff % tabsDiff === 0) { + result.spacesDiff = spacesDiff / tabsDiff; + return; + } +} + +/** + * Result for a guessIndentation + */ +export interface IGuessedIndentation { + /** + * If indentation is based on spaces (`insertSpaces` = true), then what is the number of spaces that make an indent? + */ + tabSize: number; + /** + * Is indentation based on spaces? + */ + insertSpaces: boolean; +} + +export function guessIndentation( + source: ITextBuffer, + defaultTabSize: number, + defaultInsertSpaces: boolean +): IGuessedIndentation { + // Look at most at the first 10k lines + const linesCount = Math.min(source.getLineCount(), 10000); + + let linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation + let linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation + + let previousLineText = ""; // content of latest line that contained non-whitespace chars + let previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char + + const ALLOWED_TAB_SIZE_GUESSES = [2, 4, 6, 8, 3, 5, 7]; // prefer even guesses for `tabSize`, limit to [2, 8]. + const MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8 + + const spacesDiffCount = [0, 0, 0, 0, 0, 0, 0, 0, 0]; // `tabSize` scores + const tmp = new SpacesDiffResult(); + + for (let lineNumber = 1; lineNumber <= linesCount; lineNumber++) { + const currentLineLength = source.getLineLength(lineNumber); + const currentLineText = source.getLineContent(lineNumber); + + // if the text buffer is chunk based, so long lines are cons-string, v8 will flattern the string when we check charCode. + // checking charCode on chunks directly is cheaper. + const useCurrentLineText = currentLineLength <= 65536; + + let currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars + let currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char + let currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation + let currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation + for (let j = 0, lenJ = currentLineLength; j < lenJ; j++) { + const charCode = useCurrentLineText ? currentLineText.charCodeAt(j) : source.getLineCharCode(lineNumber, j); + + if (charCode === CharCode.Tab) { + currentLineTabsCount++; + } else if (charCode === CharCode.Space) { + currentLineSpacesCount++; + } else { + // Hit non whitespace character on this line + currentLineHasContent = true; + currentLineIndentation = j; + break; + } + } + + // Ignore empty or only whitespace lines + if (!currentLineHasContent) { + continue; + } + + if (currentLineTabsCount > 0) { + linesIndentedWithTabsCount++; + } else if (currentLineSpacesCount > 1) { + linesIndentedWithSpacesCount++; + } + + spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation, tmp); + + if (tmp.looksLikeAlignment) { + // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation + // + // - item1 + // - item2 + // + // otherwise skip this line entirely + // + // const a = 1, + // b = 2; + + if (!(defaultInsertSpaces && defaultTabSize === tmp.spacesDiff)) { + continue; + } + } + + const currentSpacesDiff = tmp.spacesDiff; + if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) { + spacesDiffCount[currentSpacesDiff]++; + } + + previousLineText = currentLineText; + previousLineIndentation = currentLineIndentation; + } + + let insertSpaces = defaultInsertSpaces; + if (linesIndentedWithTabsCount !== linesIndentedWithSpacesCount) { + insertSpaces = linesIndentedWithTabsCount < linesIndentedWithSpacesCount; + } + + let tabSize = defaultTabSize; + + // Guess tabSize only if inserting spaces... + if (insertSpaces) { + let tabSizeScore = insertSpaces ? 0 : 0.1 * linesCount; + + // console.log("score threshold: " + tabSizeScore); + + ALLOWED_TAB_SIZE_GUESSES.forEach((possibleTabSize) => { + const possibleTabSizeScore = spacesDiffCount[possibleTabSize]; + if (possibleTabSizeScore > tabSizeScore) { + tabSizeScore = possibleTabSizeScore; + tabSize = possibleTabSize; + } + }); + + // Let a tabSize of 2 win even if it is not the maximum + // (only in case 4 was guessed) + if ( + tabSize === 4 && + spacesDiffCount[4] > 0 && + spacesDiffCount[2] > 0 && + spacesDiffCount[2] >= spacesDiffCount[4] / 2 + ) { + tabSize = 2; + } + } + + // console.log('--------------------------'); + // console.log('linesIndentedWithTabsCount: ' + linesIndentedWithTabsCount + ', linesIndentedWithSpacesCount: ' + linesIndentedWithSpacesCount); + // console.log('spacesDiffCount: ' + spacesDiffCount); + // console.log('tabSize: ' + tabSize + ', tabSizeScore: ' + tabSizeScore); + + return { + insertSpaces: insertSpaces, + tabSize: tabSize, + }; +} diff --git a/server/packages/yaml-language-service/src/utils/textBuffer.ts b/server/packages/yaml-language-service/src/utils/textBuffer.ts index baf0ff2..5b0f9d5 100644 --- a/server/packages/yaml-language-service/src/utils/textBuffer.ts +++ b/server/packages/yaml-language-service/src/utils/textBuffer.ts @@ -11,6 +11,13 @@ interface FullTextDocument { getLineOffsets(): number[]; } +export interface ITextBuffer { + getLineCount(): number; + getLineLength(lineNumber: number): number; + getLineCharCode(lineNumber: number, index: number): number; + getLineContent(lineNumber: number): string; +} + export class TextBuffer { constructor(private doc: TextDocument) {} @@ -68,6 +75,11 @@ export class TextBuffer { return text.substring(i + 1, offset); } + public hasTextAfterPosition(position: Position): boolean { + const lineContent = this.getLineContent(position.line); + return lineContent.charAt(position.character + 1).trim() !== ""; + } + public getLineIndentationAtOffset(offset: number): number { const position = this.getPosition(offset); const lineContent = this.getLineContent(position.line); @@ -75,15 +87,15 @@ export class TextBuffer { return indentation; } - public getPreviousLineNumberWithIndentation(offset: number, parentIndentation: number): number { + public findPreviousLineWithSameIndentation(offset: number, indentation: number): number { const position = this.getPosition(offset); - const indentationSpaces = " ".repeat(parentIndentation); + const indentationSpaces = " ".repeat(indentation); let currentLine = position.line - 1; let found = false; while (currentLine > 0 && !found) { const lineContent = this.getLineContent(currentLine); - if (lineContent.startsWith(indentationSpaces)) { + if (lineContent.startsWith(indentationSpaces) && lineContent.charCodeAt(indentation + 1) !== CharCode.Space) { found = true; } else { currentLine--; diff --git a/server/packages/yaml-language-service/tests/unit/yamlParser.test.ts b/server/packages/yaml-language-service/tests/unit/yamlParser.test.ts index aa2d068..af10d40 100644 --- a/server/packages/yaml-language-service/tests/unit/yamlParser.test.ts +++ b/server/packages/yaml-language-service/tests/unit/yamlParser.test.ts @@ -158,3 +158,55 @@ namespace: defaul`; ).toBeDefined(); }); }); + +describe("YamlDocument", () => { + it("should identify comments", () => { + const parsedDocument = parse("# a comment\ntest: test\n# a second comment"); + let isComment = parsedDocument.isComment(11); + expect(isComment).toBe(true); + isComment = parsedDocument.isComment(12); + expect(isComment).toBe(false); + isComment = parsedDocument.isComment(22); + expect(isComment).toBe(false); + isComment = parsedDocument.isComment(23); + expect(isComment).toBe(true); + }); + describe("getNodeFromOffset", () => { + const SAMPLE_DOC = ` +test: value +test02: + prop03: + test04: val + +`; + it.each([ + // [0, "_root_"], // _root_ is not a property, is an object + [1, "test"], + [12, "test"], + [13, "test02"], + [20, "test02"], + [22, "test02"], + [23, "prop03"], + [30, "prop03"], + [31, "_root_"], + [35, "test04"], + [46, "test04"], + [47, "_root_"], + [49, "test02"], + [51, "prop03"], + [53, "test04"], + ])("should return for offset %p the expected node name %p", (offset: number, expectedNodeName: string) => { + const parsedDocument = parse(SAMPLE_DOC); + const node = parsedDocument.getNodeFromOffset(offset); + expect(node).toBeDefined(); + if (expectedNodeName === "_root_") { + expect(node?.parent).toBeUndefined(); + } else { + expect(node?.parent).toBeDefined(); + } + if (node?.type === "property") { + expect(node.keyNode.value).toBe(expectedNodeName); + } + }); + }); +});