From 86b2e52b0cd2317fc02517711e99cb510d660d23 Mon Sep 17 00:00:00 2001 From: Alex Dima Date: Mon, 24 Oct 2016 16:10:56 +0200 Subject: [PATCH] Generate mode transitions from TM tokenizer (#14136) --- .../common/model/textModelWithTokens.ts | 26 +-- src/vs/editor/node/textMate/TMSyntax.ts | 33 +++- .../test/node/textMate/TMSyntax.test.ts | 173 ++++++++++++------ 3 files changed, 141 insertions(+), 91 deletions(-) diff --git a/src/vs/editor/common/model/textModelWithTokens.ts b/src/vs/editor/common/model/textModelWithTokens.ts index d5477b5f135ed..e208747451ad6 100644 --- a/src/vs/editor/common/model/textModelWithTokens.ts +++ b/src/vs/editor/common/model/textModelWithTokens.ts @@ -296,15 +296,9 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke var lineNumber = validPosition.lineNumber; var column = validPosition.column; - if (column === 1) { - return this.getStateBeforeLine(lineNumber).getModeId(); - } else if (column === this.getLineMaxColumn(lineNumber)) { - return this.getStateAfterLine(lineNumber).getModeId(); - } else { - var modeTransitions = this._getLineModeTransitions(lineNumber); - var modeTransitionIndex = ModeTransition.findIndexInSegmentsArray(modeTransitions, column - 1); - return modeTransitions[modeTransitionIndex].modeId; - } + var modeTransitions = this._getLineModeTransitions(lineNumber); + var modeTransitionIndex = ModeTransition.findIndexInSegmentsArray(modeTransitions, column - 1); + return modeTransitions[modeTransitionIndex].modeId; } protected _invalidateLine(lineIndex: number): void { @@ -400,20 +394,6 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke }); } - private getStateBeforeLine(lineNumber: number): IState { - this._withModelTokensChangedEventBuilder((eventBuilder) => { - this._updateTokensUntilLine(eventBuilder, lineNumber - 1, true); - }); - return this._lines[lineNumber - 1].getState(); - } - - private getStateAfterLine(lineNumber: number): IState { - this._withModelTokensChangedEventBuilder((eventBuilder) => { - this._updateTokensUntilLine(eventBuilder, lineNumber, true); - }); - return lineNumber < this._lines.length ? this._lines[lineNumber].getState() : this._lastState; - } - _getLineModeTransitions(lineNumber: number): ModeTransition[] { if (lineNumber < 1 || lineNumber > this.getLineCount()) { throw new Error('Illegal value ' + lineNumber + ' for `lineNumber`'); diff --git a/src/vs/editor/node/textMate/TMSyntax.ts b/src/vs/editor/node/textMate/TMSyntax.ts index 2eea8a1cddfde..f02db32f4aad8 100644 --- a/src/vs/editor/node/textMate/TMSyntax.ts +++ b/src/vs/editor/node/textMate/TMSyntax.ts @@ -266,7 +266,7 @@ export class TMScopesDecodeData { /** * The resolved language. */ - private readonly language: string; + public readonly language: string; constructor(parent: TMScopesDecodeData, scope: TMScopeDecodeData) { // 1) Inherit data from `parent`. @@ -345,7 +345,7 @@ export class DecodeMap { public getToken(tokenMap: boolean[]): string { let result = ''; let isFirst = true; - for (let i = 1; i <= this.lastAssignedTokenId; i++) { + for (let i = 1, len = tokenMap.length; i < len; i++) { if (tokenMap[i]) { if (isFirst) { isFirst = false; @@ -400,34 +400,53 @@ class Tokenizer { } export function decodeTextMateTokens(line: string, offsetDelta: number, decodeMap: DecodeMap, resultTokens: IToken[], resultState: TMState): LineTokens { + const topLevelModeId = resultState.getModeId(); + // Create the result early and fill in the tokens later let tokens: Token[] = []; + let modeTransitions: ModeTransition[] = []; let lastTokenType: string = null; + let lastModeId: string = null; + for (let tokenIndex = 0, len = resultTokens.length; tokenIndex < len; tokenIndex++) { let token = resultTokens[tokenIndex]; let tokenStartIndex = token.startIndex; - let tokenType = decodeTextMateToken(decodeMap, token.scopes); + + let tokenType = ''; + let tokenModeId = topLevelModeId; + let decodedToken = decodeTextMateToken(decodeMap, token.scopes); + if (decodedToken) { + tokenType = decodeMap.getToken(decodedToken.tokensMask); + if (decodedToken.language) { + tokenModeId = decodedToken.language; + } + } // do not push a new token if the type is exactly the same (also helps with ligatures) if (tokenType !== lastTokenType) { tokens.push(new Token(tokenStartIndex + offsetDelta, tokenType)); lastTokenType = tokenType; } + + if (tokenModeId !== lastModeId) { + modeTransitions.push(new ModeTransition(tokenStartIndex + offsetDelta, tokenModeId)); + lastModeId = tokenModeId; + } } return new LineTokens( tokens, - [new ModeTransition(offsetDelta, resultState.getModeId())], + modeTransitions, offsetDelta + line.length, resultState ); } -export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): string { +export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): TMScopesDecodeData { if (scopes.length <= 1) { // fast case - return ''; + return null; } const prevTokenScopes = decodeMap.prevTokenScopes; @@ -456,5 +475,5 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str } decodeMap.prevTokenScopes = resultScopes; - return decodeMap.getToken(lastResultScope.tokensMask); + return lastResultScope; } diff --git a/src/vs/editor/test/node/textMate/TMSyntax.test.ts b/src/vs/editor/test/node/textMate/TMSyntax.test.ts index 863572eb81406..dd51ca2bda3fb 100644 --- a/src/vs/editor/test/node/textMate/TMSyntax.test.ts +++ b/src/vs/editor/test/node/textMate/TMSyntax.test.ts @@ -11,69 +11,109 @@ import { TMState } from 'vs/editor/common/modes/TMState'; suite('TextMate.TMScopeRegistry', () => { test('getFilePath', () => { - let manager = new TMScopeRegistry(); - - manager.register('a', 'source.a', './grammar/a.tmLanguage'); - assert.equal(manager.getFilePath('source.a'), './grammar/a.tmLanguage'); - assert.equal(manager.getFilePath('a'), null); - assert.equal(manager.getFilePath('source.b'), null); - assert.equal(manager.getFilePath('b'), null); - - manager.register('b', 'source.b', './grammar/b.tmLanguage'); - assert.equal(manager.getFilePath('source.a'), './grammar/a.tmLanguage'); - assert.equal(manager.getFilePath('a'), null); - assert.equal(manager.getFilePath('source.b'), './grammar/b.tmLanguage'); - assert.equal(manager.getFilePath('b'), null); - - manager.register('a', 'source.a', './grammar/ax.tmLanguage'); - assert.equal(manager.getFilePath('source.a'), './grammar/ax.tmLanguage'); - assert.equal(manager.getFilePath('a'), null); - assert.equal(manager.getFilePath('source.b'), './grammar/b.tmLanguage'); - assert.equal(manager.getFilePath('b'), null); + let registry = new TMScopeRegistry(); + + registry.register('a', 'source.a', './grammar/a.tmLanguage'); + assert.equal(registry.getFilePath('source.a'), './grammar/a.tmLanguage'); + assert.equal(registry.getFilePath('a'), null); + assert.equal(registry.getFilePath('source.b'), null); + assert.equal(registry.getFilePath('b'), null); + + registry.register('b', 'source.b', './grammar/b.tmLanguage'); + assert.equal(registry.getFilePath('source.a'), './grammar/a.tmLanguage'); + assert.equal(registry.getFilePath('a'), null); + assert.equal(registry.getFilePath('source.b'), './grammar/b.tmLanguage'); + assert.equal(registry.getFilePath('b'), null); + + registry.register('a', 'source.a', './grammar/ax.tmLanguage'); + assert.equal(registry.getFilePath('source.a'), './grammar/ax.tmLanguage'); + assert.equal(registry.getFilePath('a'), null); + assert.equal(registry.getFilePath('source.b'), './grammar/b.tmLanguage'); + assert.equal(registry.getFilePath('b'), null); }); test('scopeToLanguage', () => { - let manager = new TMScopeRegistry(); + let registry = new TMScopeRegistry(); - assert.equal(manager.scopeToLanguage('source.html'), null); + assert.equal(registry.scopeToLanguage('source.html'), null); - manager.register('html', 'source.html', null); - manager.register('c', 'source.c', null); - manager.register('css', 'source.css', null); - manager.register('javascript', 'source.js', null); - manager.register('python', 'source.python', null); - manager.register('smarty', 'source.smarty', null); - manager.register(null, 'source.baz', null); + registry.register('html', 'source.html', null); + registry.register('c', 'source.c', null); + registry.register('css', 'source.css', null); + registry.register('javascript', 'source.js', null); + registry.register('python', 'source.python', null); + registry.register('smarty', 'source.smarty', null); + registry.register(null, 'source.baz', null); // exact matches - assert.equal(manager.scopeToLanguage('source.html'), 'html'); - assert.equal(manager.scopeToLanguage('source.css'), 'css'); - assert.equal(manager.scopeToLanguage('source.c'), 'c'); - assert.equal(manager.scopeToLanguage('source.js'), 'javascript'); - assert.equal(manager.scopeToLanguage('source.python'), 'python'); - assert.equal(manager.scopeToLanguage('source.smarty'), 'smarty'); + assert.equal(registry.scopeToLanguage('source.html'), 'html'); + assert.equal(registry.scopeToLanguage('source.css'), 'css'); + assert.equal(registry.scopeToLanguage('source.c'), 'c'); + assert.equal(registry.scopeToLanguage('source.js'), 'javascript'); + assert.equal(registry.scopeToLanguage('source.python'), 'python'); + assert.equal(registry.scopeToLanguage('source.smarty'), 'smarty'); // prefix matches - assert.equal(manager.scopeToLanguage('source.css.embedded.html'), 'css'); - assert.equal(manager.scopeToLanguage('source.js.embedded.html'), 'javascript'); - assert.equal(manager.scopeToLanguage('source.python.embedded.html'), 'python'); - assert.equal(manager.scopeToLanguage('source.smarty.embedded.html'), 'smarty'); + assert.equal(registry.scopeToLanguage('source.css.embedded.html'), 'css'); + assert.equal(registry.scopeToLanguage('source.js.embedded.html'), 'javascript'); + assert.equal(registry.scopeToLanguage('source.python.embedded.html'), 'python'); + assert.equal(registry.scopeToLanguage('source.smarty.embedded.html'), 'smarty'); // misses - assert.equal(manager.scopeToLanguage('source.ts'), null); - assert.equal(manager.scopeToLanguage('source.csss'), null); - assert.equal(manager.scopeToLanguage('source.baz'), null); - assert.equal(manager.scopeToLanguage('asource.css'), null); - assert.equal(manager.scopeToLanguage('a.source.css'), null); - assert.equal(manager.scopeToLanguage('source_css'), null); - assert.equal(manager.scopeToLanguage('punctuation.definition.tag.html'), null); + assert.equal(registry.scopeToLanguage('source.ts'), null); + assert.equal(registry.scopeToLanguage('source.csss'), null); + assert.equal(registry.scopeToLanguage('source.baz'), null); + assert.equal(registry.scopeToLanguage('asource.css'), null); + assert.equal(registry.scopeToLanguage('a.source.css'), null); + assert.equal(registry.scopeToLanguage('source_css'), null); + assert.equal(registry.scopeToLanguage('punctuation.definition.tag.html'), null); }); }); suite('TextMate.decodeTextMateTokens', () => { - test('html and embedded modes', () => { + test('embedded modes', () => { + let registry = new TMScopeRegistry(); + + registry.register('html', 'source.html', null); + registry.register('c', 'source.c', null); + registry.register('css', 'source.css', null); + registry.register('javascript', 'source.js', null); + registry.register('python', 'source.python', null); + registry.register('smarty', 'source.smarty', null); + registry.register(null, 'source.baz', null); + + let decodeMap = new DecodeMap(registry); + let actual = decodeTextMateTokens( + 'texttext', + 0, + decodeMap, + [ + { startIndex: 0, endIndex: 4, scopes: ['source.html'] }, + { startIndex: 4, endIndex: 11, scopes: ['source.html', 'style.tag.open'] }, + { startIndex: 11, endIndex: 17, scopes: ['source.html', 'source.css'] }, + { startIndex: 17, endIndex: 25, scopes: ['source.html', 'style.tag.close'] }, + { startIndex: 25, endIndex: 33, scopes: ['source.html', 'script.tag.open'] }, + { startIndex: 33, endIndex: 41, scopes: ['source.html', 'source.js'] }, + { startIndex: 41, endIndex: 50, scopes: ['source.html', 'script.tag.close'] }, + { startIndex: 50, endIndex: 54, scopes: ['source.html'] }, + ], + new TMState('html', null, null) + ); + + let actualModeTransitions = actual.modeTransitions.map((t) => { return { startIndex: t.startIndex, modeId: t.modeId }; }); + + assert.deepEqual(actualModeTransitions, [ + { startIndex: 0, modeId: 'html' }, + { startIndex: 11, modeId: 'css' }, + { startIndex: 17, modeId: 'html' }, + { startIndex: 33, modeId: 'javascript' }, + { startIndex: 41, modeId: 'html' }, + ]); + }); + + test('html and embedded', () => { var tests = [ { @@ -233,7 +273,7 @@ suite('TextMate.decodeTextMateTokens', () => { { startIndex: 22, type: 'html.punctuation.definition.end.string.quoted.double.source.css.embedded' }, { startIndex: 23, type: 'tag.html.punctuation.definition.source.css.embedded' }, ], - modeTransitions: [{ startIndex: 0, modeId: 'html' }] + modeTransitions: [{ startIndex: 0, modeId: 'css' }] }, { line: '\t\th1 {', tmTokens: [ @@ -248,7 +288,7 @@ suite('TextMate.decodeTextMateTokens', () => { { startIndex: 4, type: 'meta.html.source.css.embedded.selector' }, { startIndex: 5, type: 'meta.html.punctuation.begin.source.css.embedded.property-list.section' }, ], - modeTransitions: [{ startIndex: 0, modeId: 'html' }] + modeTransitions: [{ startIndex: 0, modeId: 'css' }] }, { line: '\t\t\tcolor: #CCA3A3;', tmTokens: [ @@ -269,7 +309,7 @@ suite('TextMate.decodeTextMateTokens', () => { { startIndex: 11, type: 'meta.html.other.source.css.embedded.property-list.property-value.constant.color.rgb-value' }, { startIndex: 17, type: 'meta.html.punctuation.source.css.embedded.property-list.property-value.terminator.rule' }, ], - modeTransitions: [{ startIndex: 0, modeId: 'html' }] + modeTransitions: [{ startIndex: 0, modeId: 'css' }] }, { line: '\t\t}', tmTokens: [ @@ -280,7 +320,7 @@ suite('TextMate.decodeTextMateTokens', () => { { startIndex: 0, type: 'meta.html.source.css.embedded.property-list' }, { startIndex: 2, type: 'meta.html.punctuation.end.source.css.embedded.property-list.section' }, ], - modeTransitions: [{ startIndex: 0, modeId: 'html' }] + modeTransitions: [{ startIndex: 0, modeId: 'css' }] }, { line: '\t', tmTokens: [ @@ -295,7 +335,7 @@ suite('TextMate.decodeTextMateTokens', () => { { startIndex: 3, type: 'tag.html.entity.name.source.css.embedded.style' }, { startIndex: 8, type: 'tag.html.punctuation.definition.source.css.embedded' }, ], - modeTransitions: [{ startIndex: 0, modeId: 'html' }] + modeTransitions: [{ startIndex: 0, modeId: 'css' }] }, { line: '\t', + line: '\tAfter', tmTokens: [ { startIndex: 0, endIndex: 1, scopes: ['text.html.basic', 'source.js.embedded.html'] }, { startIndex: 1, endIndex: 3, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] }, { startIndex: 3, endIndex: 9, scopes: ['text.html.basic', 'source.js.embedded.html', 'entity.name.tag.script.html'] }, - { startIndex: 9, endIndex: 10, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] } + { startIndex: 9, endIndex: 10, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] }, + { startIndex: 10, endIndex: 15, scopes: ['text.html.basic'] } ], tokens: [ { startIndex: 0, type: 'html.source.embedded.js' }, { startIndex: 1, type: 'tag.html.punctuation.definition.source.embedded.js' }, { startIndex: 3, type: 'tag.html.entity.name.source.embedded.js.script' }, { startIndex: 9, type: 'tag.html.punctuation.definition.source.embedded.js' }, + { startIndex: 10, type: '' }, ], - modeTransitions: [{ startIndex: 0, modeId: 'html' }] + modeTransitions: [{ startIndex: 0, modeId: 'javascript' }, { startIndex: 10, modeId: 'html' }] }, { line: '', tmTokens: [ @@ -486,15 +528,23 @@ suite('TextMate.decodeTextMateTokens', () => { } ]; - let decodeMap = new DecodeMap(new TMScopeRegistry()); + let registry = new TMScopeRegistry(); + + registry.register('html', 'source.html', null); + registry.register('c', 'source.c', null); + registry.register('css', 'source.css', null); + registry.register('javascript', 'source.js', null); + registry.register('python', 'source.python', null); + registry.register('smarty', 'source.smarty', null); + registry.register(null, 'source.baz', null); - let state = new TMState('html', null, null); + let decodeMap = new DecodeMap(registry); for (let i = 0, len = tests.length; i < len; i++) { let test = tests[i]; - let actual = decodeTextMateTokens(test.line, 0, decodeMap, test.tmTokens, state); + let actual = decodeTextMateTokens(test.line, 0, decodeMap, test.tmTokens, new TMState('html', null, null)); - let actualTokens = actual.tokens.map((t) => { return { startIndex: t.startIndex, type: t.type}; }); + let actualTokens = actual.tokens.map((t) => { return { startIndex: t.startIndex, type: t.type }; }); let actualModeTransitions = actual.modeTransitions.map((t) => { return { startIndex: t.startIndex, modeId: t.modeId }; }); assert.deepEqual(actualTokens, test.tokens, 'test ' + test.line); @@ -526,7 +576,8 @@ suite('textMate', () => { } function testOneDecodeTextMateToken(decodeMap: DecodeMap, scopes: string[], expected: string): void { - let actual = decodeTextMateToken(decodeMap, scopes); + let actualDecodedToken = decodeTextMateToken(decodeMap, scopes); + let actual = actualDecodedToken ? decodeMap.getToken(actualDecodedToken.tokensMask) : ''; assert.equal(actual, expected); // Sanity-check