Skip to content

Commit

Permalink
Generate mode transitions from TM tokenizer (#14136)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexdima committed Oct 24, 2016
1 parent 8a45895 commit 86b2e52
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 91 deletions.
26 changes: 3 additions & 23 deletions src/vs/editor/common/model/textModelWithTokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,15 +296,9 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke
var lineNumber = validPosition.lineNumber;
var column = validPosition.column;

if (column === 1) {
return this.getStateBeforeLine(lineNumber).getModeId();
} else if (column === this.getLineMaxColumn(lineNumber)) {
return this.getStateAfterLine(lineNumber).getModeId();
} else {
var modeTransitions = this._getLineModeTransitions(lineNumber);
var modeTransitionIndex = ModeTransition.findIndexInSegmentsArray(modeTransitions, column - 1);
return modeTransitions[modeTransitionIndex].modeId;
}
var modeTransitions = this._getLineModeTransitions(lineNumber);
var modeTransitionIndex = ModeTransition.findIndexInSegmentsArray(modeTransitions, column - 1);
return modeTransitions[modeTransitionIndex].modeId;
}

protected _invalidateLine(lineIndex: number): void {
Expand Down Expand Up @@ -400,20 +394,6 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke
});
}

/**
 * Returns the state held by the `_lines` entry at index `lineNumber - 1`, after first
 * asking the model to update tokens up to line `lineNumber - 1`.
 *
 * The update runs inside `_withModelTokensChangedEventBuilder`, and the entry is only
 * read once that call has returned.
 *
 * NOTE(review): the meaning of the `true` flag passed to `_updateTokensUntilLine` is not
 * visible here — confirm against its definition.
 *
 * @param lineNumber line whose preceding state is requested; presumably 1-based — confirm with callers.
 * @returns whatever `getState()` yields for `this._lines[lineNumber - 1]`.
 */
private getStateBeforeLine(lineNumber: number): IState {
    const previous = lineNumber - 1;
    this._withModelTokensChangedEventBuilder((builder) => {
        this._updateTokensUntilLine(builder, previous, true);
    });
    // Read the entry only after the update above has completed.
    const entry = this._lines[previous];
    return entry.getState();
}

/**
 * Returns the state that follows line `lineNumber`, after first asking the model to
 * update tokens up to and including that line.
 *
 * When `lineNumber` is still a valid index into `_lines`, the state comes from that
 * entry; otherwise the model's `_lastState` is returned.
 *
 * NOTE(review): the meaning of the `true` flag passed to `_updateTokensUntilLine` is not
 * visible here — confirm against its definition.
 *
 * @param lineNumber line whose trailing state is requested; presumably 1-based — confirm with callers.
 * @returns `this._lines[lineNumber].getState()` when that index exists, else `this._lastState`.
 */
private getStateAfterLine(lineNumber: number): IState {
    this._withModelTokensChangedEventBuilder((builder) => {
        this._updateTokensUntilLine(builder, lineNumber, true);
    });
    // The bounds check is evaluated after the update, exactly like the original ternary.
    if (lineNumber < this._lines.length) {
        return this._lines[lineNumber].getState();
    }
    return this._lastState;
}

_getLineModeTransitions(lineNumber: number): ModeTransition[] {
if (lineNumber < 1 || lineNumber > this.getLineCount()) {
throw new Error('Illegal value ' + lineNumber + ' for `lineNumber`');
Expand Down
33 changes: 26 additions & 7 deletions src/vs/editor/node/textMate/TMSyntax.ts
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ export class TMScopesDecodeData {
/**
* The resolved language.
*/
private readonly language: string;
public readonly language: string;

constructor(parent: TMScopesDecodeData, scope: TMScopeDecodeData) {
// 1) Inherit data from `parent`.
Expand Down Expand Up @@ -345,7 +345,7 @@ export class DecodeMap {
public getToken(tokenMap: boolean[]): string {
let result = '';
let isFirst = true;
for (let i = 1; i <= this.lastAssignedTokenId; i++) {
for (let i = 1, len = tokenMap.length; i < len; i++) {
if (tokenMap[i]) {
if (isFirst) {
isFirst = false;
Expand Down Expand Up @@ -400,34 +400,53 @@ class Tokenizer {
}

/**
 * Converts the TextMate tokens of one line into a `LineTokens`, producing both the list
 * of typed tokens and the list of mode transitions for that line.
 *
 * NOTE(review): this span comes from a rendered diff, so pre-change and post-change lines
 * appear together (two `tokenType` declarations in the loop, two mode-transition arguments
 * to `LineTokens`). Presumably only the newer line of each pair is meant to be live —
 * confirm against the commit before treating this text as compilable code.
 *
 * @param line the text of the line; only its `length` is read here.
 * @param offsetDelta added to every emitted start index and to the line length.
 * @param decodeMap handed to `decodeTextMateToken` and used to turn a tokens mask into a type string.
 * @param resultTokens the tokens to decode; `startIndex` and `scopes` are read from each.
 * @param resultState supplies the fallback mode id and is passed through to the result.
 * @returns a `LineTokens` built from the collected tokens and mode transitions.
 */
export function decodeTextMateTokens(line: string, offsetDelta: number, decodeMap: DecodeMap, resultTokens: IToken[], resultState: TMState): LineTokens {
// Mode id used for any token whose decoded scopes do not carry a language.
const topLevelModeId = resultState.getModeId();

// Create the result early and fill in the tokens later
let tokens: Token[] = [];
let modeTransitions: ModeTransition[] = [];

// Type / mode of the most recently pushed entry; null so the first token always pushes.
let lastTokenType: string = null;
let lastModeId: string = null;

for (let tokenIndex = 0, len = resultTokens.length; tokenIndex < len; tokenIndex++) {
let token = resultTokens[tokenIndex];
let tokenStartIndex = token.startIndex;
// NOTE(review): looks like the pre-change form of the decode call; the block below
// re-declares `tokenType` — confirm which one is current.
let tokenType = decodeTextMateToken(decodeMap, token.scopes);

// Defaults that stand when decoding yields nothing: empty type, top-level mode.
let tokenType = '';
let tokenModeId = topLevelModeId;
let decodedToken = decodeTextMateToken(decodeMap, token.scopes);
if (decodedToken) {
tokenType = decodeMap.getToken(decodedToken.tokensMask);
// Only override the mode when the decoded scopes resolved to a language.
if (decodedToken.language) {
tokenModeId = decodedToken.language;
}
}

// do not push a new token if the type is exactly the same (also helps with ligatures)
if (tokenType !== lastTokenType) {
tokens.push(new Token(tokenStartIndex + offsetDelta, tokenType));
lastTokenType = tokenType;
}

// Same coalescing for modes: record a transition only when the mode id changes.
if (tokenModeId !== lastModeId) {
modeTransitions.push(new ModeTransition(tokenStartIndex + offsetDelta, tokenModeId));
lastModeId = tokenModeId;
}
}

return new LineTokens(
tokens,
// NOTE(review): the next line looks like the pre-change argument (a single transition
// for the whole line); `modeTransitions` below appears to replace it — confirm.
[new ModeTransition(offsetDelta, resultState.getModeId())],
modeTransitions,
offsetDelta + line.length,
resultState
);
}

export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): string {
export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): TMScopesDecodeData {
if (scopes.length <= 1) {
// fast case
return '';
return null;
}

const prevTokenScopes = decodeMap.prevTokenScopes;
Expand Down Expand Up @@ -456,5 +475,5 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str
}

decodeMap.prevTokenScopes = resultScopes;
return decodeMap.getToken(lastResultScope.tokensMask);
return lastResultScope;
}
173 changes: 112 additions & 61 deletions src/vs/editor/test/node/textMate/TMSyntax.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,69 +11,109 @@ import { TMState } from 'vs/editor/common/modes/TMState';
suite('TextMate.TMScopeRegistry', () => {

// Checks that getFilePath() returns the path registered for a scope name, yields null for
// language ids and for scopes that were never registered, and reflects the most recent
// registration when a scope is registered again.
// NOTE(review): the `manager` and `registry` sections run the same sequence of assertions;
// this looks like both sides of a rename rendered together by the diff view — confirm which
// one is current.
test('getFilePath', () => {
let manager = new TMScopeRegistry();

// Only 'source.a' is known at this point.
manager.register('a', 'source.a', './grammar/a.tmLanguage');
assert.equal(manager.getFilePath('source.a'), './grammar/a.tmLanguage');
assert.equal(manager.getFilePath('a'), null);
assert.equal(manager.getFilePath('source.b'), null);
assert.equal(manager.getFilePath('b'), null);

// Adding 'source.b' must not disturb 'source.a'.
manager.register('b', 'source.b', './grammar/b.tmLanguage');
assert.equal(manager.getFilePath('source.a'), './grammar/a.tmLanguage');
assert.equal(manager.getFilePath('a'), null);
assert.equal(manager.getFilePath('source.b'), './grammar/b.tmLanguage');
assert.equal(manager.getFilePath('b'), null);

// Registering 'source.a' again replaces its path.
manager.register('a', 'source.a', './grammar/ax.tmLanguage');
assert.equal(manager.getFilePath('source.a'), './grammar/ax.tmLanguage');
assert.equal(manager.getFilePath('a'), null);
assert.equal(manager.getFilePath('source.b'), './grammar/b.tmLanguage');
assert.equal(manager.getFilePath('b'), null);
let registry = new TMScopeRegistry();

// Same three steps as above, against a second, independent registry instance.
registry.register('a', 'source.a', './grammar/a.tmLanguage');
assert.equal(registry.getFilePath('source.a'), './grammar/a.tmLanguage');
assert.equal(registry.getFilePath('a'), null);
assert.equal(registry.getFilePath('source.b'), null);
assert.equal(registry.getFilePath('b'), null);

registry.register('b', 'source.b', './grammar/b.tmLanguage');
assert.equal(registry.getFilePath('source.a'), './grammar/a.tmLanguage');
assert.equal(registry.getFilePath('a'), null);
assert.equal(registry.getFilePath('source.b'), './grammar/b.tmLanguage');
assert.equal(registry.getFilePath('b'), null);

registry.register('a', 'source.a', './grammar/ax.tmLanguage');
assert.equal(registry.getFilePath('source.a'), './grammar/ax.tmLanguage');
assert.equal(registry.getFilePath('a'), null);
assert.equal(registry.getFilePath('source.b'), './grammar/b.tmLanguage');
assert.equal(registry.getFilePath('b'), null);
});

// Checks scopeToLanguage(): null before anything is registered, the registered language for
// an exact scope name, the same language for longer scope names that start with a registered
// scope, and null for everything else.
// NOTE(review): every step is asserted once on `manager` and once on `registry`; this looks
// like both sides of a rename rendered together by the diff view — confirm which one is current.
test('scopeToLanguage', () => {
let manager = new TMScopeRegistry();
let registry = new TMScopeRegistry();

// Nothing registered yet, so the lookup misses.
assert.equal(manager.scopeToLanguage('source.html'), null);
assert.equal(registry.scopeToLanguage('source.html'), null);

// 'source.baz' is registered with a null language; the "misses" section expects null for it.
manager.register('html', 'source.html', null);
manager.register('c', 'source.c', null);
manager.register('css', 'source.css', null);
manager.register('javascript', 'source.js', null);
manager.register('python', 'source.python', null);
manager.register('smarty', 'source.smarty', null);
manager.register(null, 'source.baz', null);
registry.register('html', 'source.html', null);
registry.register('c', 'source.c', null);
registry.register('css', 'source.css', null);
registry.register('javascript', 'source.js', null);
registry.register('python', 'source.python', null);
registry.register('smarty', 'source.smarty', null);
registry.register(null, 'source.baz', null);

// exact matches
assert.equal(manager.scopeToLanguage('source.html'), 'html');
assert.equal(manager.scopeToLanguage('source.css'), 'css');
assert.equal(manager.scopeToLanguage('source.c'), 'c');
assert.equal(manager.scopeToLanguage('source.js'), 'javascript');
assert.equal(manager.scopeToLanguage('source.python'), 'python');
assert.equal(manager.scopeToLanguage('source.smarty'), 'smarty');
assert.equal(registry.scopeToLanguage('source.html'), 'html');
assert.equal(registry.scopeToLanguage('source.css'), 'css');
assert.equal(registry.scopeToLanguage('source.c'), 'c');
assert.equal(registry.scopeToLanguage('source.js'), 'javascript');
assert.equal(registry.scopeToLanguage('source.python'), 'python');
assert.equal(registry.scopeToLanguage('source.smarty'), 'smarty');

// prefix matches
assert.equal(manager.scopeToLanguage('source.css.embedded.html'), 'css');
assert.equal(manager.scopeToLanguage('source.js.embedded.html'), 'javascript');
assert.equal(manager.scopeToLanguage('source.python.embedded.html'), 'python');
assert.equal(manager.scopeToLanguage('source.smarty.embedded.html'), 'smarty');
assert.equal(registry.scopeToLanguage('source.css.embedded.html'), 'css');
assert.equal(registry.scopeToLanguage('source.js.embedded.html'), 'javascript');
assert.equal(registry.scopeToLanguage('source.python.embedded.html'), 'python');
assert.equal(registry.scopeToLanguage('source.smarty.embedded.html'), 'smarty');

// misses
// NOTE(review): 'source.csss' and 'source_css' missing suggests the prefix must end at a
// '.' boundary — confirm against the registry implementation.
assert.equal(manager.scopeToLanguage('source.ts'), null);
assert.equal(manager.scopeToLanguage('source.csss'), null);
assert.equal(manager.scopeToLanguage('source.baz'), null);
assert.equal(manager.scopeToLanguage('asource.css'), null);
assert.equal(manager.scopeToLanguage('a.source.css'), null);
assert.equal(manager.scopeToLanguage('source_css'), null);
assert.equal(manager.scopeToLanguage('punctuation.definition.tag.html'), null);
assert.equal(registry.scopeToLanguage('source.ts'), null);
assert.equal(registry.scopeToLanguage('source.csss'), null);
assert.equal(registry.scopeToLanguage('source.baz'), null);
assert.equal(registry.scopeToLanguage('asource.css'), null);
assert.equal(registry.scopeToLanguage('a.source.css'), null);
assert.equal(registry.scopeToLanguage('source_css'), null);
assert.equal(registry.scopeToLanguage('punctuation.definition.tag.html'), null);
});

});

suite('TextMate.decodeTextMateTokens', () => {

test('html and embedded modes', () => {
test('embedded modes', () => {
let registry = new TMScopeRegistry();

registry.register('html', 'source.html', null);
registry.register('c', 'source.c', null);
registry.register('css', 'source.css', null);
registry.register('javascript', 'source.js', null);
registry.register('python', 'source.python', null);
registry.register('smarty', 'source.smarty', null);
registry.register(null, 'source.baz', null);

let decodeMap = new DecodeMap(registry);
let actual = decodeTextMateTokens(
'text<style>body{}</style><script>var x=3;</script>text',
0,
decodeMap,
[
{ startIndex: 0, endIndex: 4, scopes: ['source.html'] },
{ startIndex: 4, endIndex: 11, scopes: ['source.html', 'style.tag.open'] },
{ startIndex: 11, endIndex: 17, scopes: ['source.html', 'source.css'] },
{ startIndex: 17, endIndex: 25, scopes: ['source.html', 'style.tag.close'] },
{ startIndex: 25, endIndex: 33, scopes: ['source.html', 'script.tag.open'] },
{ startIndex: 33, endIndex: 41, scopes: ['source.html', 'source.js'] },
{ startIndex: 41, endIndex: 50, scopes: ['source.html', 'script.tag.close'] },
{ startIndex: 50, endIndex: 54, scopes: ['source.html'] },
],
new TMState('html', null, null)
);

let actualModeTransitions = actual.modeTransitions.map((t) => { return { startIndex: t.startIndex, modeId: t.modeId }; });

assert.deepEqual(actualModeTransitions, [
{ startIndex: 0, modeId: 'html' },
{ startIndex: 11, modeId: 'css' },
{ startIndex: 17, modeId: 'html' },
{ startIndex: 33, modeId: 'javascript' },
{ startIndex: 41, modeId: 'html' },
]);
});

test('html and embedded', () => {

var tests = [
{
Expand Down Expand Up @@ -233,7 +273,7 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 22, type: 'html.punctuation.definition.end.string.quoted.double.source.css.embedded' },
{ startIndex: 23, type: 'tag.html.punctuation.definition.source.css.embedded' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
}, {
line: '\t\th1 {',
tmTokens: [
Expand All @@ -248,7 +288,7 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 4, type: 'meta.html.source.css.embedded.selector' },
{ startIndex: 5, type: 'meta.html.punctuation.begin.source.css.embedded.property-list.section' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
}, {
line: '\t\t\tcolor: #CCA3A3;',
tmTokens: [
Expand All @@ -269,7 +309,7 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 11, type: 'meta.html.other.source.css.embedded.property-list.property-value.constant.color.rgb-value' },
{ startIndex: 17, type: 'meta.html.punctuation.source.css.embedded.property-list.property-value.terminator.rule' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
}, {
line: '\t\t}',
tmTokens: [
Expand All @@ -280,7 +320,7 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 0, type: 'meta.html.source.css.embedded.property-list' },
{ startIndex: 2, type: 'meta.html.punctuation.end.source.css.embedded.property-list.section' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
}, {
line: '\t</style>',
tmTokens: [
Expand All @@ -295,7 +335,7 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 3, type: 'tag.html.entity.name.source.css.embedded.style' },
{ startIndex: 8, type: 'tag.html.punctuation.definition.source.css.embedded' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
}, {
line: '\t<script type=\"text/javascript\">',
tmTokens: [
Expand All @@ -322,7 +362,7 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 30, type: 'html.punctuation.definition.end.string.quoted.double.source.embedded.js' },
{ startIndex: 31, type: 'tag.html.punctuation.definition.source.embedded.js' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'javascript' }]
}, {
line: '\t\twindow.alert(\"I am a sample...\");',
tmTokens: [
Expand All @@ -349,22 +389,24 @@ suite('TextMate.decodeTextMateTokens', () => {
{ startIndex: 33, type: 'meta.html.source.embedded.js.brace.round' },
{ startIndex: 34, type: 'html.punctuation.source.embedded.terminator.js.statement' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'javascript' }]
}, {
line: '\t</script>',
line: '\t</script>After',
tmTokens: [
{ startIndex: 0, endIndex: 1, scopes: ['text.html.basic', 'source.js.embedded.html'] },
{ startIndex: 1, endIndex: 3, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] },
{ startIndex: 3, endIndex: 9, scopes: ['text.html.basic', 'source.js.embedded.html', 'entity.name.tag.script.html'] },
{ startIndex: 9, endIndex: 10, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] }
{ startIndex: 9, endIndex: 10, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] },
{ startIndex: 10, endIndex: 15, scopes: ['text.html.basic'] }
],
tokens: [
{ startIndex: 0, type: 'html.source.embedded.js' },
{ startIndex: 1, type: 'tag.html.punctuation.definition.source.embedded.js' },
{ startIndex: 3, type: 'tag.html.entity.name.source.embedded.js.script' },
{ startIndex: 9, type: 'tag.html.punctuation.definition.source.embedded.js' },
{ startIndex: 10, type: '' },
],
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
modeTransitions: [{ startIndex: 0, modeId: 'javascript' }, { startIndex: 10, modeId: 'html' }]
}, {
line: '</head>',
tmTokens: [
Expand Down Expand Up @@ -486,15 +528,23 @@ suite('TextMate.decodeTextMateTokens', () => {
}
];

let decodeMap = new DecodeMap(new TMScopeRegistry());
let registry = new TMScopeRegistry();

registry.register('html', 'source.html', null);
registry.register('c', 'source.c', null);
registry.register('css', 'source.css', null);
registry.register('javascript', 'source.js', null);
registry.register('python', 'source.python', null);
registry.register('smarty', 'source.smarty', null);
registry.register(null, 'source.baz', null);

let state = new TMState('html', null, null);
let decodeMap = new DecodeMap(registry);

for (let i = 0, len = tests.length; i < len; i++) {
let test = tests[i];
let actual = decodeTextMateTokens(test.line, 0, decodeMap, test.tmTokens, state);
let actual = decodeTextMateTokens(test.line, 0, decodeMap, test.tmTokens, new TMState('html', null, null));

let actualTokens = actual.tokens.map((t) => { return { startIndex: t.startIndex, type: t.type}; });
let actualTokens = actual.tokens.map((t) => { return { startIndex: t.startIndex, type: t.type }; });
let actualModeTransitions = actual.modeTransitions.map((t) => { return { startIndex: t.startIndex, modeId: t.modeId }; });

assert.deepEqual(actualTokens, test.tokens, 'test ' + test.line);
Expand Down Expand Up @@ -526,7 +576,8 @@ suite('textMate', () => {
}

function testOneDecodeTextMateToken(decodeMap: DecodeMap, scopes: string[], expected: string): void {
let actual = decodeTextMateToken(decodeMap, scopes);
let actualDecodedToken = decodeTextMateToken(decodeMap, scopes);
let actual = actualDecodedToken ? decodeMap.getToken(actualDecodedToken.tokensMask) : '';
assert.equal(actual, expected);

// Sanity-check
Expand Down

0 comments on commit 86b2e52

Please sign in to comment.