From ce9c567e5bfb441bb6415699a6b6fa797bc08f2e Mon Sep 17 00:00:00 2001 From: Timothee Guerin Date: Wed, 27 Mar 2024 10:12:29 -0700 Subject: [PATCH] Improve TM language for enums, unions and escaped identifiers (#3069) resolves https://github.com/microsoft/typespec/issues/3070 The TextMate grammar did not correctly define the syntax for escaped identifiers, for example ``` `North West` ``` The enum and union grammar rules were also inaccurate and did not tokenize every element correctly ## Example of issue before ![image](https://github.com/microsoft/typespec/assets/1031227/ee60f4fe-5eef-4f86-baa5-1cc96b665526) ## After ![image](https://github.com/microsoft/typespec/assets/1031227/38405f18-0fa4-44ba-82f6-594bcd4fcebf) --- ...-escpaed-identifiers-2024-2-27-16-22-28.md | 8 + grammars/typespec.json | 180 ++++++++++++++---- packages/compiler/src/server/classify.ts | 3 + packages/compiler/src/server/tmlanguage.ts | 76 +++++++- .../compiler/test/server/colorization.test.ts | 120 ++++++++++++ 5 files changed, 343 insertions(+), 44 deletions(-) create mode 100644 .chronus/changes/tmlanguage-escpaed-identifiers-2024-2-27-16-22-28.md diff --git a/.chronus/changes/tmlanguage-escpaed-identifiers-2024-2-27-16-22-28.md b/.chronus/changes/tmlanguage-escpaed-identifiers-2024-2-27-16-22-28.md new file mode 100644 index 0000000000..d485c06134 --- /dev/null +++ b/.chronus/changes/tmlanguage-escpaed-identifiers-2024-2-27-16-22-28.md @@ -0,0 +1,8 @@ +--- +# Change versionKind to one of: internal, fix, dependencies, feature, deprecation, breaking +changeKind: fix +packages: + - "@typespec/compiler" +--- + +TmLanguage: Fix tokenization of escaped identifiers, enums and unions diff --git a/grammars/typespec.json b/grammars/typespec.json index ba5b8f4b35..c2ab257ac0 100644 --- a/grammars/typespec.json +++ b/grammars/typespec.json @@ -3,7 +3,7 @@ "name": "TypeSpec", "scopeName": "source.tsp", "fileTypes": [ - ".tsp" + "tsp" ], "patterns": [ { @@ -85,7 +85,7 @@ }, "decorator-declaration-statement": { "name": 
"meta.decorator-declaration-statement.typespec", - "begin": "(?:(extern)\\s+)?\\b(dec)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "(?:(extern)\\s+)?\\b(dec)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" @@ -160,7 +160,7 @@ }, "doc-comment-param": { "name": "comment.block.tsp", - "match": "(?x)((@)(?:param|template))\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)\\b", + "match": "(?x)((@)(?:param|template))\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)\\b", "captures": { "1": { "name": "keyword.tag.tspdoc" @@ -187,7 +187,7 @@ }, "doc-comment-unknown-tag": { "name": "comment.block.tsp", - "match": "(?x)((@)(?:\\b[_$[:alpha:]][_$[:alnum:]]*\\b))\\b", + "match": "(?x)((@)(?:\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`))\\b", "captures": { "1": { "name": "entity.name.tag.tsp" @@ -236,12 +236,68 @@ } ] }, + "enum-body": { + "name": "meta.enum-body.typespec", + "begin": "\\{", + "beginCaptures": { + "0": { + "name": "punctuation.curlybrace.open.tsp" + } + }, + "end": "\\}", + "endCaptures": { + "0": { + "name": "punctuation.curlybrace.close.tsp" + } + }, + "patterns": [ + { + "include": "#enum-member" + }, + { + "include": "#token" + }, + { + "include": "#directive" + }, + { + "include": "#decorator" + }, + { + "include": "#punctuation-comma" + } + ] + }, + "enum-member": { + "name": "meta.enum-member.typespec", + "begin": "(?:(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)\\s*(:?))", + "beginCaptures": { + "1": { + "name": "variable.name.tsp" + }, + "2": { + "name": "keyword.operator.type.annotation.tsp" + } + }, + "end": "(?=,|;|@|\\)|\\}|\\b(?:extern)\\b|\\b(?:namespace|model|op|using|import|enum|alias|union|interface|dec|fn)\\b)", + "patterns": [ + { + "include": "#token" + }, + { + "include": "#type-annotation" + } + ] + }, "enum-statement": { "name": "meta.enum-statement.typespec", - "begin": "\\b(enum)\\b", + "begin": 
"\\b(enum)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" + }, + "2": { + "name": "entity.name.type.tsp" } }, "end": "(?<=\\})|(?=,|;|@|\\)|\\}|\\b(?:extern)\\b|\\b(?:namespace|model|op|using|import|enum|alias|union|interface|dec|fn)\\b)", @@ -250,7 +306,7 @@ "include": "#token" }, { - "include": "#expression" + "include": "#enum-body" } ] }, @@ -288,7 +344,7 @@ }, "function-call": { "name": "meta.function-call.typespec", - "begin": "(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)\\s*(\\()", + "begin": "(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)\\s*(\\()", "beginCaptures": { "1": { "name": "entity.name.function.tsp" @@ -311,7 +367,7 @@ }, "function-declaration-statement": { "name": "meta.function-declaration-statement.typespec", - "begin": "(?:(extern)\\s+)?\\b(fn)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "(?:(extern)\\s+)?\\b(fn)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" @@ -338,7 +394,7 @@ }, "identifier-expression": { "name": "entity.name.type.tsp", - "match": "\\b[_$[:alpha:]][_$[:alnum:]]*\\b" + "match": "\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`" }, "if-expression": { "name": "meta.if-expression.typespec", @@ -425,7 +481,7 @@ }, "interface-member": { "name": "meta.interface-member.typespec", - "begin": "(?:\\b(op)\\b\\s+)?(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "(?:\\b(op)\\b\\s+)?(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" @@ -503,7 +559,7 @@ "include": "#decorator" }, { - "include": "#model-spread-property" + "include": "#spread-operator" }, { "include": "#punctuation-semicolon" @@ -530,7 +586,7 @@ }, "model-property": { "name": "meta.model-property.typespec", - "begin": "(?:(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)|(\\\"(?:[^\\\"\\\\]|\\\\.)*\\\"))", + "begin": 
"(?:(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)|(\\\"(?:[^\\\"\\\\]|\\\\.)*\\\"))", "beginCaptures": { "1": { "name": "variable.name.tsp" @@ -555,21 +611,6 @@ } ] }, - "model-spread-property": { - "name": "meta.model-spread-property.typespec", - "begin": "\\.\\.\\.", - "beginCaptures": { - "0": { - "name": "keyword.operator.spread.tsp" - } - }, - "end": "(?=,|;|@|\\)|\\}|\\b(?:extern)\\b|\\b(?:namespace|model|op|using|import|enum|alias|union|interface|dec|fn)\\b)", - "patterns": [ - { - "include": "#expression" - } - ] - }, "model-statement": { "name": "meta.model-statement.typespec", "begin": "\\b(model)\\b", @@ -692,7 +733,7 @@ "include": "#model-property" }, { - "include": "#model-spread-property" + "include": "#spread-operator" }, { "include": "#punctuation-comma" @@ -717,7 +758,7 @@ }, "operation-statement": { "name": "meta.operation-statement.typespec", - "begin": "\\b(op)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "\\b(op)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" @@ -822,7 +863,7 @@ }, "projection-parameter": { "name": "meta.projection-parameter.typespec", - "begin": "(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "variable.name.tsp" @@ -856,7 +897,7 @@ }, "projection-statement": { "name": "meta.projection-statement.typespec", - "begin": "\\b(projection)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)(#)(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "\\b(projection)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)(#)(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" @@ -952,6 +993,21 @@ } ] }, + "spread-operator": { + "name": "meta.spread-operator.typespec", + "begin": "\\.\\.\\.", + "beginCaptures": { + "0": { + "name": "keyword.operator.spread.tsp" + } + }, + "end": 
"(?=,|;|@|\\)|\\}|\\b(?:extern)\\b|\\b(?:namespace|model|op|using|import|enum|alias|union|interface|dec|fn)\\b)", + "patterns": [ + { + "include": "#expression" + } + ] + }, "statement": { "patterns": [ { @@ -1150,7 +1206,7 @@ }, "type-parameter": { "name": "meta.type-parameter.typespec", - "begin": "(\\b[_$[:alpha:]][_$[:alnum:]]*\\b)", + "begin": "(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "entity.name.type.tsp" @@ -1222,12 +1278,50 @@ } ] }, + "union-body": { + "name": "meta.union-body.typespec", + "begin": "\\{", + "beginCaptures": { + "0": { + "name": "punctuation.curlybrace.open.tsp" + } + }, + "end": "\\}", + "endCaptures": { + "0": { + "name": "punctuation.curlybrace.close.tsp" + } + }, + "patterns": [ + { + "include": "#union-variant" + }, + { + "include": "#token" + }, + { + "include": "#directive" + }, + { + "include": "#decorator" + }, + { + "include": "#expression" + }, + { + "include": "#punctuation-comma" + } + ] + }, "union-statement": { "name": "meta.union-statement.typespec", - "begin": "\\b(union)\\b", + "begin": "\\b(union)\\b\\s+(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)", "beginCaptures": { "1": { "name": "keyword.other.tsp" + }, + "2": { + "name": "entity.name.type.tsp" } }, "end": "(?<=\\})|(?=,|;|@|\\)|\\}|\\b(?:extern)\\b|\\b(?:namespace|model|op|using|import|enum|alias|union|interface|dec|fn)\\b)", @@ -1236,7 +1330,25 @@ "include": "#token" }, { - "include": "#type-parameters" + "include": "#union-body" + } + ] + }, + "union-variant": { + "name": "meta.union-variant.typespec", + "begin": "(?:(\\b[_$[:alpha:]][_$[:alnum:]]*\\b|`(?:[^`\\\\]|\\\\.)*`)\\s*(:))", + "beginCaptures": { + "1": { + "name": "variable.name.tsp" + }, + "2": { + "name": "keyword.operator.type.annotation.tsp" + } + }, + "end": "(?=,|;|@|\\)|\\}|\\b(?:extern)\\b|\\b(?:namespace|model|op|using|import|enum|alias|union|interface|dec|fn)\\b)", + "patterns": [ + { + "include": "#token" }, { "include": 
"#expression" diff --git a/packages/compiler/src/server/classify.ts b/packages/compiler/src/server/classify.ts index 3b2dda64e1..5a93c6ae50 100644 --- a/packages/compiler/src/server/classify.ts +++ b/packages/compiler/src/server/classify.ts @@ -218,6 +218,9 @@ export function getSemanticTokens(ast: TypeSpecScriptNode): SemanticToken[] { case SyntaxKind.EnumStatement: classify(node.id, SemanticTokenKind.Enum); break; + case SyntaxKind.UnionStatement: + classify(node.id, SemanticTokenKind.Enum); + break; case SyntaxKind.EnumMember: classify(node.id, SemanticTokenKind.EnumMember); break; diff --git a/packages/compiler/src/server/tmlanguage.ts b/packages/compiler/src/server/tmlanguage.ts index 736cf2fcbd..bd39538e1a 100644 --- a/packages/compiler/src/server/tmlanguage.ts +++ b/packages/compiler/src/server/tmlanguage.ts @@ -58,7 +58,9 @@ const identifierStart = "[_$[:alpha:]]"; // cspell:disable-next-line const identifierContinue = "[_$[:alnum:]]"; const beforeIdentifier = `(?=${identifierStart})`; -const identifier = `\\b${identifierStart}${identifierContinue}*\\b`; +const escapedIdentifier = "`(?:[^`\\\\]|\\\\.)*`"; +const simpleIdentifier = `\\b${identifierStart}${identifierContinue}*\\b`; +const identifier = `${simpleIdentifier}|${escapedIdentifier}`; const qualifiedIdentifier = `\\b${identifierStart}(${identifierContinue}|\\.${identifierStart})*\\b`; const stringPattern = '\\"(?:[^\\"\\\\]|\\\\.)*\\"'; const modifierKeyword = `\\b(?:extern)\\b`; @@ -403,8 +405,8 @@ const modelProperty: BeginEndRule = { patterns: [token, typeAnnotation, operatorAssignment, expression], }; -const modelSpreadProperty: BeginEndRule = { - key: "model-spread-property", +const spreadExpression: BeginEndRule = { + key: "spread-operator", scope: meta, begin: "\\.\\.\\.", beginCaptures: { @@ -417,7 +419,7 @@ const modelSpreadProperty: BeginEndRule = { const directive: BeginEndRule = { key: "directive", scope: meta, - begin: `\\s*(#${identifier})`, + begin: `\\s*(#${simpleIdentifier})`, 
beginCaptures: { "1": { scope: "keyword.directive.name.tsp" }, }, @@ -444,7 +446,7 @@ const modelExpression: BeginEndRule = { token, directive, decorator, - modelSpreadProperty, + spreadExpression, punctuationSemicolon, ], }; @@ -503,26 +505,80 @@ const scalarStatement: BeginEndRule = { ], }; +const enumMember: BeginEndRule = { + key: "enum-member", + scope: meta, + begin: `(?:(${identifier})\\s*(:?))`, + beginCaptures: { + "1": { scope: "variable.name.tsp" }, + "2": { scope: "keyword.operator.type.annotation.tsp" }, + }, + end: universalEnd, + patterns: [token, typeAnnotation], +}; + +const enumBody: BeginEndRule = { + key: "enum-body", + scope: meta, + begin: "\\{", + beginCaptures: { + "0": { scope: "punctuation.curlybrace.open.tsp" }, + }, + end: "\\}", + endCaptures: { + "0": { scope: "punctuation.curlybrace.close.tsp" }, + }, + patterns: [enumMember, token, directive, decorator, punctuationComma], +}; + const enumStatement: BeginEndRule = { key: "enum-statement", scope: meta, - begin: "\\b(enum)\\b", + begin: `\\b(enum)\\b\\s+(${identifier})`, beginCaptures: { "1": { scope: "keyword.other.tsp" }, + "2": { scope: "entity.name.type.tsp" }, }, end: `(?<=\\})|${universalEnd}`, + patterns: [token, enumBody], +}; + +const namedUnionVariant: BeginEndRule = { + key: "union-variant", + scope: meta, + begin: `(?:(${identifier})\\s*(:))`, + beginCaptures: { + "1": { scope: "variable.name.tsp" }, + "2": { scope: "keyword.operator.type.annotation.tsp" }, + }, + end: universalEnd, patterns: [token, expression], }; +const unionBody: BeginEndRule = { + key: "union-body", + scope: meta, + begin: "\\{", + beginCaptures: { + "0": { scope: "punctuation.curlybrace.open.tsp" }, + }, + end: "\\}", + endCaptures: { + "0": { scope: "punctuation.curlybrace.close.tsp" }, + }, + patterns: [namedUnionVariant, token, directive, decorator, expression, punctuationComma], +}; + const unionStatement: BeginEndRule = { key: "union-statement", scope: meta, - begin: "\\b(union)\\b", + begin: 
`\\b(union)\\b\\s+(${identifier})`, beginCaptures: { "1": { scope: "keyword.other.tsp" }, + "2": { scope: "entity.name.type.tsp" }, }, end: `(?<=\\})|${universalEnd}`, - patterns: [token, typeParameters, expression], + patterns: [token, unionBody], }; const aliasStatement: BeginEndRule = { @@ -580,7 +636,7 @@ const operationParameters: BeginEndRule = { endCaptures: { "0": { scope: "punctuation.parenthesis.close.tsp" }, }, - patterns: [token, decorator, modelProperty, modelSpreadProperty, punctuationComma], + patterns: [token, decorator, modelProperty, spreadExpression, punctuationComma], }; const operationHeritage: BeginEndRule = { @@ -896,7 +952,7 @@ const grammar: Grammar = { $schema: tm.schema, name: "TypeSpec", scopeName: "source.tsp", - fileTypes: [".tsp"], + fileTypes: ["tsp"], patterns: [statement], }; diff --git a/packages/compiler/test/server/colorization.test.ts b/packages/compiler/test/server/colorization.test.ts index 3c2dac15c1..f364c5eeae 100644 --- a/packages/compiler/test/server/colorization.test.ts +++ b/packages/compiler/test/server/colorization.test.ts @@ -33,6 +33,8 @@ const Token = { keywords: { model: createToken("model", "keyword.other.tsp"), scalar: createToken("scalar", "keyword.other.tsp"), + enum: createToken("enum", "keyword.other.tsp"), + union: createToken("union", "keyword.other.tsp"), operation: createToken("op", "keyword.other.tsp"), namespace: createToken("namespace", "keyword.other.tsp"), interface: createToken("interface", "keyword.other.tsp"), @@ -768,6 +770,124 @@ function testColorization(description: string, tokenize: Tokenize) { ]); }); + describe("enums", () => { + it("simple enum", async () => { + const tokens = await tokenize("enum Foo {}"); + deepStrictEqual(tokens, [ + Token.keywords.enum, + Token.identifiers.type("Foo"), + Token.punctuation.openBrace, + Token.punctuation.closeBrace, + ]); + }); + + it("enum with simple members", async () => { + const tokens = await tokenize("enum Direction { up, down}"); + 
deepStrictEqual(tokens, [ + Token.keywords.enum, + Token.identifiers.type("Direction"), + Token.punctuation.openBrace, + Token.identifiers.variable("up"), + Token.punctuation.comma, + Token.identifiers.variable("down"), + Token.punctuation.closeBrace, + ]); + }); + + it("enum with escaped identifiers", async () => { + const tokens = await tokenize("enum Direction { `North West`, `North West`}"); + deepStrictEqual(tokens, [ + Token.keywords.enum, + Token.identifiers.type("Direction"), + Token.punctuation.openBrace, + Token.identifiers.variable("`North West`"), + Token.punctuation.comma, + Token.identifiers.variable("`North West`"), + Token.punctuation.closeBrace, + ]); + }); + + it("enum with string values", async () => { + const tokens = await tokenize(`enum Direction { up: "Up", down: "Down"}`); + deepStrictEqual(tokens, [ + Token.keywords.enum, + Token.identifiers.type("Direction"), + Token.punctuation.openBrace, + Token.identifiers.variable("up"), + Token.operators.typeAnnotation, + Token.literals.stringQuoted("Up"), + Token.punctuation.comma, + Token.identifiers.variable("down"), + Token.operators.typeAnnotation, + Token.literals.stringQuoted("Down"), + Token.punctuation.closeBrace, + ]); + }); + }); + + describe("union statements", () => { + it("simple union", async () => { + const tokens = await tokenize("union Foo {}"); + deepStrictEqual(tokens, [ + Token.keywords.union, + Token.identifiers.type("Foo"), + Token.punctuation.openBrace, + Token.punctuation.closeBrace, + ]); + }); + + it("union with unamed variants", async () => { + const tokens = await tokenize(`union Direction { "up", string, 123 }`); + deepStrictEqual(tokens, [ + Token.keywords.union, + Token.identifiers.type("Direction"), + Token.punctuation.openBrace, + Token.literals.stringQuoted("up"), + Token.punctuation.comma, + Token.identifiers.type("string"), + Token.punctuation.comma, + Token.literals.numeric("123"), + Token.punctuation.closeBrace, + ]); + }); + + it("union with named variants", 
async () => { + const tokens = await tokenize(`union Direction { up: "Up", down: "Down" }`); + deepStrictEqual(tokens, [ + Token.keywords.union, + Token.identifiers.type("Direction"), + Token.punctuation.openBrace, + Token.identifiers.variable("up"), + Token.operators.typeAnnotation, + Token.literals.stringQuoted("Up"), + Token.punctuation.comma, + Token.identifiers.variable("down"), + Token.operators.typeAnnotation, + Token.literals.stringQuoted("Down"), + Token.punctuation.closeBrace, + ]); + }); + + it("union with named variants with escaped identifier", async () => { + const tokens = await tokenize( + `union Direction { \`north east\`: "North East", \`north west\`: "North West" }` + ); + deepStrictEqual(tokens, [ + Token.keywords.union, + Token.identifiers.type("Direction"), + Token.punctuation.openBrace, + Token.identifiers.variable("`north east`"), + Token.operators.typeAnnotation, + Token.literals.stringQuoted("North East"), + Token.punctuation.comma, + Token.identifiers.variable("`north west`"), + Token.operators.typeAnnotation, + Token.literals.stringQuoted("North West"), + Token.punctuation.closeBrace, + ]); + }); + }); + describe("namespaces", () => { it("simple global namespace", async () => { const tokens = await tokenize("namespace Foo;");