-
Notifications
You must be signed in to change notification settings - Fork 8.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ES|QL] improve tokenizer and theme #190170
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
2083d64
merging nulls order and some field tokens
drewdaemon d31b782
field names with multiple dots are single tokens
drewdaemon 7c6b574
interpret timespan literals as a single token
drewdaemon e924038
whip the theme into shape
drewdaemon 8fb0333
Update monaco_imports.ts
drewdaemon bccd7ac
remove some stuff
drewdaemon d687419
t Merge branch 'fix-tokenizer' of github.com:drewdaemon/kibana into f…
drewdaemon 813f1eb
update function test
drewdaemon af202d5
Update esql_tokens_provider.test.ts
drewdaemon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
import { ESQLErrorListener, getLexer as _getLexer } from '@kbn/esql-ast'; | ||
import { ESQL_TOKEN_POSTFIX } from './constants'; | ||
import { buildESQlTheme } from './esql_theme'; | ||
import { CharStreams } from 'antlr4'; | ||
|
||
describe('ESQL Theme', () => { | ||
it('should not have multiple rules for a single token', () => { | ||
const theme = buildESQlTheme(); | ||
|
||
const seen = new Set<string>(); | ||
const duplicates: string[] = []; | ||
for (const rule of theme.rules) { | ||
if (seen.has(rule.token)) { | ||
duplicates.push(rule.token); | ||
} | ||
seen.add(rule.token); | ||
} | ||
|
||
expect(duplicates).toEqual([]); | ||
}); | ||
|
||
const getLexer = () => { | ||
const errorListener = new ESQLErrorListener(); | ||
const inputStream = CharStreams.fromString('FROM foo'); | ||
return _getLexer(inputStream, errorListener); | ||
}; | ||
|
||
const lexer = getLexer(); | ||
const lexicalNames = lexer.symbolicNames | ||
.filter((name) => typeof name === 'string') | ||
.map((name) => name!.toLowerCase()); | ||
|
||
it('every rule should apply to a valid lexical name', () => { | ||
const theme = buildESQlTheme(); | ||
|
||
// These names aren't from the lexer... they are added on our side | ||
// see packages/kbn-monaco/src/esql/lib/esql_token_helpers.ts | ||
const syntheticNames = ['functions', 'nulls_order', 'timespan_literal']; | ||
|
||
for (const rule of theme.rules) { | ||
expect([...lexicalNames, ...syntheticNames]).toContain( | ||
rule.token.replace(ESQL_TOKEN_POSTFIX, '').toLowerCase() | ||
); | ||
} | ||
}); | ||
|
||
it('every valid lexical name should have a corresponding rule', () => { | ||
const theme = buildESQlTheme(); | ||
const tokenIDs = theme.rules.map((rule) => rule.token.replace(ESQL_TOKEN_POSTFIX, '')); | ||
|
||
const validExceptions = [ | ||
'unquoted_source', | ||
'false', // @TODO consider if this should get styling | ||
'true', // @TODO consider if this should get styling | ||
'info', // @TODO consider if this should get styling | ||
'colon', // @TODO consider if this should get styling | ||
Comment on lines
+61
to
+64
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does kind of feel like these should be styled somehow cc @ryankeairns |
||
|
||
'nulls', // nulls is a part of nulls_order so it doesn't need its own rule | ||
'first', // first is a part of nulls_order so it doesn't need its own rule | ||
'last', // last is a part of nulls_order so it doesn't need its own rule | ||
|
||
'id_pattern', // "KEEP <id_pattern>, <id_pattern>"... no styling needed | ||
'enrich_policy_name', // "ENRICH <enrich_policy_name>" | ||
'expr_ws', // whitespace, so no reason to style it | ||
'unknown_cmd', // unknown command, so no reason to style it | ||
|
||
// Lexer-mode-specific stuff | ||
'explain_line_comment', | ||
'explain_multiline_comment', | ||
'explain_ws', | ||
'project_line_comment', | ||
'project_multiline_comment', | ||
'project_ws', | ||
'rename_line_comment', | ||
'rename_multiline_comment', | ||
'rename_ws', | ||
'from_line_comment', | ||
'from_multiline_comment', | ||
'from_ws', | ||
'enrich_line_comment', | ||
'enrich_multiline_comment', | ||
'enrich_ws', | ||
'mvexpand_line_comment', | ||
'mvexpand_multiline_comment', | ||
'mvexpand_ws', | ||
'enrich_field_line_comment', | ||
'enrich_field_multiline_comment', | ||
'enrich_field_ws', | ||
'lookup_line_comment', | ||
'lookup_multiline_comment', | ||
'lookup_ws', | ||
'lookup_field_line_comment', | ||
'lookup_field_multiline_comment', | ||
'lookup_field_ws', | ||
'show_line_comment', | ||
'show_multiline_comment', | ||
'show_ws', | ||
'meta_line_comment', | ||
'meta_multiline_comment', | ||
'meta_ws', | ||
'setting', | ||
'setting_line_comment', | ||
'settting_multiline_comment', | ||
'setting_ws', | ||
'metrics_line_comment', | ||
'metrics_multiline_comment', | ||
'metrics_ws', | ||
'closing_metrics_line_comment', | ||
'closing_metrics_multiline_comment', | ||
'closing_metrics_ws', | ||
]; | ||
|
||
// First, check that every valid exception is actually valid | ||
for (const name of validExceptions) { | ||
expect(lexicalNames).toContain(name); | ||
} | ||
|
||
const namesToCheck = lexicalNames.filter((name) => !validExceptions.includes(name)); | ||
|
||
// Now, check that every lexical name has a corresponding rule | ||
for (const name of namesToCheck) { | ||
expect(tokenIDs).toContain(name); | ||
} | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
packages/kbn-monaco/src/esql/lib/esql_tokens_provider.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
import { ESQLState } from './esql_state'; | ||
import { ESQLToken } from './esql_token'; | ||
import { ESQLTokensProvider } from './esql_tokens_provider'; | ||
|
||
describe('ES|QL Tokens Provider', () => { | ||
it('should tokenize a line', () => { | ||
const line = 'SELECT * FROM my_index'; | ||
const prevState = new ESQLState(); | ||
const provider = new ESQLTokensProvider(); | ||
const { tokens } = provider.tokenize(line, prevState); | ||
expect(tokens.map((t) => t.scopes)).toEqual([ | ||
'unknown_cmd.esql', | ||
'expr_ws.esql', | ||
'asterisk.esql', | ||
'expr_ws.esql', | ||
'unquoted_identifier.esql', | ||
'expr_ws.esql', | ||
'unquoted_identifier.esql', | ||
]); | ||
}); | ||
|
||
it('should properly tokenize functions', () => { | ||
const line = 'FROM my_index | EVAL date_diff("day", NOW()) | STATS abs(field1), avg(field1)'; | ||
const provider = new ESQLTokensProvider(); | ||
const { tokens } = provider.tokenize(line, new ESQLState()); | ||
const functionTokens = tokens.filter((t) => t.scopes === 'functions.esql'); | ||
expect(functionTokens).toHaveLength(4); | ||
}); | ||
|
||
it('should properly tokenize SORT... NULLS clauses', () => { | ||
const line = 'SELECT * FROM my_index | SORT BY field1 ASC NULLS FIRST, field2 DESC NULLS LAST'; | ||
const provider = new ESQLTokensProvider(); | ||
const { tokens } = provider.tokenize(line, new ESQLState()); | ||
// Make sure the tokens got merged properly | ||
const nullsOrderTokens = tokens.filter((t) => t.scopes === 'nulls_order.esql'); | ||
expect(nullsOrderTokens).toHaveLength(2); | ||
expect(nullsOrderTokens).toEqual<ESQLToken[]>([ | ||
{ | ||
scopes: 'nulls_order.esql', | ||
startIndex: 44, | ||
stopIndex: 54, | ||
}, | ||
{ | ||
scopes: 'nulls_order.esql', | ||
startIndex: 69, | ||
stopIndex: 78, | ||
}, | ||
]); | ||
// Ensure that the NULLS FIRST and NULLS LAST tokens are not present | ||
expect(tokens.map((t) => t.scopes)).not.toContain('nulls.esql'); | ||
expect(tokens.map((t) => t.scopes)).not.toContain('first.esql'); | ||
expect(tokens.map((t) => t.scopes)).not.toContain('last.esql'); | ||
}); | ||
|
||
it('should properly tokenize timespan literals', () => { | ||
const line = 'SELECT * FROM my_index | WHERE date_field > 1 day AND other_field < 2 hours'; | ||
const provider = new ESQLTokensProvider(); | ||
const { tokens } = provider.tokenize(line, new ESQLState()); | ||
const timespanTokens = tokens.filter((t) => t.scopes === 'timespan_literal.esql'); | ||
expect(timespanTokens).toHaveLength(2); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we like this test? It seemed like a good idea as an extra check that we aren't missing some lexical token that gets added. But, we can remove if it seems like a pain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, leave it and let's see how painful it gets