Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ES|QL] improve tokenizer and theme #190170

Merged
merged 9 commits into from
Aug 9, 2024
133 changes: 133 additions & 0 deletions packages/kbn-monaco/src/esql/lib/esql_theme.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { ESQLErrorListener, getLexer as _getLexer } from '@kbn/esql-ast';
import { ESQL_TOKEN_POSTFIX } from './constants';
import { buildESQlTheme } from './esql_theme';
import { CharStreams } from 'antlr4';

describe('ESQL Theme', () => {
it('should not have multiple rules for a single token', () => {
  const { rules } = buildESQlTheme();

  // Keep every token that was already encountered earlier in the rule list,
  // i.e. the second and any later occurrence of the same token.
  const alreadySeen = new Set<string>();
  const duplicates = rules
    .map((rule) => rule.token)
    .filter((token) => {
      const isRepeat = alreadySeen.has(token);
      alreadySeen.add(token);
      return isRepeat;
    });

  expect(duplicates).toEqual([]);
});

// Builds a lexer over a fixed placeholder query ('FROM foo') with a fresh error listener.
const getLexer = () => _getLexer(CharStreams.fromString('FROM foo'), new ESQLErrorListener());

const lexer = getLexer();

// Lower-cased symbolic names of the lexer; entries that are not strings are skipped.
const lexicalNames: string[] = [];
for (const symbolicName of lexer.symbolicNames) {
  if (typeof symbolicName === 'string') {
    lexicalNames.push(symbolicName.toLowerCase());
  }
}

it('every rule should apply to a valid lexical name', () => {
  // These names aren't from the lexer... they are added on our side
  // see packages/kbn-monaco/src/esql/lib/esql_token_helpers.ts
  const syntheticNames = ['functions', 'nulls_order', 'timespan_literal'];
  const acceptedNames = [...lexicalNames, ...syntheticNames];

  buildESQlTheme().rules.forEach(({ token }) => {
    // Strip the ES|QL postfix and normalize case before looking the name up.
    const bareName = token.replace(ESQL_TOKEN_POSTFIX, '').toLowerCase();
    expect(acceptedNames).toContain(bareName);
  });
});

it('every valid lexical name should have a corresponding rule', () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we like this test? It seemed like a good idea as an extra check that we aren't missing some lexical token that gets added. But we can remove it if it seems like a pain.

Copy link
Contributor

@stratoula stratoula Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, leave it and let's see how painful it gets

const theme = buildESQlTheme();
const tokenIDs = theme.rules.map((rule) => rule.token.replace(ESQL_TOKEN_POSTFIX, ''));

const validExceptions = [
'unquoted_source',
'false', // @TODO consider if this should get styling
'true', // @TODO consider if this should get styling
'info', // @TODO consider if this should get styling
'colon', // @TODO consider if this should get styling
Comment on lines +61 to +64
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does kind of feel like these should be styled somehow cc @ryankeairns


'nulls', // nulls is a part of nulls_order so it doesn't need its own rule
'first', // first is a part of nulls_order so it doesn't need its own rule
'last', // last is a part of nulls_order so it doesn't need its own rule

'id_pattern', // "KEEP <id_pattern>, <id_pattern>"... no styling needed
'enrich_policy_name', // "ENRICH <enrich_policy_name>"
'expr_ws', // whitespace, so no reason to style it
'unknown_cmd', // unknown command, so no reason to style it

// Lexer-mode-specific stuff
'explain_line_comment',
'explain_multiline_comment',
'explain_ws',
'project_line_comment',
'project_multiline_comment',
'project_ws',
'rename_line_comment',
'rename_multiline_comment',
'rename_ws',
'from_line_comment',
'from_multiline_comment',
'from_ws',
'enrich_line_comment',
'enrich_multiline_comment',
'enrich_ws',
'mvexpand_line_comment',
'mvexpand_multiline_comment',
'mvexpand_ws',
'enrich_field_line_comment',
'enrich_field_multiline_comment',
'enrich_field_ws',
'lookup_line_comment',
'lookup_multiline_comment',
'lookup_ws',
'lookup_field_line_comment',
'lookup_field_multiline_comment',
'lookup_field_ws',
'show_line_comment',
'show_multiline_comment',
'show_ws',
'meta_line_comment',
'meta_multiline_comment',
'meta_ws',
'setting',
'setting_line_comment',
'settting_multiline_comment',
'setting_ws',
'metrics_line_comment',
'metrics_multiline_comment',
'metrics_ws',
'closing_metrics_line_comment',
'closing_metrics_multiline_comment',
'closing_metrics_ws',
];

// First, check that every valid exception is actually valid
for (const name of validExceptions) {
expect(lexicalNames).toContain(name);
}

const namesToCheck = lexicalNames.filter((name) => !validExceptions.includes(name));

// Now, check that every lexical name has a corresponding rule
for (const name of namesToCheck) {
expect(tokenIDs).toContain(name);
}
});
});
37 changes: 14 additions & 23 deletions packages/kbn-monaco/src/esql/lib/esql_theme.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,15 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
...buildRuleGroup(
[
'explain',
'row',
'limit',
'ws',
'assign',
'comma',
'dot',
'first',
'last',
'opening_bracket',
'closing_bracket',
'quoted_identifier',
'src_ws',
'unquoted_identifier',
'pipe',
'not',
'percent',
'integer_literal',
'decimal_literal',
'src_unquoted_identifier',
'src_quoted_identifier',
'string',
],
euiThemeVars.euiTextColor
),
Expand All @@ -57,12 +45,14 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
...buildRuleGroup(
[
'metrics',
'meta',
'metadata',
'match',
'mv_expand',
'stats',
'inlinestats',
'dissect',
'grok',
'project',
'keep',
'rename',
'drop',
Expand All @@ -76,8 +66,8 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
'rlike',
'in',
'as',
'expr_ws',
'limit',
'lookup',
'null',
'enrich',
'on',
Expand Down Expand Up @@ -112,26 +102,27 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
'asterisk', // '*'
'slash', // '/'
'percent', // '%'
'cast_op', // '::'
],
euiThemeVars.euiColorPrimaryText
),

// comments
...buildRuleGroup(
[
'line_comment',
'multiline_comment',
'expr_line_comment',
'expr_multiline_comment',
'src_line_comment',
'src_multiline_comment',
],
['line_comment', 'multiline_comment', 'expr_line_comment', 'expr_multiline_comment'],
euiThemeVars.euiColorDisabledText
),

// values
...buildRuleGroup(
['quoted_string', 'integer_literal', 'decimal_literal', 'named_or_positional_param'],
[
'quoted_string',
'integer_literal',
'decimal_literal',
'named_or_positional_param',
'param',
'timespan_literal',
],
euiThemeVars.euiColorSuccessText
),
],
Expand Down
54 changes: 42 additions & 12 deletions packages/kbn-monaco/src/esql/lib/esql_token_helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import { monaco } from '../../monaco_imports';
import { ESQL_TOKEN_POSTFIX } from './constants';
import { ESQLToken } from './esql_token';

function nonNullable<T>(value: T | undefined): value is T {
return value != null;
Expand All @@ -33,17 +34,46 @@ export function addFunctionTokens(tokens: monaco.languages.IToken[]): monaco.lan
return [...tokens];
}

export function addNullsOrder(tokens: monaco.languages.IToken[]): void {
const nullsIndex = tokens.findIndex((token) => token.scopes === 'nulls' + ESQL_TOKEN_POSTFIX);
if (
// did we find a "nulls"?
nullsIndex > -1 &&
// is the next non-whitespace token an order?
['first' + ESQL_TOKEN_POSTFIX, 'last' + ESQL_TOKEN_POSTFIX].includes(
tokens[nullsIndex + 2]?.scopes
)
) {
tokens[nullsIndex].scopes = 'nulls_order' + ESQL_TOKEN_POSTFIX;
tokens.splice(nullsIndex + 1, 2);
// Table of [scope sequence, replacement scope] pairs consumed by mergeTokens.
// A run of adjacent tokens whose scopes equal the sequence (each entry compared with
// ESQL_TOKEN_POSTFIX appended) is replaced by one token created with the replacement scope.
// mergeTokens walks the rules in the order they are listed here.
const mergeRules = [
[['nulls', 'expr_ws', 'first'], 'nulls_order'],
[['nulls', 'expr_ws', 'last'], 'nulls_order'],
// NOTE(review): the other rules and the theme use 'integer_literal'; confirm that an
// 'integer' scope is actually produced, otherwise this rule can never match.
[['integer', 'unquoted_identifier'], 'timespan_literal'],
[['integer_literal', 'expr_ws', 'unquoted_identifier'], 'timespan_literal'],
] as const;

/**
 * Reports whether the tokens beginning at `start` carry, in order, exactly the scopes
 * listed in `pattern` (each compared with ESQL_TOKEN_POSTFIX appended).
 * The caller must ensure that `start + pattern.length <= tokens.length`.
 */
function matchesScopesAt(tokens: ESQLToken[], start: number, pattern: readonly string[]): boolean {
  return pattern.every(
    (scope, offset) => tokens[start + offset].scopes === scope + ESQL_TOKEN_POSTFIX
  );
}

/**
 * Collapses runs of adjacent tokens that match an entry of `mergeRules` into one token.
 *
 * The merged token is created with the rule's replacement scope and spans from the
 * first matched token's `startIndex` to the last matched token's `stopIndex`.
 * Each rule is re-applied until a full pass over the tokens produces no further merge.
 *
 * @param tokens the tokens to process; the array is modified in place
 * @returns the same array instance that was passed in
 */
export function mergeTokens(tokens: ESQLToken[]): monaco.languages.IToken[] {
  for (const [pattern, mergedScope] of mergeRules) {
    const width = pattern.length;
    let mergedOnThisPass: boolean;

    do {
      mergedOnThisPass = false;

      // Only positions that leave room for the whole pattern can match; the bound is
      // re-evaluated every iteration because splicing shortens the array.
      for (let start = 0; start + width <= tokens.length; start++) {
        if (!matchesScopesAt(tokens, start, pattern)) {
          continue;
        }

        const first = tokens[start];
        const last = tokens[start + width - 1];
        tokens.splice(
          start,
          width,
          new ESQLToken(mergedScope, first.startIndex, last.stopIndex)
        );
        mergedOnThisPass = true;
      }
    } while (mergedOnThisPass);
  }

  return tokens;
}
70 changes: 70 additions & 0 deletions packages/kbn-monaco/src/esql/lib/esql_tokens_provider.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { ESQLState } from './esql_state';
import { ESQLToken } from './esql_token';
import { ESQLTokensProvider } from './esql_tokens_provider';

describe('ES|QL Tokens Provider', () => {
  // Tokenizes one line with a new provider, starting from a fresh state.
  const tokenizeLine = (line: string) =>
    new ESQLTokensProvider().tokenize(line, new ESQLState()).tokens;

  it('should tokenize a line', () => {
    const tokens = tokenizeLine('SELECT * FROM my_index');
    const scopeSequence = tokens.map((token) => token.scopes);

    expect(scopeSequence).toEqual([
      'unknown_cmd.esql',
      'expr_ws.esql',
      'asterisk.esql',
      'expr_ws.esql',
      'unquoted_identifier.esql',
      'expr_ws.esql',
      'unquoted_identifier.esql',
    ]);
  });

  it('should properly tokenize functions', () => {
    const tokens = tokenizeLine(
      'FROM my_index | EVAL date_diff("day", NOW()) | STATS abs(field1), avg(field1)'
    );

    expect(tokens.filter((token) => token.scopes === 'functions.esql')).toHaveLength(4);
  });

  it('should properly tokenize SORT... NULLS clauses', () => {
    const tokens = tokenizeLine(
      'SELECT * FROM my_index | SORT BY field1 ASC NULLS FIRST, field2 DESC NULLS LAST'
    );

    // Each "NULLS FIRST" / "NULLS LAST" pair must have been merged into one token
    const mergedTokens = tokens.filter((token) => token.scopes === 'nulls_order.esql');
    expect(mergedTokens).toHaveLength(2);
    expect(mergedTokens).toEqual<ESQLToken[]>([
      {
        scopes: 'nulls_order.esql',
        startIndex: 44,
        stopIndex: 54,
      },
      {
        scopes: 'nulls_order.esql',
        startIndex: 69,
        stopIndex: 78,
      },
    ]);

    // None of the individual NULLS / FIRST / LAST tokens may remain after the merge
    const remainingScopes = tokens.map((token) => token.scopes);
    for (const absorbedScope of ['nulls.esql', 'first.esql', 'last.esql']) {
      expect(remainingScopes).not.toContain(absorbedScope);
    }
  });

  it('should properly tokenize timespan literals', () => {
    const tokens = tokenizeLine(
      'SELECT * FROM my_index | WHERE date_field > 1 day AND other_field < 2 hours'
    );

    expect(tokens.filter((token) => token.scopes === 'timespan_literal.esql')).toHaveLength(2);
  });
});
6 changes: 3 additions & 3 deletions packages/kbn-monaco/src/esql/lib/esql_tokens_provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { ESQLLineTokens } from './esql_line_tokens';
import { ESQLState } from './esql_state';

import { ESQL_TOKEN_POSTFIX } from './constants';
import { addFunctionTokens, addNullsOrder } from './esql_token_helpers';
import { addFunctionTokens, mergeTokens } from './esql_token_helpers';

const EOF = -1;

Expand All @@ -37,7 +37,7 @@ export class ESQLTokensProvider implements monaco.languages.TokensProvider {
const lexer = getLexer(inputStream, errorListener);

let done = false;
const myTokens: monaco.languages.IToken[] = [];
const myTokens: ESQLToken[] = [];

do {
let token: Token | null;
Expand Down Expand Up @@ -78,7 +78,7 @@ export class ESQLTokensProvider implements monaco.languages.TokensProvider {
// the previous custom Kibana grammar baked functions directly as tokens, so highlight was easier
// The ES grammar doesn't have the token concept of "function"
const tokensWithFunctions = addFunctionTokens(myTokens);
addNullsOrder(tokensWithFunctions);
mergeTokens(tokensWithFunctions);

return new ESQLLineTokens(tokensWithFunctions, prevState.getLineNumber() + 1);
}
Expand Down