Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ES|QL] improve tokenizer and theme #190170

Merged
merged 9 commits
Aug 9, 2024
133 changes: 133 additions & 0 deletions packages/kbn-monaco/src/esql/lib/esql_theme.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { ESQLErrorListener, getLexer as _getLexer } from '@kbn/esql-ast';
import { ESQL_TOKEN_POSTFIX } from './constants';
import { buildESQlTheme } from './esql_theme';
import { CharStreams } from 'antlr4';

describe('ESQL Theme', () => {
it('should not have multiple rules for a single token', () => {
  const theme = buildESQlTheme();

  // Record each token the first time it appears; any later appearance is a duplicate.
  const firstSeen = new Set<string>();
  const duplicates = theme.rules
    .map((rule) => rule.token)
    .filter((token) => {
      const isDuplicate = firstSeen.has(token);
      firstSeen.add(token);
      return isDuplicate;
    });

  expect(duplicates).toEqual([]);
});

const getLexer = () => {
const errorListener = new ESQLErrorListener();
const inputStream = CharStreams.fromString('FROM foo');
return _getLexer(inputStream, errorListener);
};

const lexer = getLexer();
const lexicalNames = lexer.symbolicNames
.filter((name) => typeof name === 'string')
.map((name) => name!.toLowerCase());

it('every rule should apply to a valid lexical name', () => {
  const theme = buildESQlTheme();

  // These names aren't from the lexer... they are added on our side
  // see packages/kbn-monaco/src/esql/lib/esql_token_helpers.ts
  const syntheticNames = ['functions', 'nulls_order', 'timespan_literal'];

  // Hoisted out of the loop: the set of acceptable names is loop-invariant,
  // so don't rebuild the combined array on every iteration.
  const validNames = [...lexicalNames, ...syntheticNames];

  for (const rule of theme.rules) {
    const tokenName = rule.token.replace(ESQL_TOKEN_POSTFIX, '').toLowerCase();
    expect(validNames).toContain(tokenName);
  }
});

it('every valid lexical name should have a corresponding rule', () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we like this test? It seemed like a good idea as an extra check that we aren't missing some lexical token that gets added. But, we can remove if it seems like a pain.

Copy link
Contributor

@stratoula stratoula Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, leave it and let's see how painful it gets

const theme = buildESQlTheme();
const tokenIDs = theme.rules.map((rule) => rule.token.replace(ESQL_TOKEN_POSTFIX, ''));

const validExceptions = [
'unquoted_source',
'false', // @TODO consider if this should get styling
'true', // @TODO consider if this should get styling
'info', // @TODO consider if this should get styling
'colon', // @TODO consider if this should get styling
Comment on lines +61 to +64
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does kind of feel like these should be styled somehow cc @ryankeairns


'nulls', // nulls is a part of nulls_order so it doesn't need its own rule
'first', // first is a part of nulls_order so it doesn't need its own rule
'last', // last is a part of nulls_order so it doesn't need its own rule

'id_pattern', // "KEEP <id_pattern>, <id_pattern>"... no styling needed
'enrich_policy_name', // "ENRICH <enrich_policy_name>"
'expr_ws', // whitespace, so no reason to style it
'unknown_cmd', // unknown command, so no reason to style it

// Lexer-mode-specific stuff
'explain_line_comment',
'explain_multiline_comment',
'explain_ws',
'project_line_comment',
'project_multiline_comment',
'project_ws',
'rename_line_comment',
'rename_multiline_comment',
'rename_ws',
'from_line_comment',
'from_multiline_comment',
'from_ws',
'enrich_line_comment',
'enrich_multiline_comment',
'enrich_ws',
'mvexpand_line_comment',
'mvexpand_multiline_comment',
'mvexpand_ws',
'enrich_field_line_comment',
'enrich_field_multiline_comment',
'enrich_field_ws',
'lookup_line_comment',
'lookup_multiline_comment',
'lookup_ws',
'lookup_field_line_comment',
'lookup_field_multiline_comment',
'lookup_field_ws',
'show_line_comment',
'show_multiline_comment',
'show_ws',
'meta_line_comment',
'meta_multiline_comment',
'meta_ws',
'setting',
'setting_line_comment',
'settting_multiline_comment',
'setting_ws',
'metrics_line_comment',
'metrics_multiline_comment',
'metrics_ws',
'closing_metrics_line_comment',
'closing_metrics_multiline_comment',
'closing_metrics_ws',
];

// First, check that every valid exception is actually valid
for (const name of validExceptions) {
expect(lexicalNames).toContain(name);
}

const namesToCheck = lexicalNames.filter((name) => !validExceptions.includes(name));

// Now, check that every lexical name has a corresponding rule
for (const name of namesToCheck) {
expect(tokenIDs).toContain(name);
}
});
});
37 changes: 14 additions & 23 deletions packages/kbn-monaco/src/esql/lib/esql_theme.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,15 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
...buildRuleGroup(
[
'explain',
'row',
'limit',
'ws',
'assign',
'comma',
'dot',
'first',
'last',
'opening_bracket',
'closing_bracket',
'quoted_identifier',
'src_ws',
'unquoted_identifier',
'pipe',
'not',
'percent',
'integer_literal',
'decimal_literal',
'src_unquoted_identifier',
'src_quoted_identifier',
'string',
],
euiThemeVars.euiTextColor
),
Expand All @@ -57,12 +45,14 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
...buildRuleGroup(
[
'metrics',
'meta',
'metadata',
'match',
'mv_expand',
'stats',
'inlinestats',
'dissect',
'grok',
'project',
'keep',
'rename',
'drop',
Expand All @@ -76,8 +66,8 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
'rlike',
'in',
'as',
'expr_ws',
'limit',
'lookup',
'null',
'enrich',
'on',
Expand Down Expand Up @@ -112,26 +102,27 @@ export const buildESQlTheme = (): monaco.editor.IStandaloneThemeData => ({
'asterisk', // '*'
'slash', // '/'
'percent', // '%'
'cast_op', // '::'
],
euiThemeVars.euiColorPrimaryText
),

// comments
...buildRuleGroup(
[
'line_comment',
'multiline_comment',
'expr_line_comment',
'expr_multiline_comment',
'src_line_comment',
'src_multiline_comment',
],
['line_comment', 'multiline_comment', 'expr_line_comment', 'expr_multiline_comment'],
euiThemeVars.euiColorDisabledText
),

// values
...buildRuleGroup(
['quoted_string', 'integer_literal', 'decimal_literal', 'named_or_positional_param'],
[
'quoted_string',
'integer_literal',
'decimal_literal',
'named_or_positional_param',
'param',
'timespan_literal',
],
euiThemeVars.euiColorSuccessText
),
],
Expand Down
70 changes: 58 additions & 12 deletions packages/kbn-monaco/src/esql/lib/esql_token_helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import { monaco } from '../../monaco_imports';
import { ESQL_TOKEN_POSTFIX } from './constants';
import { ESQLToken } from './esql_token';

function nonNullable<T>(value: T | undefined): value is T {
return value != null;
Expand All @@ -33,17 +34,62 @@ export function addFunctionTokens(tokens: monaco.languages.IToken[]): monaco.lan
return [...tokens];
}

export function addNullsOrder(tokens: monaco.languages.IToken[]): void {
const nullsIndex = tokens.findIndex((token) => token.scopes === 'nulls' + ESQL_TOKEN_POSTFIX);
if (
// did we find a "nulls"?
nullsIndex > -1 &&
// is the next non-whitespace token an order?
['first' + ESQL_TOKEN_POSTFIX, 'last' + ESQL_TOKEN_POSTFIX].includes(
tokens[nullsIndex + 2]?.scopes
)
) {
tokens[nullsIndex].scopes = 'nulls_order' + ESQL_TOKEN_POSTFIX;
tokens.splice(nullsIndex + 1, 2);
// Token-merging rules: when the scope sequence on the left appears consecutively
// in the token stream, the run is collapsed into a single token with the scope
// on the right. Scope names here are written without ESQL_TOKEN_POSTFIX.
const mergeRules = [
  // "NULLS FIRST" / "NULLS LAST" become one nulls_order token
  [['nulls', 'expr_ws', 'first'], 'nulls_order'],
  [['nulls', 'expr_ws', 'last'], 'nulls_order'],
  // dotted field names ("event.action") collapse into one identifier token
  [['unquoted_identifier', 'dot', 'unquoted_identifier'], 'unquoted_identifier'],
  // NOTE(review): 'integer' (vs 'integer_literal' in the rule below) — confirm
  // this token name actually exists in the lexer; otherwise this rule never matches.
  [['integer', 'unquoted_identifier'], 'timespan_literal'],
  // "1 day"-style timespans become one timespan_literal token
  [['integer_literal', 'expr_ws', 'unquoted_identifier'], 'timespan_literal'],
] as const;

/**
 * Collapses runs of tokens according to `mergeRules`.
 *
 * Each rule is re-applied (do/while) until a full pass finds no match, so runs
 * that only become adjacent after an earlier merge are also handled — e.g.
 * repeated dotted segments like "a.b.c".
 *
 * Mutates `tokens` in place (via splice) and returns the same array.
 */
export function mergeTokens(tokens: ESQLToken[]): monaco.languages.IToken[] {
  for (const [scopes, newScope] of mergeRules) {
    let foundAnyMatches = false;
    do {
      foundAnyMatches = false;
      for (let i = 0; i < tokens.length; i++) {
        if (tokens[i].scopes === scopes[0] + ESQL_TOKEN_POSTFIX) {
          // first matched so look ahead if there's room for the rest of the rule
          if (i + scopes.length > tokens.length) {
            continue;
          }

          // check the remaining scopes of the rule against the following tokens
          let match = true;
          for (let j = 1; j < scopes.length; j++) {
            if (tokens[i + j].scopes !== scopes[j] + ESQL_TOKEN_POSTFIX) {
              match = false;
              break;
            }
          }

          if (match) {
            foundAnyMatches = true;
            // merged token spans from the start of the first matched token to
            // the end of the last one
            const mergedToken = new ESQLToken(
              newScope,
              tokens[i].startIndex,
              tokens[i + scopes.length - 1].stopIndex
            );
            tokens.splice(i, scopes.length, mergedToken);
          }
        }
      }
    } while (foundAnyMatches);
  }

  return tokens;
}

// export function addNullsOrder(tokens: monaco.languages.IToken[]): void {
// const nullsIndex = tokens.findIndex((token) => token.scopes === 'nulls' + ESQL_TOKEN_POSTFIX);
// if (
// // did we find a "nulls"?
// nullsIndex > -1 &&
// // is the next non-whitespace token an order?
// ['first' + ESQL_TOKEN_POSTFIX, 'last' + ESQL_TOKEN_POSTFIX].includes(
// tokens[nullsIndex + 2]?.scopes
// )
// ) {
// tokens[nullsIndex].scopes = 'nulls_order' + ESQL_TOKEN_POSTFIX;
// tokens.splice(nullsIndex + 1, 2);
// }
// }
84 changes: 84 additions & 0 deletions packages/kbn-monaco/src/esql/lib/esql_tokens_provider.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { ESQLState } from './esql_state';
import { ESQLToken } from './esql_token';
import { ESQLTokensProvider } from './esql_tokens_provider';

describe('ES|QL Tokens Provider', () => {
  // Tokenize a single line from a fresh parser state and return its tokens.
  const tokenize = (line: string) =>
    new ESQLTokensProvider().tokenize(line, new ESQLState()).tokens;

  // Convenience: just the scope names, in token order.
  const scopesOf = (line: string) => tokenize(line).map((t) => t.scopes);

  it('should tokenize a line', () => {
    expect(scopesOf('SELECT * FROM my_index')).toEqual([
      'unknown_cmd.esql',
      'expr_ws.esql',
      'asterisk.esql',
      'expr_ws.esql',
      'unquoted_identifier.esql',
      'expr_ws.esql',
      'unquoted_identifier.esql',
    ]);
  });

  it('should properly tokenize functions', () => {
    const tokens = tokenize(
      'FROM my_index | EVAL date_diff("day", NOW()) | STATS abs(field1), avg(field1)'
    );
    expect(tokens.filter((t) => t.scopes === 'functions.esql')).toHaveLength(3);
  });

  it('should properly tokenize SORT... NULLS clauses', () => {
    const tokens = tokenize(
      'SELECT * FROM my_index | SORT BY field1 ASC NULLS FIRST, field2 DESC NULLS LAST'
    );
    // The NULLS FIRST / NULLS LAST pairs should have been merged into single tokens
    const nullsOrderTokens = tokens.filter((t) => t.scopes === 'nulls_order.esql');
    expect(nullsOrderTokens).toHaveLength(2);
    expect(nullsOrderTokens).toEqual<ESQLToken[]>([
      {
        scopes: 'nulls_order.esql',
        startIndex: 44,
        stopIndex: 54,
      },
      {
        scopes: 'nulls_order.esql',
        startIndex: 69,
        stopIndex: 78,
      },
    ]);
    // None of the constituent tokens should survive the merge
    const allScopes = tokens.map((t) => t.scopes);
    expect(allScopes).not.toContain('nulls.esql');
    expect(allScopes).not.toContain('first.esql');
    expect(allScopes).not.toContain('last.esql');
  });

  it('should properly tokenize timespan literals', () => {
    const tokens = tokenize(
      'SELECT * FROM my_index | WHERE date_field > 1 day AND other_field < 2 hours'
    );
    expect(tokens.filter((t) => t.scopes === 'timespan_literal.esql')).toHaveLength(2);
  });

  it('should properly tokenize field names with periods', () => {
    // The trailing field name must lex as a single identifier preceded by whitespace.
    const checkFieldName = (line: string) => {
      expect(scopesOf(line).slice(-2)).toEqual(['expr_ws.esql', 'unquoted_identifier.esql']);
    };

    checkFieldName('FROM my_index | EVAL event.action');
    checkFieldName('FROM my_index | EVAL event.action.subaction');
    checkFieldName('FROM my_index | EVAL @timestamp');
  });
});
Loading