Add tags to the emr jobs based on the query types

Signed-off-by: Vamsi Manohar <[email protected]>
opensearch-project · Sep 26, 2023 · 164fc4d · 164fc4d
1 parent be82714
commit 164fc4d
Show file tree

Hide file tree

Showing 17 changed files with 3,153 additions and 24 deletions.
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
@@ -301,7 +301,10 @@ private AsyncQueryExecutorService createAsyncQueryExecutorService() {
     JobExecutionResponseReader jobExecutionResponseReader = new JobExecutionResponseReader(client);
     SparkQueryDispatcher sparkQueryDispatcher =
         new SparkQueryDispatcher(
-            sparkJobClient, this.dataSourceService, jobExecutionResponseReader);
+            sparkJobClient,
+            this.dataSourceService,
+            new DataSourceUserAuthorizationHelperImpl(client),
+            jobExecutionResponseReader);
     return new AsyncQueryExecutorServiceImpl(
         asyncQueryJobMetadataStorageService, sparkQueryDispatcher, pluginSettings);
   }

diff --git a/spark/build.gradle b/spark/build.gradle
@@ -7,13 +7,41 @@ plugins {
     id 'java-library'
     id "io.freefair.lombok"
     id 'jacoco'
+    id 'antlr'
 }
 
 repositories {
     mavenCentral()
 }
 
+tasks.register('downloadG4Files', Exec) {
+    description = 'Download remote .g4 files from GitHub'
+
+    executable 'curl'
+//    Need to add these back once the grammar issues with indexName and tableName is addressed in flint integration jar.
+//    args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4'
+//    args '-o', 'src/main/antlr/SparkSqlBase.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4'
+    args '-o', 'src/main/antlr/SqlBaseParser.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4'
+    args '-o', 'src/main/antlr/SqlBaseLexer.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4'
+}
+
+generateGrammarSource {
+    arguments += ['-visitor', '-package', 'org.opensearch.sql.spark.antlr.parser']
+    source = sourceSets.main.antlr
+    outputDirectory = file("build/generated-src/antlr/main/org/opensearch/sql/spark/antlr/parser")
+}
+configurations {
+    compile {
+        extendsFrom = extendsFrom.findAll { it != configurations.antlr }
+    }
+}
+
+// Make sure the downloadG4File task runs before the generateGrammarSource task
+generateGrammarSource.dependsOn downloadG4Files
+
 dependencies {
+    antlr "org.antlr:antlr4:4.7.1"
+
     api project(':core')
     implementation project(':protocol')
     implementation project(':datasources')

diff --git a/spark/src/main/antlr/FlintSparkSqlExtensions.g4 b/spark/src/main/antlr/FlintSparkSqlExtensions.g4
@@ -0,0 +1,91 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+grammar FlintSparkSqlExtensions;
+
+import SparkSqlBase;
+
+
+// Flint SQL Syntax Extension
+
+singleStatement
+    : statement SEMICOLON* EOF
+    ;
+
+statement
+    : skippingIndexStatement
+    | coveringIndexStatement
+    ;
+
+skippingIndexStatement
+    : createSkippingIndexStatement
+    | refreshSkippingIndexStatement
+    | describeSkippingIndexStatement
+    | dropSkippingIndexStatement
+    ;
+
+createSkippingIndexStatement
+    : CREATE SKIPPING INDEX ON tableName
+        LEFT_PAREN indexColTypeList RIGHT_PAREN
+        (WITH LEFT_PAREN propertyList RIGHT_PAREN)?
+    ;
+
+refreshSkippingIndexStatement
+    : REFRESH SKIPPING INDEX ON tableName
+    ;
+
+describeSkippingIndexStatement
+    : (DESC | DESCRIBE) SKIPPING INDEX ON tableName
+    ;
+
+dropSkippingIndexStatement
+    : DROP SKIPPING INDEX ON tableName
+    ;
+
+coveringIndexStatement
+    : createCoveringIndexStatement
+    | refreshCoveringIndexStatement
+    | showCoveringIndexStatement
+    | describeCoveringIndexStatement
+    | dropCoveringIndexStatement
+    ;
+
+createCoveringIndexStatement
+    : CREATE INDEX indexName ON tableName
+        LEFT_PAREN indexColumns=multipartIdentifierPropertyList RIGHT_PAREN
+        (WITH LEFT_PAREN propertyList RIGHT_PAREN)?
+    ;
+
+refreshCoveringIndexStatement
+    : REFRESH INDEX indexName ON tableName
+    ;
+
+showCoveringIndexStatement
+    : SHOW (INDEX | INDEXES) ON tableName
+    ;
+
+describeCoveringIndexStatement
+    : (DESC | DESCRIBE) INDEX indexName ON tableName
+    ;
+
+dropCoveringIndexStatement
+    : DROP INDEX indexName ON tableName
+    ;
+
+indexColTypeList
+    : indexColType (COMMA indexColType)*
+    ;
+
+indexColType
+    : identifier skipType=(PARTITION | VALUE_SET | MIN_MAX)
+    ;
+
+indexName
+    : identifier
+    ;
+
+tableName
+    : multipartIdentifier
+    ;
diff --git a/spark/src/main/antlr/SparkSqlBase.g4 b/spark/src/main/antlr/SparkSqlBase.g4
@@ -0,0 +1,223 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * This file contains code from the Apache Spark project (original license below).
+ * It contains modifications, which are licensed as above:
+ */
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+grammar SparkSqlBase;
+
+// Copy from Spark 3.3.1 SqlBaseParser.g4 and SqlBaseLexer.g4
+
+@members {
+  /**
+   * When true, parser should throw ParseExcetion for unclosed bracketed comment.
+   */
+  public boolean has_unclosed_bracketed_comment = false;
+
+  /**
+   * Verify whether current token is a valid decimal token (which contains dot).
+   * Returns true if the character that follows the token is not a digit or letter or underscore.
+   *
+   * For example:
+   * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
+   * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
+   * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
+   * For char stream "12.0D 34.E2+0.12 "  12.0D is a valid decimal token because it is followed
+   * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
+   * which is not a digit or letter or underscore.
+   */
+  public boolean isValidDecimal() {
+    int nextChar = _input.LA(1);
+    if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
+      nextChar == '_') {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  /**
+   * This method will be called when we see '/*' and try to match it as a bracketed comment.
+   * If the next character is '+', it should be parsed as hint later, and we cannot match
+   * it as a bracketed comment.
+   *
+   * Returns true if the next character is '+'.
+   */
+  public boolean isHint() {
+    int nextChar = _input.LA(1);
+    if (nextChar == '+') {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * This method will be called when the character stream ends and try to find out the
+   * unclosed bracketed comment.
+   * If the method be called, it means the end of the entire character stream match,
+   * and we set the flag and fail later.
+   */
+  public void markUnclosedComment() {
+    has_unclosed_bracketed_comment = true;
+  }
+}
+
+
+multipartIdentifierPropertyList
+    : multipartIdentifierProperty (COMMA multipartIdentifierProperty)*
+    ;
+
+multipartIdentifierProperty
+    : multipartIdentifier (options=propertyList)?
+    ;
+
+propertyList
+    : property (COMMA property)*
+    ;
+
+property
+    : key=propertyKey (EQ? value=propertyValue)?
+    ;
+
+propertyKey
+    : identifier (DOT identifier)*
+    | STRING
+    ;
+
+propertyValue
+    : INTEGER_VALUE
+    | DECIMAL_VALUE
+    | booleanValue
+    | STRING
+    ;
+
+booleanValue
+    : TRUE | FALSE
+    ;
+
+
+multipartIdentifier
+    : parts+=identifier (DOT parts+=identifier)*
+    ;
+
+identifier
+    : IDENTIFIER              #unquotedIdentifier
+    | quotedIdentifier        #quotedIdentifierAlternative
+    | nonReserved             #unquotedIdentifier
+    ;
+
+quotedIdentifier
+    : BACKQUOTED_IDENTIFIER
+    ;
+
+nonReserved
+    : DROP | SKIPPING | INDEX
+    ;
+
+
+// Flint lexical tokens
+
+MIN_MAX: 'MIN_MAX';
+SKIPPING: 'SKIPPING';
+VALUE_SET: 'VALUE_SET';
+
+
+// Spark lexical tokens
+
+SEMICOLON: ';';
+
+LEFT_PAREN: '(';
+RIGHT_PAREN: ')';
+COMMA: ',';
+DOT: '.';
+
+
+CREATE: 'CREATE';
+DESC: 'DESC';
+DESCRIBE: 'DESCRIBE';
+DROP: 'DROP';
+FALSE: 'FALSE';
+INDEX: 'INDEX';
+INDEXES: 'INDEXES';
+ON: 'ON';
+PARTITION: 'PARTITION';
+REFRESH: 'REFRESH';
+SHOW: 'SHOW';
+STRING: 'STRING';
+TRUE: 'TRUE';
+WITH: 'WITH';
+
+
+EQ  : '=' | '==';
+MINUS: '-';
+
+
+INTEGER_VALUE
+    : DIGIT+
+    ;
+
+DECIMAL_VALUE
+    : DECIMAL_DIGITS {isValidDecimal()}?
+    ;
+
+IDENTIFIER
+    : (LETTER | DIGIT | '_')+
+    ;
+
+BACKQUOTED_IDENTIFIER
+    : '`' ( ~'`' | '``' )* '`'
+    ;
+
+fragment DECIMAL_DIGITS
+    : DIGIT+ '.' DIGIT*
+    | '.' DIGIT+
+    ;
+
+fragment DIGIT
+    : [0-9]
+    ;
+
+fragment LETTER
+    : [A-Z]
+    ;
+
+SIMPLE_COMMENT
+    : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
+    ;
+
+BRACKETED_COMMENT
+    : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
+    ;
+
+WS
+    : [ \r\n\t]+ -> channel(HIDDEN)
+    ;
+
+// Catch-all for anything we can't recognize.
+// We use this to be able to ignore and recover all the text
+// when splitting statements with DelimiterLexer
+UNRECOGNIZED
+    : .
+    ;