From 144913fc4411aa387d427e919bc82eea96d6b784 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 5 Oct 2023 18:25:45 +0000 Subject: [PATCH] Enable PPL Queries (#2215) Signed-off-by: Vamsi Manohar (cherry picked from commit a83482d9ef5822b15f973b89970d80a497fea430) Signed-off-by: github-actions[bot] --- .../spark/asyncquery/model/SparkSubmitParameters.java | 6 ++++-- .../sql/spark/data/constants/SparkConstants.java | 9 +++++---- .../sql/spark/dispatcher/SparkQueryDispatcherTest.java | 6 ++++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java index f9f0b8ed8d..1f7bf4b9fb 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java +++ b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java @@ -45,7 +45,9 @@ private Builder() { config.put( HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY, DEFAULT_GLUE_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY); - config.put(SPARK_JAR_PACKAGES_KEY, SPARK_STANDALONE_PACKAGE + "," + SPARK_LAUNCHER_PACKAGE); + config.put( + SPARK_JAR_PACKAGES_KEY, + SPARK_STANDALONE_PACKAGE + "," + SPARK_LAUNCHER_PACKAGE + "," + PPL_STANDALONE_PACKAGE); config.put(SPARK_JAR_REPOSITORIES_KEY, AWS_SNAPSHOT_REPOSITORY); config.put(SPARK_DRIVER_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); config.put(SPARK_EXECUTOR_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); @@ -54,7 +56,7 @@ private Builder() { config.put(FLINT_INDEX_STORE_SCHEME_KEY, FLINT_DEFAULT_SCHEME); config.put(FLINT_INDEX_STORE_AUTH_KEY, FLINT_DEFAULT_AUTH); config.put(FLINT_CREDENTIALS_PROVIDER_KEY, EMR_ASSUME_ROLE_CREDENTIALS_PROVIDER); - config.put(SPARK_SQL_EXTENSIONS_KEY, FLINT_SQL_EXTENSION); + config.put(SPARK_SQL_EXTENSIONS_KEY, FLINT_SQL_EXTENSION + "," + FLINT_PPL_EXTENSION); config.put(HIVE_METASTORE_CLASS_KEY, GLUE_HIVE_CATALOG_FACTORY_CLASS); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java b/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java index 7fc71458d0..284afcc0a9 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java +++ b/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java @@ -25,9 +25,6 @@ public class SparkConstants { public static final String FLINT_INTEGRATION_JAR = "s3://spark-datasource/flint-spark-integration-assembly-0.1.0-SNAPSHOT.jar"; // TODO should be replaced with mvn jar. - public static final String GLUE_CATALOG_HIVE_JAR = - "s3://flint-data-dp-eu-west-1-beta/code/flint/AWSGlueDataCatalogHiveMetaStoreAuth-1.0.jar"; - // TODO should be replaced with mvn jar. public static final String FLINT_CATALOG_JAR = "s3://flint-data-dp-eu-west-1-beta/code/flint/flint-catalog.jar"; public static final String FLINT_DEFAULT_HOST = "localhost"; @@ -45,7 +42,6 @@ public class SparkConstants { public static final String HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY = "spark.hadoop.aws.catalog.credentials.provider.factory.class"; public static final String HIVE_METASTORE_GLUE_ARN_KEY = "spark.hive.metastore.glue.role.arn"; - public static final String SPARK_JARS_KEY = "spark.jars"; public static final String SPARK_JAR_PACKAGES_KEY = "spark.jars.packages"; public static final String SPARK_JAR_REPOSITORIES_KEY = "spark.jars.repositories"; public static final String SPARK_DRIVER_ENV_JAVA_HOME_KEY = @@ -74,6 +70,8 @@ public class SparkConstants { "org.opensearch:opensearch-spark-standalone_2.12:0.1.0-SNAPSHOT"; public static final String SPARK_LAUNCHER_PACKAGE = "org.opensearch:opensearch-spark-sql-application_2.12:0.1.0-SNAPSHOT"; + public static final String PPL_STANDALONE_PACKAGE = + "org.opensearch:opensearch-spark-ppl_2.12:0.1.0-SNAPSHOT"; public static final String AWS_SNAPSHOT_REPOSITORY = "https://aws.oss.sonatype.org/content/repositories/snapshots"; public static final String GLUE_HIVE_CATALOG_FACTORY_CLASS = @@ -82,6 +80,9 @@ public class SparkConstants { "org.opensearch.sql.FlintDelegatingSessionCatalog"; public static final String FLINT_SQL_EXTENSION = "org.opensearch.flint.spark.FlintSparkExtensions"; + public static final String FLINT_PPL_EXTENSION = + "org.opensearch.flint.spark.FlintPPLSparkExtensions"; + public static final String EMR_ASSUME_ROLE_CREDENTIALS_PROVIDER = "com.amazonaws.emr.AssumeRoleAWSCredentialsProvider"; public static final String JAVA_HOME_LOCATION = "/usr/lib/jvm/java-17-amazon-corretto.x86_64/"; diff --git a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java index 4c04381f36..c89c122d11 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java @@ -774,7 +774,7 @@ private String constructExpectedSparkSubmitParameterString( + " --conf" + " spark.hadoop.aws.catalog.credentials.provider.factory.class=com.amazonaws.glue.catalog.metastore.STSAssumeRoleSessionCredentialsProviderFactory" + " --conf" - + " spark.jars.packages=org.opensearch:opensearch-spark-standalone_2.12:0.1.0-SNAPSHOT,org.opensearch:opensearch-spark-sql-application_2.12:0.1.0-SNAPSHOT" + + " spark.jars.packages=org.opensearch:opensearch-spark-standalone_2.12:0.1.0-SNAPSHOT,org.opensearch:opensearch-spark-sql-application_2.12:0.1.0-SNAPSHOT,org.opensearch:opensearch-spark-ppl_2.12:0.1.0-SNAPSHOT" + " --conf" + " spark.jars.repositories=https://aws.oss.sonatype.org/content/repositories/snapshots" + " --conf" @@ -787,7 +787,9 @@ private String constructExpectedSparkSubmitParameterString( + auth + " --conf" + " spark.datasource.flint.customAWSCredentialsProvider=com.amazonaws.emr.AssumeRoleAWSCredentialsProvider" - + " --conf spark.sql.extensions=org.opensearch.flint.spark.FlintSparkExtensions --conf" + + " --conf" + + " spark.sql.extensions=org.opensearch.flint.spark.FlintSparkExtensions,org.opensearch.flint.spark.FlintPPLSparkExtensions" + + " --conf" + " spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory" + " --conf" + " spark.emr-serverless.driverEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::924196221507:role/FlintOpensearchServiceRole"