forked from opensearch-project/sql
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Glue datasource support (opensearch-project#2055)
Signed-off-by: Vamsi Manohar <[email protected]>
- Loading branch information
1 parent
61d1eb7
commit dae6dc3
Showing
10 changed files
with
250 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
56 changes: 56 additions & 0 deletions
56
datasources/src/main/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package org.opensearch.sql.datasources.glue; | ||
|
||
import java.net.URISyntaxException; | ||
import java.net.UnknownHostException; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import lombok.RequiredArgsConstructor; | ||
import org.opensearch.sql.common.setting.Settings; | ||
import org.opensearch.sql.datasource.model.DataSource; | ||
import org.opensearch.sql.datasource.model.DataSourceMetadata; | ||
import org.opensearch.sql.datasource.model.DataSourceType; | ||
import org.opensearch.sql.datasources.utils.DatasourceValidationUtils; | ||
import org.opensearch.sql.storage.DataSourceFactory; | ||
|
||
@RequiredArgsConstructor | ||
public class GlueDataSourceFactory implements DataSourceFactory { | ||
|
||
private final Settings pluginSettings; | ||
|
||
// Glue configuration properties | ||
public static final String GLUE_AUTH_TYPE = "glue.auth.type"; | ||
public static final String GLUE_ROLE_ARN = "glue.auth.role_arn"; | ||
public static final String FLINT_URI = "glue.indexstore.opensearch.uri"; | ||
public static final String FLINT_AUTH = "glue.indexstore.opensearch.auth"; | ||
public static final String FLINT_REGION = "glue.indexstore.opensearch.region"; | ||
|
||
@Override | ||
public DataSourceType getDataSourceType() { | ||
return DataSourceType.S3GLUE; | ||
} | ||
|
||
@Override | ||
public DataSource createDataSource(DataSourceMetadata metadata) { | ||
try { | ||
validateGlueDataSourceConfiguration(metadata.getProperties()); | ||
return new DataSource( | ||
metadata.getName(), | ||
metadata.getConnector(), | ||
(dataSourceSchemaName, tableName) -> { | ||
throw new UnsupportedOperationException("Glue storage engine is not supported."); | ||
}); | ||
} catch (URISyntaxException | UnknownHostException e) { | ||
throw new IllegalArgumentException("Invalid flint host in properties."); | ||
} | ||
} | ||
|
||
private void validateGlueDataSourceConfiguration(Map<String, String> dataSourceMetadataConfig) | ||
throws URISyntaxException, UnknownHostException { | ||
DatasourceValidationUtils.validateLengthAndRequiredFields( | ||
dataSourceMetadataConfig, | ||
Set.of(GLUE_AUTH_TYPE, GLUE_ROLE_ARN, FLINT_URI, FLINT_REGION, FLINT_AUTH)); | ||
DatasourceValidationUtils.validateHost( | ||
dataSourceMetadataConfig.get(FLINT_URI), | ||
pluginSettings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
datasources/src/test/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactoryTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package org.opensearch.sql.datasources.glue; | ||
|
||
import static org.mockito.Mockito.when; | ||
|
||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import lombok.SneakyThrows; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import org.mockito.Mock; | ||
import org.mockito.junit.jupiter.MockitoExtension; | ||
import org.opensearch.sql.DataSourceSchemaName; | ||
import org.opensearch.sql.common.setting.Settings; | ||
import org.opensearch.sql.datasource.model.DataSource; | ||
import org.opensearch.sql.datasource.model.DataSourceMetadata; | ||
import org.opensearch.sql.datasource.model.DataSourceType; | ||
|
||
@ExtendWith(MockitoExtension.class) | ||
public class GlueDataSourceFactoryTest { | ||
|
||
@Mock private Settings settings; | ||
|
||
@Test | ||
void testGetConnectorType() { | ||
GlueDataSourceFactory glueDatasourceFactory = new GlueDataSourceFactory(settings); | ||
Assertions.assertEquals(DataSourceType.S3GLUE, glueDatasourceFactory.getDataSourceType()); | ||
} | ||
|
||
@Test | ||
@SneakyThrows | ||
void testCreateGLueDatSource() { | ||
when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) | ||
.thenReturn(Collections.emptyList()); | ||
GlueDataSourceFactory glueDatasourceFactory = new GlueDataSourceFactory(settings); | ||
|
||
DataSourceMetadata metadata = new DataSourceMetadata(); | ||
HashMap<String, String> properties = new HashMap<>(); | ||
properties.put("glue.auth.type", "iam_role"); | ||
properties.put("glue.auth.role_arn", "role_arn"); | ||
properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); | ||
properties.put("glue.indexstore.opensearch.auth", "false"); | ||
properties.put("glue.indexstore.opensearch.region", "us-west-2"); | ||
|
||
metadata.setName("my_glue"); | ||
metadata.setConnector(DataSourceType.S3GLUE); | ||
metadata.setProperties(properties); | ||
DataSource dataSource = glueDatasourceFactory.createDataSource(metadata); | ||
Assertions.assertEquals(DataSourceType.S3GLUE, dataSource.getConnectorType()); | ||
UnsupportedOperationException unsupportedOperationException = | ||
Assertions.assertThrows( | ||
UnsupportedOperationException.class, | ||
() -> | ||
dataSource | ||
.getStorageEngine() | ||
.getTable(new DataSourceSchemaName("my_glue", "default"), "alb_logs")); | ||
Assertions.assertEquals( | ||
"Glue storage engine is not supported.", unsupportedOperationException.getMessage()); | ||
} | ||
|
||
@Test | ||
@SneakyThrows | ||
void testCreateGLueDatSourceWithInvalidFlintHost() { | ||
when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) | ||
.thenReturn(List.of("127.0.0.0/8")); | ||
GlueDataSourceFactory glueDatasourceFactory = new GlueDataSourceFactory(settings); | ||
|
||
DataSourceMetadata metadata = new DataSourceMetadata(); | ||
HashMap<String, String> properties = new HashMap<>(); | ||
properties.put("glue.auth.type", "iam_role"); | ||
properties.put("glue.auth.role_arn", "role_arn"); | ||
properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); | ||
properties.put("glue.indexstore.opensearch.auth", "false"); | ||
properties.put("glue.indexstore.opensearch.region", "us-west-2"); | ||
|
||
metadata.setName("my_glue"); | ||
metadata.setConnector(DataSourceType.S3GLUE); | ||
metadata.setProperties(properties); | ||
IllegalArgumentException illegalArgumentException = | ||
Assertions.assertThrows( | ||
IllegalArgumentException.class, () -> glueDatasourceFactory.createDataSource(metadata)); | ||
Assertions.assertEquals( | ||
"Disallowed hostname in the uri. " | ||
+ "Validate with plugins.query.datasources.uri.hosts.denylist config", | ||
illegalArgumentException.getMessage()); | ||
} | ||
|
||
@Test | ||
@SneakyThrows | ||
void testCreateGLueDatSourceWithInvalidFlintHostSyntax() { | ||
when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) | ||
.thenReturn(List.of("127.0.0.0/8")); | ||
GlueDataSourceFactory glueDatasourceFactory = new GlueDataSourceFactory(settings); | ||
|
||
DataSourceMetadata metadata = new DataSourceMetadata(); | ||
HashMap<String, String> properties = new HashMap<>(); | ||
properties.put("glue.auth.type", "iam_role"); | ||
properties.put("glue.auth.role_arn", "role_arn"); | ||
properties.put( | ||
"glue.indexstore.opensearch.uri", | ||
"http://dummyprometheus.com:9090? paramt::localhost:9200"); | ||
properties.put("glue.indexstore.opensearch.auth", "false"); | ||
properties.put("glue.indexstore.opensearch.region", "us-west-2"); | ||
|
||
metadata.setName("my_glue"); | ||
metadata.setConnector(DataSourceType.S3GLUE); | ||
metadata.setProperties(properties); | ||
IllegalArgumentException illegalArgumentException = | ||
Assertions.assertThrows( | ||
IllegalArgumentException.class, () -> glueDatasourceFactory.createDataSource(metadata)); | ||
Assertions.assertEquals( | ||
"Invalid flint host in properties.", illegalArgumentException.getMessage()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
.. highlight:: sh | ||
|
||
==================== | ||
S3Glue Connector | ||
==================== | ||
|
||
.. rubric:: Table of contents | ||
|
||
.. contents:: | ||
:local: | ||
:depth: 1 | ||
|
||
|
||
Introduction | ||
============ | ||
|
||
The s3Glue connector provides a way to query s3 files using glue as the metadata store and spark as the execution engine.
This page covers s3Glue datasource configuration and also how to query an s3Glue datasource.
|
||
|
||
Required resources for s3 Glue Connector
========================================
* S3: This is where the data lies. | ||
* Spark Execution Engine: Query Execution happens on spark. | ||
* Glue Metadata store: Glue takes care of table metadata. | ||
* Opensearch: Index for s3 data lies in opensearch and also acts as temporary buffer for query results. | ||
|
||
We currently only support emr-serverless as the spark execution engine and Glue as the metadata store. We will add more support in the future.
|
||
Glue Connector Properties in DataSource Configuration | ||
======================================================== | ||
Glue Connector Properties. | ||
|
||
* ``glue.auth.type`` [Required] | ||
* This parameter provides the authentication type information required for the execution engine to connect to glue.
* The S3 Glue connector currently only supports ``iam_role`` authentication, and the parameter below is required.
* ``glue.auth.role_arn`` | ||
* ``glue.indexstore.opensearch.*`` [Required] | ||
* These parameters provide the Opensearch domain host information for the glue connector. This opensearch instance is used for writing index data back and also acts as a temporary buffer for query results.
* ``glue.indexstore.opensearch.uri`` [Required] | ||
* ``glue.indexstore.opensearch.auth`` [Required] | ||
* Default value for auth is ``false``. | ||
* ``glue.indexstore.opensearch.region`` [Required] | ||
* Default value for region is ``us-west-2``.
|
||
Sample Glue dataSource configuration | ||
======================================== | ||
|
||
Glue datasource configuration:: | ||
|
||
[{ | ||
"name" : "my_glue", | ||
"connector": "s3glue", | ||
"properties" : { | ||
"glue.auth.type": "iam_role", | ||
"glue.auth.role_arn": "role_arn", | ||
"glue.indexstore.opensearch.uri": "http://localhost:9200", | ||
"glue.indexstore.opensearch.auth" :"false", | ||
"glue.indexstore.opensearch.region": "us-west-2" | ||
} | ||
}] | ||
|
||
|
||
Sample s3Glue datasource queries | ||
================================ | ||
<To Be Added> | ||
|
||
|
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters