Skip to content

Commit

Permalink
Add customized result index in data source etc (#2212)
Browse files Browse the repository at this point in the history
* Add customized result index in data source etc

This PR
- Introduce `spark.flint.datasource.name` parameter for data source specification.
- Enhance data source creation to allow custom result indices; fallback to default if unavailable.
- Include error details in the async result response, sourced from the result index.
- Migrate to `org.apache.spark.sql.FlintJob` following updates in OpenSearch-Spark.
- Populate query status from result index over EMR-S job status to handle edge cases where jobs may succeed, but queries or mappings fail.

Testing done:
1. manual testing, including verifying that async queries still work both with and without a custom result index
2. added new unit tests

Signed-off-by: Kaituo Li <[email protected]>

* address comments

Signed-off-by: Kaituo Li <[email protected]>

* revert incorrect change

Signed-off-by: Kaituo Li <[email protected]>

---------

Signed-off-by: Kaituo Li <[email protected]>
  • Loading branch information
kaituo authored Oct 5, 2023
1 parent 5df6105 commit 70450e4
Show file tree
Hide file tree
Showing 33 changed files with 302 additions and 145 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,20 @@ public class DataSourceMetadata {

@JsonProperty private Map<String, String> properties;

@JsonProperty private String resultIndex;

public DataSourceMetadata(
String name,
DataSourceType connector,
List<String> allowedRoles,
Map<String, String> properties) {
Map<String, String> properties,
String resultIndex) {
this.name = name;
this.connector = connector;
this.description = StringUtils.EMPTY;
this.properties = properties;
this.allowedRoles = allowedRoles;
this.resultIndex = resultIndex;
}

public DataSourceMetadata() {
Expand All @@ -69,6 +73,7 @@ public static DataSourceMetadata defaultOpenSearchDataSourceMetadata() {
DEFAULT_DATASOURCE_NAME,
DataSourceType.OPENSEARCH,
Collections.emptyList(),
ImmutableMap.of());
ImmutableMap.of(),
null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ public Set<DataSourceMetadata> getDataSourceMetadata(boolean isDefaultDataSource
ds.getName(),
ds.getConnectorType(),
Collections.emptyList(),
ImmutableMap.of()))
ImmutableMap.of(),
null))
.collect(Collectors.toSet());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ void testIterator() {
dataSource.getName(),
dataSource.getConnectorType(),
Collections.emptyList(),
ImmutableMap.of()))
ImmutableMap.of(),
null))
.collect(Collectors.toSet());
when(dataSourceService.getDataSourceMetadata(false)).thenReturn(dataSourceMetadata);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class XContentParserUtils {
public static final String PROPERTIES_FIELD = "properties";
public static final String ALLOWED_ROLES_FIELD = "allowedRoles";

public static final String RESULT_INDEX_FIELD = "resultIndex";

/**
* Convert xcontent parser to DataSourceMetadata.
*
Expand All @@ -45,6 +47,7 @@ public static DataSourceMetadata toDataSourceMetadata(XContentParser parser) thr
DataSourceType connector = null;
List<String> allowedRoles = new ArrayList<>();
Map<String, String> properties = new HashMap<>();
String resultIndex = null;
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
Expand Down Expand Up @@ -73,14 +76,18 @@ public static DataSourceMetadata toDataSourceMetadata(XContentParser parser) thr
properties.put(key, value);
}
break;
case RESULT_INDEX_FIELD:
resultIndex = parser.textOrNull();
break;
default:
throw new IllegalArgumentException("Unknown field: " + fieldName);
}
}
if (name == null || connector == null) {
throw new IllegalArgumentException("name and connector are required fields.");
}
return new DataSourceMetadata(name, description, connector, allowedRoles, properties);
return new DataSourceMetadata(
name, description, connector, allowedRoles, properties, resultIndex);
}

/**
Expand Down Expand Up @@ -122,6 +129,7 @@ public static XContentBuilder convertToXContent(DataSourceMetadata metadata) thr
builder.field(entry.getKey(), entry.getValue());
}
builder.endObject();
builder.field(RESULT_INDEX_FIELD, metadata.getResultIndex());
builder.endObject();
return builder;
}
Expand Down
2 changes: 2 additions & 0 deletions datasources/src/main/resources/datasources-index-mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ properties:
keyword:
type: keyword
connector:
type: keyword
resultIndex:
type: keyword
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ void testRemovalOfAuthorizationInfo() {
"testDS",
DataSourceType.PROMETHEUS,
Collections.singletonList("prometheus_access"),
properties);
properties,
null);
when(dataSourceMetadataStorage.getDataSourceMetadata("testDS"))
.thenReturn(Optional.of(dataSourceMetadata));

Expand Down Expand Up @@ -398,7 +399,8 @@ void testGetRawDataSourceMetadata() {
"testDS",
DataSourceType.PROMETHEUS,
Collections.singletonList("prometheus_access"),
properties);
properties,
null);
when(dataSourceMetadataStorage.getDataSourceMetadata("testDS"))
.thenReturn(Optional.of(dataSourceMetadata));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public void testConvertToXContent() {
XContentBuilder contentBuilder = XContentParserUtils.convertToXContent(dataSourceMetadata);
String contentString = BytesReference.bytes(contentBuilder).utf8ToString();
Assertions.assertEquals(
"{\"name\":\"testDS\",\"description\":\"\",\"connector\":\"PROMETHEUS\",\"allowedRoles\":[\"prometheus_access\"],\"properties\":{\"prometheus.uri\":\"https://localhost:9090\"}}",
"{\"name\":\"testDS\",\"description\":\"\",\"connector\":\"PROMETHEUS\",\"allowedRoles\":[\"prometheus_access\"],\"properties\":{\"prometheus.uri\":\"https://localhost:9090\"},\"resultIndex\":null}",
contentString);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ public void createDataSourceAPITest() {
"prometheus.auth.username",
"username",
"prometheus.auth.password",
"password"));
"password"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
Expand Down Expand Up @@ -96,7 +97,8 @@ public void updateDataSourceAPITest() {
"update_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://localhost:9090"));
ImmutableMap.of("prometheus.uri", "https://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
client().performRequest(createRequest);
// Datasource is not immediately created. so introducing a sleep of 2s.
Expand All @@ -108,7 +110,8 @@ public void updateDataSourceAPITest() {
"update_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://randomtest.com:9090"));
ImmutableMap.of("prometheus.uri", "https://randomtest.com:9090"),
null);
Request updateRequest = getUpdateDataSourceRequest(updateDSM);
Response updateResponse = client().performRequest(updateRequest);
Assert.assertEquals(200, updateResponse.getStatusLine().getStatusCode());
Expand Down Expand Up @@ -141,7 +144,8 @@ public void deleteDataSourceTest() {
"delete_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://localhost:9090"));
ImmutableMap.of("prometheus.uri", "https://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
client().performRequest(createRequest);
// Datasource is not immediately created. so introducing a sleep of 2s.
Expand Down Expand Up @@ -179,7 +183,8 @@ public void getAllDataSourceTest() {
"get_all_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "https://localhost:9090"));
ImmutableMap.of("prometheus.uri", "https://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
client().performRequest(createRequest);
// Datasource is not immediately created. so introducing a sleep of 2s.
Expand Down Expand Up @@ -215,7 +220,8 @@ public void issue2196() {
"prometheus.auth.username",
"username",
"prometheus.auth.password",
"password"));
"password"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ protected void init() throws InterruptedException, IOException {
"my_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "http://localhost:9090"));
ImmutableMap.of("prometheus.uri", "http://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ protected void init() throws InterruptedException, IOException {
"my_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "http://localhost:9090"));
ImmutableMap.of("prometheus.uri", "http://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ protected void init() throws InterruptedException, IOException {
"my_prometheus",
DataSourceType.PROMETHEUS,
ImmutableList.of(),
ImmutableMap.of("prometheus.uri", "http://localhost:9090"));
ImmutableMap.of("prometheus.uri", "http://localhost:9090"),
null);
Request createRequest = getCreateDataSourceRequest(createDSM);
Response response = client().performRequest(createRequest);
Assert.assertEquals(201, response.getStatusLine().getStatusCode());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import static org.opensearch.sql.common.setting.Settings.Key.CLUSTER_NAME;
import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG;
import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD;
import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD;

import com.amazonaws.services.emrserverless.model.JobRunState;
import java.security.AccessController;
Expand Down Expand Up @@ -74,11 +76,13 @@ public CreateAsyncQueryResponse createAsyncQuery(
createAsyncQueryRequest.getLang(),
sparkExecutionEngineConfig.getExecutionRoleARN(),
clusterName.value()));

asyncQueryJobMetadataStorageService.storeJobMetadata(
new AsyncQueryJobMetadata(
sparkExecutionEngineConfig.getApplicationId(),
dispatchQueryResponse.getJobId(),
dispatchQueryResponse.isDropIndexQuery()));
dispatchQueryResponse.isDropIndexQuery(),
dispatchQueryResponse.getResultIndex()));
return new CreateAsyncQueryResponse(dispatchQueryResponse.getJobId());
}

Expand All @@ -89,17 +93,21 @@ public AsyncQueryExecutionResponse getAsyncQueryResults(String queryId) {
asyncQueryJobMetadataStorageService.getJobMetadata(queryId);
if (jobMetadata.isPresent()) {
JSONObject jsonObject = sparkQueryDispatcher.getQueryResponse(jobMetadata.get());
if (JobRunState.SUCCESS.toString().equals(jsonObject.getString("status"))) {
if (JobRunState.SUCCESS.toString().equals(jsonObject.getString(STATUS_FIELD))) {
DefaultSparkSqlFunctionResponseHandle sparkSqlFunctionResponseHandle =
new DefaultSparkSqlFunctionResponseHandle(jsonObject);
List<ExprValue> result = new ArrayList<>();
while (sparkSqlFunctionResponseHandle.hasNext()) {
result.add(sparkSqlFunctionResponseHandle.next());
}
return new AsyncQueryExecutionResponse(
JobRunState.SUCCESS.toString(), sparkSqlFunctionResponseHandle.schema(), result);
JobRunState.SUCCESS.toString(), sparkSqlFunctionResponseHandle.schema(), result, null);
} else {
return new AsyncQueryExecutionResponse(jsonObject.getString("status"), null, null);
return new AsyncQueryExecutionResponse(
jsonObject.optString(STATUS_FIELD, JobRunState.FAILED.toString()),
null,
null,
jsonObject.optString(ERROR_FIELD, ""));
}
}
throw new AsyncQueryNotFoundException(String.format("QueryId: %s not found", queryId));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ public class AsyncQueryExecutionResponse {
private final String status;
private final ExecutionEngine.Schema schema;
private final List<ExprValue> results;
private final String error;
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ public class AsyncQueryJobMetadata {
private String applicationId;
private String jobId;
private boolean isDropIndexQuery;
private String resultIndex;

public AsyncQueryJobMetadata(String applicationId, String jobId) {
public AsyncQueryJobMetadata(String applicationId, String jobId, String resultIndex) {
this.applicationId = applicationId;
this.jobId = jobId;
this.isDropIndexQuery = false;
this.resultIndex = resultIndex;
}

@Override
Expand All @@ -54,6 +56,7 @@ public static XContentBuilder convertToXContent(AsyncQueryJobMetadata metadata)
builder.field("jobId", metadata.getJobId());
builder.field("applicationId", metadata.getApplicationId());
builder.field("isDropIndexQuery", metadata.isDropIndexQuery());
builder.field("resultIndex", metadata.getResultIndex());
builder.endObject();
return builder;
}
Expand Down Expand Up @@ -88,6 +91,7 @@ public static AsyncQueryJobMetadata toJobMetadata(XContentParser parser) throws
String jobId = null;
String applicationId = null;
boolean isDropIndexQuery = false;
String resultIndex = null;
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
Expand All @@ -102,13 +106,16 @@ public static AsyncQueryJobMetadata toJobMetadata(XContentParser parser) throws
case "isDropIndexQuery":
isDropIndexQuery = parser.booleanValue();
break;
case "resultIndex":
resultIndex = parser.textOrNull();
break;
default:
throw new IllegalArgumentException("Unknown field: " + fieldName);
}
}
if (jobId == null || applicationId == null) {
throw new IllegalArgumentException("jobId and applicationId are required fields.");
}
return new AsyncQueryJobMetadata(applicationId, jobId, isDropIndexQuery);
return new AsyncQueryJobMetadata(applicationId, jobId, isDropIndexQuery, resultIndex);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,26 @@
public class AsyncQueryResult extends QueryResult {

@Getter private final String status;
@Getter private final String error;

public AsyncQueryResult(
String status,
ExecutionEngine.Schema schema,
Collection<ExprValue> exprValues,
Cursor cursor) {
Cursor cursor,
String error) {
super(schema, exprValues, cursor);
this.status = status;
this.error = error;
}

public AsyncQueryResult(
String status, ExecutionEngine.Schema schema, Collection<ExprValue> exprValues) {
String status,
ExecutionEngine.Schema schema,
Collection<ExprValue> exprValues,
String error) {
super(schema, exprValues);
this.status = status;
this.error = error;
}
}
Loading

0 comments on commit 70450e4

Please sign in to comment.