-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add create and drop materialized view SQL support (#73)
* Add MV grammar with empty impl Signed-off-by: Chen Dai <[email protected]> * Find mv query in origin Signed-off-by: Chen Dai <[email protected]> * Implement create and drop statement in ast builder Signed-off-by: Chen Dai <[email protected]> * Add MV SQL IT Signed-off-by: Chen Dai <[email protected]> * Add more IT for create statement Signed-off-by: Chen Dai <[email protected]> * Add more IT for drop statement Signed-off-by: Chen Dai <[email protected]> * Update user manual with MV Signed-off-by: Chen Dai <[email protected]> * Update doc with MV index naming convention Signed-off-by: Chen Dai <[email protected]> --------- Signed-off-by: Chen Dai <[email protected]>
- Loading branch information
Showing
7 changed files
with
287 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
...c/main/scala/org/opensearch/flint/spark/sql/mv/FlintSparkMaterializedViewAstBuilder.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.sql.mv | ||
|
||
import org.antlr.v4.runtime.tree.RuleNode | ||
import org.opensearch.flint.spark.FlintSpark | ||
import org.opensearch.flint.spark.FlintSpark.RefreshMode | ||
import org.opensearch.flint.spark.mv.FlintSparkMaterializedView | ||
import org.opensearch.flint.spark.sql.{FlintSparkSqlCommand, FlintSparkSqlExtensionsVisitor, SparkSqlAstBuilder} | ||
import org.opensearch.flint.spark.sql.FlintSparkSqlAstBuilder.getFullTableName | ||
import org.opensearch.flint.spark.sql.FlintSparkSqlExtensionsParser.{CreateMaterializedViewStatementContext, DropMaterializedViewStatementContext, MaterializedViewQueryContext} | ||
|
||
import org.apache.spark.sql.catalyst.trees.CurrentOrigin | ||
|
||
/**
 * Flint Spark AST builder that builds Spark command for Flint materialized view statement.
 */
trait FlintSparkMaterializedViewAstBuilder extends FlintSparkSqlExtensionsVisitor[AnyRef] {
  self: SparkSqlAstBuilder =>

  /**
   * Builds the command for a create materialized view statement.
   *
   * When the command runs, it creates the materialized view from the full view name, the query
   * text taken from the statement and the options parsed from the property list. If those
   * options report auto refresh as enabled, an incremental refresh is started on the
   * corresponding Flint index right after creation.
   *
   * @param ctx
   *   parse tree node of the create materialized view statement
   * @return
   *   a [[FlintSparkSqlCommand]] whose body yields an empty sequence
   */
  override def visitCreateMaterializedViewStatement(
      ctx: CreateMaterializedViewStatementContext): AnyRef = {
    FlintSparkSqlCommand() { flint =>
      val mvName = getFullTableName(flint, ctx.mvName)
      val query = getMvQuery(ctx.query)

      val mvBuilder = flint
        .materializedView()
        .name(mvName)
        .query(query)

      // ANTLR returns null for a token that is absent from the statement, so the presence of
      // EXISTS is what tells us whether an already existing view should be tolerated
      val ignoreIfExists = Option(ctx.EXISTS()).isDefined
      val indexOptions = visitPropertyList(ctx.propertyList())
      mvBuilder
        .options(indexOptions)
        .create(ignoreIfExists)

      // Trigger auto refresh if enabled
      if (indexOptions.autoRefresh()) {
        val flintIndexName = getFlintIndexName(flint, ctx.mvName)
        flint.refreshIndex(flintIndexName, RefreshMode.INCREMENTAL)
      }
      Seq.empty
    }
  }

  /**
   * Builds the command for a drop materialized view statement.
   *
   * When the command runs, it deletes the Flint index that backs the named materialized view.
   *
   * @param ctx
   *   parse tree node of the drop materialized view statement
   * @return
   *   a [[FlintSparkSqlCommand]] whose body yields an empty sequence
   */
  override def visitDropMaterializedViewStatement(
      ctx: DropMaterializedViewStatementContext): AnyRef = {
    FlintSparkSqlCommand() { flint =>
      flint.deleteIndex(getFlintIndexName(flint, ctx.mvName))
      Seq.empty
    }
  }

  /**
   * Extracts the materialized view query exactly as written in the original SQL text, by
   * slicing the statement between the first and last token of the query rule.
   *
   * @param ctx
   *   parse tree node of the materialized view query
   * @return
   *   the query substring of the original SQL text
   * @throws NoSuchElementException
   *   if the current origin carries no SQL text
   */
  private def getMvQuery(ctx: MaterializedViewQueryContext): String = {
    // Origin is assumed to be preserved at the beginning of parsing. Fail with an explanatory
    // message rather than an opaque "None.get" if that assumption does not hold.
    val sqlText = CurrentOrigin.get.sqlText.getOrElse(
      throw new NoSuchElementException(
        "Cannot extract materialized view query: SQL text is missing from current origin"))
    val startIndex = ctx.getStart.getStartIndex
    val stopIndex = ctx.getStop.getStopIndex
    // Token stop index is used as inclusive here, whereas substring's end index is exclusive
    sqlText.substring(startIndex, stopIndex + 1)
  }

  /**
   * Resolves the Flint index name for the materialized view named by the given parse tree node.
   *
   * @param flint
   *   Flint Spark API used to resolve the full view name
   * @param mvNameCtx
   *   parse tree node holding the materialized view name
   * @return
   *   Flint index name of the materialized view
   */
  private def getFlintIndexName(flint: FlintSpark, mvNameCtx: RuleNode): String = {
    val fullMvName = getFullTableName(flint, mvNameCtx)
    FlintSparkMaterializedView.getFlintIndexName(fullMvName)
  }
}
145 changes: 145 additions & 0 deletions
145
...test/src/test/scala/org/opensearch/flint/spark/FlintSparkMaterializedViewSqlITSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark | ||
|
||
import java.sql.Timestamp | ||
|
||
import scala.Option.empty | ||
import scala.collection.JavaConverters.mapAsJavaMapConverter | ||
|
||
import org.json4s.{Formats, NoTypeHints} | ||
import org.json4s.native.JsonMethods.parse | ||
import org.json4s.native.Serialization | ||
import org.opensearch.flint.core.FlintOptions | ||
import org.opensearch.flint.core.storage.FlintOpenSearchClient | ||
import org.opensearch.flint.spark.mv.FlintSparkMaterializedView.getFlintIndexName | ||
import org.scalatest.matchers.must.Matchers.defined | ||
import org.scalatest.matchers.should.Matchers.{convertToAnyShouldWrapper, the} | ||
|
||
import org.apache.spark.sql.Row | ||
|
||
class FlintSparkMaterializedViewSqlITSuite extends FlintSparkSuite {

  /** Source table read by the materialized view query */
  private val testTable = "spark_catalog.default.mv_test"

  /** Full name of the materialized view created and dropped by the tests */
  private val testMvName = "spark_catalog.default.mv_test_metrics"

  /** Flint index name derived from the materialized view name */
  private val testFlintIndex = getFlintIndexName(testMvName)

  /** View definition: row count per 10 minute tumbling window over the test table */
  private val testQuery =
    s"""
       | SELECT
       | window.start AS startTime,
       | COUNT(*) AS count
       | FROM $testTable
       | GROUP BY TUMBLE(time, '10 Minutes')
       |""".stripMargin

  /** Creates the time series source table once before any test runs. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    createTimeSeriesTable(testTable)
  }

  /** Deletes the test Flint index after every test. */
  override def afterEach(): Unit = {
    super.afterEach()
    flint.deleteIndex(testFlintIndex)
  }

  test("create materialized view with auto refresh") {
    withTempDir { checkpointDir =>
      val createStatement =
        s"""
           | CREATE MATERIALIZED VIEW $testMvName
           | AS $testQuery
           | WITH (
           | auto_refresh = true,
           | checkpoint_location = '${checkpointDir.getAbsolutePath}'
           | )
           |""".stripMargin
      sql(createStatement)

      // The streaming job is looked up by the Flint index name; let it finish the current
      // micro batch before inspecting the index content
      val streamingJob = spark.streams.active.find(query => query.name == testFlintIndex)
      streamingJob shouldBe defined
      failAfter(streamingTimeout) {
        streamingJob.foreach(_.processAllAvailable())
      }

      val expectedRows = Seq(
        Row(timestamp("2023-10-01 00:00:00"), 1),
        Row(timestamp("2023-10-01 00:10:00"), 2),
        Row(timestamp("2023-10-01 01:00:00"), 1)
        /*
         * The last row is pending to fire upon watermark
         * Row(timestamp("2023-10-01 02:00:00"), 1)
         */
      )

      flint.describeIndex(testFlintIndex) shouldBe defined
      val actualRows = flint.queryIndex(testFlintIndex).select("startTime", "count")
      checkAnswer(actualRows, expectedRows)
    }
  }

  test("create materialized view with streaming job options") {
    withTempDir { checkpointDir =>
      val createStatement =
        s"""
           | CREATE MATERIALIZED VIEW $testMvName
           | AS $testQuery
           | WITH (
           | auto_refresh = true,
           | refresh_interval = '5 Seconds',
           | checkpoint_location = '${checkpointDir.getAbsolutePath}',
           | index_settings = '{"number_of_shards": 3, "number_of_replicas": 2}'
           | )
           |""".stripMargin
      sql(createStatement)

      // Options given in the WITH clause must be readable back from the index description
      val describedIndex = flint.describeIndex(testFlintIndex)
      describedIndex shouldBe defined

      val describedOptions = describedIndex.get.options
      describedOptions.autoRefresh() shouldBe true
      describedOptions.refreshInterval() shouldBe Some("5 Seconds")
      describedOptions.checkpointLocation() shouldBe Some(checkpointDir.getAbsolutePath)
    }
  }

  test("create materialized view with index settings") {
    val createStatement =
      s"""
         | CREATE MATERIALIZED VIEW $testMvName
         | AS $testQuery
         | WITH (
         | index_settings = '{"number_of_shards": 3, "number_of_replicas": 2}'
         | )
         |""".stripMargin
    sql(createStatement)

    // Check if the index setting option is set to OS index setting
    implicit val formats: Formats = Serialization.formats(NoTypeHints)
    val openSearchClient = new FlintOpenSearchClient(new FlintOptions(openSearchOptions.asJava))
    val rawIndexSettings = openSearchClient.getIndexMetadata(testFlintIndex).indexSettings.get
    val indexSettings = parse(rawIndexSettings)

    (indexSettings \ "index.number_of_shards").extract[String] shouldBe "3"
    (indexSettings \ "index.number_of_replicas").extract[String] shouldBe "2"
  }

  test("create materialized view if not exists") {
    val createIfNotExists = s"CREATE MATERIALIZED VIEW IF NOT EXISTS $testMvName AS $testQuery"
    val createStrict = s"CREATE MATERIALIZED VIEW $testMvName AS $testQuery"

    sql(createIfNotExists)
    flint.describeIndex(testFlintIndex) shouldBe defined

    // Expect error without IF NOT EXISTS, otherwise success
    the[IllegalStateException] thrownBy {
      sql(createStrict)
    }

    sql(createIfNotExists)
  }

  test("drop materialized view") {
    // Create the view through the API so that only the drop statement goes through SQL
    val mvBuilder = flint
      .materializedView()
      .name(testMvName)
      .query(testQuery)
    mvBuilder.create()

    sql(s"DROP MATERIALIZED VIEW $testMvName")

    flint.describeIndex(testFlintIndex) shouldBe empty
  }

  /** Parses a `yyyy-mm-dd hh:mm:ss` string into a SQL timestamp for expected rows. */
  private def timestamp(ts: String): Timestamp = Timestamp.valueOf(ts)
}