-
Notifications
You must be signed in to change notification settings - Fork 322
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: fix unsaferow for window (#1298)
* Support config of debugShowNodeDf for openmldb-batch * Add rowToString in spark row util and add unit tests * Update offset for string and non-string columns in row codec * Add unit tests for unsafe row opt * Add data util for openmldb-batch unit tests
- Loading branch information
1 parent
5cc52af
commit 2e7fb76
Showing
11 changed files
with
239 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,9 @@ | ||
### set log levels ### | ||
log4j.rootLogger=stdout,warn,error | ||
|
||
# console log | ||
log4j.rootLogger=WARN, stdout | ||
|
||
# Console log | ||
log4j.appender.stdout = org.apache.log4j.ConsoleAppender | ||
log4j.appender.stdout.Target = System.out | ||
log4j.appender.stdout.Threshold = INFO | ||
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout | ||
log4j.appender.stdout.Encoding=UTF-8 | ||
log4j.appender.stdout.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} [ %c.%M(%F:%L) ] - [ %p ] %m%n | ||
|
||
#info log | ||
log4j.logger.info=info | ||
log4j.appender.info=org.apache.log4j.DailyRollingFileAppender | ||
log4j.appender.info.DatePattern='_'yyyy-MM-dd'.log' | ||
log4j.appender.info.File=logs/info.log | ||
log4j.appender.info.Append=true | ||
log4j.appender.info.Threshold=INFO | ||
log4j.appender.info.Encoding=UTF-8 | ||
log4j.appender.info.layout=org.apache.log4j.PatternLayout | ||
log4j.appender.info.layout.ConversionPattern= %d{yyyy-MM-dd HH:mm:ss} [ %c.%M(%F:%L) ] - [ %p ] %m%n | ||
#debug log | ||
log4j.logger.debug=debug | ||
log4j.appender.debug=org.apache.log4j.DailyRollingFileAppender | ||
log4j.appender.debug.DatePattern='_'yyyy-MM-dd'.log' | ||
log4j.appender.debug.File=logs/debug.log | ||
log4j.appender.debug.Append=true | ||
log4j.appender.debug.Threshold=DEBUG | ||
log4j.appender.debug.Encoding=UTF-8 | ||
log4j.appender.debug.layout=org.apache.log4j.PatternLayout | ||
log4j.appender.debug.layout.ConversionPattern= %d{yyyy-MM-dd HH:mm:ss} [ %c.%M(%F:%L) ] - [ %p ] %m%n | ||
#warn log | ||
log4j.logger.warn=warn | ||
log4j.appender.warn=org.apache.log4j.DailyRollingFileAppender | ||
log4j.appender.warn.DatePattern='_'yyyy-MM-dd'.log' | ||
log4j.appender.warn.File=logs/warn.log | ||
log4j.appender.warn.Append=true | ||
log4j.appender.warn.Threshold=WARN | ||
log4j.appender.warn.Encoding=UTF-8 | ||
log4j.appender.warn.layout=org.apache.log4j.PatternLayout | ||
log4j.appender.warn.layout.ConversionPattern= %d{yyyy-MM-dd HH:mm:ss} [ %c.%M(%F:%L) ] - [ %p ] %m%n | ||
#error | ||
log4j.logger.error=error | ||
log4j.appender.error = org.apache.log4j.DailyRollingFileAppender | ||
log4j.appender.error.DatePattern='_'yyyy-MM-dd'.log' | ||
log4j.appender.error.File = logs/error.log | ||
log4j.appender.error.Append = true | ||
log4j.appender.error.Threshold = ERROR | ||
log4j.appender.error.Encoding=UTF-8 | ||
log4j.appender.error.layout = org.apache.log4j.PatternLayout | ||
log4j.appender.error.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} [ %c.%M(%F:%L) ] - [ %p ] %m%n | ||
log4j.appender.stdout.layout.ConversionPattern = %c.%M(%F:%L) - %p: %m%n |
51 changes: 51 additions & 0 deletions
51
java/openmldb-batch/src/test/scala/com/_4paradigm/openmldb/batch/end2end/DataUtil.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright 2021 4Paradigm | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com._4paradigm.openmldb.batch.end2end | ||
|
||
import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType} | ||
import org.apache.spark.sql.{DataFrame, Row, SparkSession} | ||
|
||
/** Small, fixed DataFrames shared by the openmldb-batch end-to-end tests. */
object DataUtil {

  /**
   * Builds a one-row DataFrame with a string column between two integer columns.
   *
   * @param spark session used to create the DataFrame
   * @return DataFrame with columns (int_col, str_col, int_col2) holding the row (1, "abc", 100)
   */
  def getStringDf(spark: SparkSession): DataFrame = {
    val fields = List(
      StructField("int_col", IntegerType),
      StructField("str_col", StringType),
      StructField("int_col2", IntegerType)
    )
    val rows = Seq(Row(1, "abc", 100))
    toDataFrame(spark, rows, StructType(fields))
  }

  /**
   * Builds a five-row DataFrame of transactions for the names "tom" and "amy".
   *
   * @param spark session used to create the DataFrame
   * @return DataFrame with columns (id, name, trans_amount, trans_time)
   */
  def getTestDf(spark: SparkSession): DataFrame = {
    val fields = List(
      StructField("id", IntegerType),
      StructField("name", StringType),
      StructField("trans_amount", LongType),
      StructField("trans_time", IntegerType)
    )
    // Spelled out as tuples first so the column order is easy to compare with `fields`.
    val rows = Seq(
      (1, "tom", 100L, 1),
      (2, "tom", 200L, 2),
      (3, "tom", 300L, 3),
      (4, "amy", 400L, 4),
      (5, "amy", 500L, 5)
    ).map { case (id, name, amount, time) => Row(id, name, amount, time) }
    toDataFrame(spark, rows, StructType(fields))
  }

  /** Turns the given rows into an RDD and pairs it with `schema` to create a DataFrame. */
  private def toDataFrame(spark: SparkSession, rows: Seq[Row], schema: StructType): DataFrame = {
    val rowRdd = spark.sparkContext.makeRDD(rows)
    spark.createDataFrame(rowRdd, schema)
  }

}
52 changes: 52 additions & 0 deletions
52
...batch/src/test/scala/com/_4paradigm/openmldb/batch/end2end/unsafe/TestUnsafeProject.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
* Copyright 2021 4Paradigm | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com._4paradigm.openmldb.batch.end2end.unsafe | ||
|
||
import com._4paradigm.openmldb.batch.SparkTestSuite | ||
import com._4paradigm.openmldb.batch.api.OpenmldbSession | ||
import com._4paradigm.openmldb.batch.end2end.DataUtil | ||
import com._4paradigm.openmldb.batch.utils.SparkUtil | ||
|
||
/**
 * Runs a simple projection query with "spark.openmldb.unsaferow.opt" set to true and
 * compares the OpenmldbSession result with the result of `sparksql` for the same SQL.
 */
class TestUnsafeProject extends SparkTestSuite {

  /** Sets the unsafe-row option on the suite's Spark session to `enabled`. */
  private def setUnsafeRowOpt(enabled: Boolean): Unit = {
    getSparkSession.conf.set("spark.openmldb.unsaferow.opt", enabled)
  }

  /** Switches the unsafe-row option on before the test body runs. */
  override def customizedBefore(): Unit = setUnsafeRowOpt(true)

  /** Switches the unsafe-row option back to false afterwards. */
  override def customizedAfter(): Unit = setUnsafeRowOpt(false)

  test("Test unsafe project") {
    val sparkSession = getSparkSession
    val openmldbSession = new OpenmldbSession(sparkSession)

    // Register the same input for both engines: as an OpenMLDB table and as a Spark temp view.
    val inputDf = DataUtil.getStringDf(sparkSession)
    openmldbSession.registerTable("t1", inputDf)
    inputDf.createOrReplaceTempView("t1")

    val query = "SELECT int_col, int_col2 + 1000 FROM t1"

    val actualDf = openmldbSession.sql(query).getSparkDf()
    val expectedDf = openmldbSession.sparksql(query)
    assert(SparkUtil.approximateDfEqual(actualDf, expectedDf, false))
  }

}
57 changes: 57 additions & 0 deletions
57
...-batch/src/test/scala/com/_4paradigm/openmldb/batch/end2end/unsafe/TestUnsafeWindow.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Copyright 2021 4Paradigm | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com._4paradigm.openmldb.batch.end2end.unsafe | ||
|
||
import com._4paradigm.openmldb.batch.SparkTestSuite | ||
import com._4paradigm.openmldb.batch.api.OpenmldbSession | ||
import com._4paradigm.openmldb.batch.end2end.DataUtil | ||
import com._4paradigm.openmldb.batch.utils.SparkUtil | ||
|
||
/**
 * Runs a window aggregation query with "spark.openmldb.unsaferow.opt" set to true and
 * compares the OpenmldbSession result with the result of `sparksql` for the same SQL.
 */
class TestUnsafeWindow extends SparkTestSuite {

  /** Sets the unsafe-row option on the suite's Spark session to `enabled`. */
  private def setUnsafeRowOpt(enabled: Boolean): Unit = {
    getSparkSession.conf.set("spark.openmldb.unsaferow.opt", enabled)
  }

  /** Switches the unsafe-row option on before the test body runs. */
  override def customizedBefore(): Unit = setUnsafeRowOpt(true)

  /** Switches the unsafe-row option back to false afterwards. */
  override def customizedAfter(): Unit = setUnsafeRowOpt(false)

  test("Test unsafe window") {
    val sparkSession = getSparkSession
    val openmldbSession = new OpenmldbSession(sparkSession)

    // Register the same input for both engines: as an OpenMLDB table and as a Spark temp view.
    val inputDf = DataUtil.getTestDf(sparkSession)
    openmldbSession.registerTable("t1", inputDf)
    inputDf.createOrReplaceTempView("t1")

    val query =
      """
        | SELECT id, sum(trans_amount) OVER w AS w_sum_amount FROM t1
        | WINDOW w AS (
        | PARTITION BY id
        | ORDER BY trans_time
        | ROWS BETWEEN 10 PRECEDING AND CURRENT ROW);
        |""".stripMargin

    val actualDf = openmldbSession.sql(query).getSparkDf()
    val expectedDf = openmldbSession.sparksql(query)
    assert(SparkUtil.approximateDfEqual(actualDf, expectedDf, false))
  }

}
Oops, something went wrong.