Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: disable initCap by default #1276

Merged
merged 3 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions common/src/main/scala/org/apache/comet/CometConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ object CometConf extends ShimCometConf {
createExecEnabledConfig("window", defaultValue = true)
val COMET_EXEC_TAKE_ORDERED_AND_PROJECT_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("takeOrderedAndProject", defaultValue = true)
val COMET_EXEC_INITCAP_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("initCap", defaultValue = false)

val COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.exec.sortMergeJoinWithJoinFilter.enabled")
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Comet provides the following configuration settings.
| spark.comet.exec.filter.enabled | Whether to enable filter by default. | true |
| spark.comet.exec.globalLimit.enabled | Whether to enable globalLimit by default. | true |
| spark.comet.exec.hashJoin.enabled | Whether to enable hashJoin by default. | true |
| spark.comet.exec.initCap.enabled | Whether to enable initCap by default. | false |
| spark.comet.exec.localLimit.enabled | Whether to enable localLimit by default. | true |
| spark.comet.exec.memoryPool | The type of memory pool to be used for Comet native execution. Available memory pool types are 'greedy', 'fair_spill', 'greedy_task_shared', 'fair_spill_task_shared', 'greedy_global' and 'fair_spill_global', By default, this config is 'greedy_task_shared'. | greedy_task_shared |
| spark.comet.exec.project.enabled | Whether to enable project by default. | true |
Expand Down
17 changes: 13 additions & 4 deletions spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1795,10 +1795,19 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
optExprWithInfo(optExpr, expr, child)

case InitCap(child) =>
val castExpr = Cast(child, StringType)
val childExpr = exprToProtoInternal(castExpr, inputs)
val optExpr = scalarExprToProto("initcap", childExpr)
optExprWithInfo(optExpr, expr, castExpr)
if (CometConf.COMET_EXEC_INITCAP_ENABLED.get()) {
val castExpr = Cast(child, StringType)
val childExpr = exprToProtoInternal(castExpr, inputs)
val optExpr = scalarExprToProto("initcap", childExpr)
optExprWithInfo(optExpr, expr, castExpr)
} else {
withInfo(
expr,
"Comet initCap is not compatible with Spark yet. " +
"See https://github.com/apache/datafusion-comet/issues/1052 ." +
s"Set ${CometConf.COMET_EXEC_INITCAP_ENABLED.key}=true to enable it anyway.")
None
}

case Length(child) =>
val castExpr = Cast(child, StringType)
Expand Down
10 changes: 8 additions & 2 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1364,8 +1364,14 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
sql(s"create table $table(id int, name varchar(20)) using parquet")
sql(
s"insert into $table values(1, 'james smith'), (2, 'michael rose'), " +
"(3, 'robert williams'), (4, 'rames rose'), (5, 'james smith')")
checkSparkAnswerAndOperator(s"SELECT initcap(name) FROM $table")
"(3, 'robert williams'), (4, 'rames rose'), (5, 'james smith'), " +
"(6, 'robert rose-smith'), (7, 'james ähtäri')")
if (CometConf.COMET_EXEC_INITCAP_ENABLED.get()) {
// TODO: remove this if clause https://github.com/apache/datafusion-comet/issues/1052
checkSparkAnswerAndOperator(s"SELECT initcap(name) FROM $table")
} else {
checkSparkAnswer(s"SELECT initcap(name) FROM $table")
}
}
}
}
Expand Down
Loading