From 110b558570176d5d5ee5ba85bb071fd66a94a7a0 Mon Sep 17 00:00:00 2001 From: Wei Guo Date: Fri, 19 Jul 2024 09:21:30 +0900 Subject: [PATCH] [SPARK-48915][SQL][TESTS][FOLLOWUP] Add some uncovered predicates(!=, <, <=, >, >=) for correlation in `GeneratedSubquerySuite` ### What changes were proposed in this pull request? In PR #47386, we improved the coverage of predicate types of scalar subquery in the WHERE clause. As a follow-up, this PR aims to add some uncovered predicates(!=, <, <=, >, >=) for correlation in `GeneratedSubquerySuite`. ### Why are the changes needed? Better coverage of current subquery tests with correlation in `GeneratedSubquerySuite`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47399 from wayneguow/SPARK-48915_follow_up. Authored-by: Wei Guo Signed-off-by: Hyukjin Kwon --- .../querytest/GeneratedSubquerySuite.scala | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala index fd3dafdda4996..8cde20529d7a5 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala @@ -162,7 +162,7 @@ class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGenera * @param subqueryAlias * @param subqueryLocation The clause of the main query where the subquery is located. * @param subqueryType The type of subquery, such as SCALAR, RELATION, PREDICATE - * @param isCorrelated Whether the subquery is to be correlated. 
+ * @param correlationConditions The correlation conditions of the subquery. * @param isDistinct Whether subquery results is to be de-duplicated, i.e. have a DISTINCT clause. * @param operatorInSubquery The operator to be included in the subquery. */ @@ -172,16 +172,10 @@ class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGenera subqueryAlias: String, subqueryLocation: SubqueryLocation.Value, subqueryType: SubqueryType.Value, - isCorrelated: Boolean, + correlationConditions: Seq[Predicate], isDistinct: Boolean, operatorInSubquery: Operator): Query = { - // Correlation conditions, this is hardcoded for now. - val correlationConditions = if (isCorrelated) { - Seq(Equals(innerTable.output.head, outerTable.output.head)) - } else { - Seq() - } val isScalarSubquery = Seq(SubqueryType.ATTRIBUTE, SubqueryType.SCALAR_PREDICATE_EQUALS, SubqueryType.SCALAR_PREDICATE_NOT_EQUALS, SubqueryType.SCALAR_PREDICATE_LESS_THAN, SubqueryType.SCALAR_PREDICATE_LESS_THAN_OR_EQUALS, @@ -324,9 +318,23 @@ class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGenera case _ => Seq(true, false) } + def generateCorrelationConditions(innerTable: Relation, outerTable: Relation, + isCorrelated: Boolean): Seq[Seq[Predicate]] = { + if (isCorrelated) { + Seq(Seq(Equals(innerTable.output.head, outerTable.output.head)), + Seq(NotEquals(innerTable.output.head, outerTable.output.head)), + Seq(LessThan(innerTable.output.head, outerTable.output.head)), + Seq(LessThanOrEquals(innerTable.output.head, outerTable.output.head)), + Seq(GreaterThan(innerTable.output.head, outerTable.output.head)), + Seq(GreaterThanOrEquals(innerTable.output.head, outerTable.output.head))) + } else { + Seq(Seq()) + } + } + def distinctChoices(subqueryOperator: Operator): Seq[Boolean] = { subqueryOperator match { - // Don't do DISTINCT if there is no group by because it is redundant. + // Don't do DISTINCT if the aggregate has no group by because it is redundant. 
case Aggregate(_, groupingExpressions) if groupingExpressions.isEmpty => Seq(false) case _ => Seq(true, false) } @@ -343,6 +351,7 @@ class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGenera Seq(SubqueryLocation.WHERE, SubqueryLocation.SELECT, SubqueryLocation.FROM) subqueryType <- subqueryTypeChoices(subqueryLocation) isCorrelated <- correlationChoices(subqueryLocation) + correlationCondition <- generateCorrelationConditions(innerTable, outerTable, isCorrelated) } { // Hardcoded aggregation column and group by column. val (aggColumn, groupByColumn) = innerTable.output.head -> innerTable.output(1) @@ -361,7 +370,7 @@ class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGenera isDistinct <- distinctChoices(subqueryOperator) } { generatedQuerySpecs += SubquerySpec(generateQuery(innerTable, outerTable, - subqueryAlias, subqueryLocation, subqueryType, isCorrelated, isDistinct, + subqueryAlias, subqueryLocation, subqueryType, correlationCondition, isDistinct, subqueryOperator).toString + ";", isCorrelated, subqueryType) } }