diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 653224c5475f8..249f55fa40ac5 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -838,9 +838,11 @@ tableArgumentPartitioning : ((WITH SINGLE PARTITION) | ((PARTITION | DISTRIBUTE) BY (((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN)) + | (expression (COMMA invalidMultiPartitionExpression=expression)+) | partition+=expression))) ((ORDER | SORT) BY (((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) + | (sortItem (COMMA invalidMultiSortItem=sortItem)+) | sortItem)))? ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7d2355b2f08d1..326f1e7684b9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1638,6 +1638,20 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } partitionByExpressions = p.partition.asScala.map(expression).toSeq orderByExpressions = p.sortItem.asScala.map(visitSortItem).toSeq + def invalidPartitionOrOrderingExpression(clause: String): String = { + "The table function call includes a table argument with an invalid " + + s"partitioning/ordering specification: the $clause clause included multiple " + + "expressions without parentheses surrounding them; please add parentheses around " + + "these expressions and then retry the query again" + } + validate( + Option(p.invalidMultiPartitionExpression).isEmpty, + message = invalidPartitionOrOrderingExpression("PARTITION BY"), + ctx = p.invalidMultiPartitionExpression) + validate( + Option(p.invalidMultiSortItem).isEmpty, + message = invalidPartitionOrOrderingExpression("ORDER BY"), + ctx = p.invalidMultiSortItem) } validate( !(withSinglePartition && partitionByExpressions.nonEmpty), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 17dd7349e7bea..8d01040563361 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1617,14 +1617,23 @@ class PlanParserSuite extends AnalysisTest { parameters = Map( "error" -> "'order'", "hint" -> "")) - val sql8 = s"select * from my_tvf(arg1 => table(select col1, col2, col3 from v2) " + - s"$partition by col1, col2 order by col2 asc, col3 desc)" + val sql8tableArg = "table(select col1, col2, col3 from v2)" + val sql8partition = s"$partition by col1, col2 order by col2 asc, col3 desc" + val sql8 = s"select * from my_tvf(arg1 => $sql8tableArg $sql8partition)" checkError( exception = parseException(sql8), - errorClass = "PARSE_SYNTAX_ERROR", + errorClass = "_LEGACY_ERROR_TEMP_0064", parameters = Map( - "error" -> "'order'", - "hint" -> ": extra input 'order'")) + "msg" -> + ("The table function call includes a table argument with an invalid " + + "partitioning/ordering specification: the PARTITION BY clause included multiple " + + "expressions without parentheses surrounding them; please add parentheses around " + + "these expressions and then retry the query again")), + context = ExpectedContext( + fragment = s"$sql8tableArg $sql8partition", + start = 29, + stop = 110 + partition.length) + ) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala index 989597ae041db..1eaf1d24056da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.python import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest, Row} import org.apache.spark.sql.catalyst.expressions.{Add, Alias, Expression, FunctionTableSubqueryArgumentExpression, Literal} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Repartition, RepartitionByExpression, Sort, SubqueryAlias} import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal.SQLConf @@ -363,4 +364,29 @@ class PythonUDTFSuite extends QueryTest with SharedSparkSession { Row("abc")) } } + + test("SPARK-48180: Analyzer bug with multiple ORDER BY items for input table argument") { + assume(shouldTestPythonUDFs) + spark.udtf.registerPython("testUDTF", pythonUDTF) + checkError( + exception = intercept[ParseException](sql( + """ + |SELECT * FROM testUDTF( + | TABLE(SELECT 1 AS device_id, 2 AS data_ds) + | WITH SINGLE PARTITION + | ORDER BY device_id, data_ds) + |""".stripMargin)), + errorClass = "_LEGACY_ERROR_TEMP_0064", + parameters = Map("msg" -> + ("The table function call includes a table argument with an invalid " + + "partitioning/ordering specification: the ORDER BY clause included multiple " + + "expressions without parentheses surrounding them; please add parentheses around these " + + "expressions and then retry the query again")), + context = ExpectedContext( + fragment = "TABLE(SELECT 1 AS device_id, 2 AS data_ds)\n " + + "WITH SINGLE PARTITION\n " + + "ORDER BY device_id, data_ds", + start = 27, + stop = 122)) + } }