[Gluten-1315][CH] Enable tpcds fixed tests (#1646)
Enable tpcds fixed tests
loneylee authored May 16, 2023
1 parent 52c267e commit bfbd42c
Showing 2 changed files with 39 additions and 37 deletions.
@@ -24,10 +24,12 @@ import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseLog
+import org.apache.spark.sql.types.{StructField, StructType}
 
 import org.apache.commons.io.FileUtils
 
 import java.io.File
+import java.util
 
 import scala.io.Source
 import scala.language.postfixOps
@@ -63,41 +65,27 @@ abstract class GlutenClickHouseTPCDSAbstractSuite extends WholeStageTransformerS
   })
 
   protected val excludedTpcdsQueries: Set[String] = Set(
-    "q4",
-    "q5",
-    "q8",
-    "q14a",
-    "q14b",
-    "q16",
-    "q17",
-    "q18",
-    "q24a",
-    "q24b",
-    "q27",
-    "q31",
-    "q32",
-    "q36",
-    "q39a",
-    "q39b",
-    "q47",
-    "q49",
-    "q57",
-    "q61",
-    "q64",
-    "q67",
-    "q70",
-    "q71",
-    "q74",
-    "q75",
-    "q77",
-    "q78",
-    "q80",
-    "q83",
-    "q86",
-    "q90",
-    "q92",
-    "q94",
-    "q99"
+    "q5", // attribute binding failed.
+    "q14a", // inconsistent results
+    "q14b", // inconsistent results
+    "q17", // inconsistent results
+    "q18", // inconsistent results
+    "q31", // inconsistent results
+    "q32", // attribute binding failed.
+    "q36", // attribute binding failed.
+    "q49", // inconsistent results
+    "q61", // inconsistent results
+    "q64", // fatal
+    "q67", // inconsistent results
+    "q70", // attribute binding failed.
+    "q71", // inconsistent results, unstable order
+    "q75", // attribute binding failed.
+    "q77", // inconsistent results
+    "q78", // inconsistent results
+    "q80", // fatal
+    "q83", // decimal error
+    "q90", // inconsistent results (decimal)
+    "q92" // attribute binding failed.
   )
 
   protected val independentTestTpcdsQueries: Set[String] = Set("q9", "q21")
@@ -208,12 +196,19 @@ abstract class GlutenClickHouseTPCDSAbstractSuite extends WholeStageTransformerS
     val df = spark.sql(Source.fromFile(new File(sqlFile), "UTF-8").mkString)
 
     if (compareResult) {
+      val fields = new util.ArrayList[StructField]()
+      for (elem <- df.schema) {
+        fields.add(
+          StructField
+            .apply(elem.name + fields.size().toString, elem.dataType, elem.nullable, elem.metadata))
+      }
+
       var expectedAnswer: Seq[Row] = null
       withSQLConf(vanillaSparkConfs(): _*) {
         expectedAnswer = spark.read
           .option("delimiter", "|-|")
           .option("nullValue", "null")
-          .schema(df.schema)
+          .schema(StructType.apply(fields))
           .csv(queriesResults + "/" + queryNum + ".out")
           .toDF()
           .collect()
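Note on the schema change above: the new `fields` loop rebuilds the DataFrame's schema with each field's positional index appended to its name before that schema is handed to the CSV reader, presumably so that TPC-DS queries projecting duplicate column names still yield a schema with unique field names. A minimal sketch of the same transformation in more idiomatic Scala (only the standard Spark StructType API is assumed; `uniquifiedSchema` is a hypothetical helper, not part of this commit):

    import org.apache.spark.sql.types.{StructField, StructType}

    // Rebuild a schema so every field name is unique: keep each field's type,
    // nullability and metadata, but suffix the name with its positional index
    // (e.g. "ss_ticket_number" -> "ss_ticket_number3" for the 4th column).
    def uniquifiedSchema(schema: StructType): StructType =
      StructType(schema.fields.zipWithIndex.map { case (f, i) =>
        f.copy(name = f.name + i.toString)
      })

This produces the same StructType as the java.util.ArrayList loop in the diff. Renaming at read time is harmless here because the collected rows are compared positionally, not by column name.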
9 changes: 8 additions & 1 deletion cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -2117,7 +2117,14 @@ const ActionsDAG::Node * SerializedPlanParser::parseExpression(ActionsDAGPtr act
 QueryPlanPtr SerializedPlanParser::parse(const std::string & plan)
 {
     auto plan_ptr = std::make_unique<substrait::Plan>();
-    auto ok = plan_ptr->ParseFromString(plan);
+    /// https://stackoverflow.com/questions/52028583/getting-error-parsing-protobuf-data
+    /// Parsing may fail when the message nesting depth is large.
+    /// Set a recursion limit large enough to avoid the problem.
+    /// This failure is hard to troubleshoot, because protobuf's C++ runtime reports no useful error information.
+    google::protobuf::io::CodedInputStream coded_in(reinterpret_cast<const uint8_t *>(plan.data()), plan.size());
+    coded_in.SetRecursionLimit(100000);
+
+    auto ok = plan_ptr->ParseFromCodedStream(&coded_in);
     if (!ok)
         throw Exception(ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, "Parse substrait::Plan from string failed");

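The same pitfall exists on the JVM side: protobuf-java also caps message nesting at 100 levels by default, and a deeply nested Substrait expression tree then fails to parse with an equally unhelpful error. A sketch of the equivalent guard in Scala, assuming the substrait-java generated Plan class (the class and the helper are illustrative, not part of this commit):

    import com.google.protobuf.CodedInputStream
    import io.substrait.proto.Plan // assumed: substrait-java's generated message class

    // Parse a serialized substrait.Plan with a raised recursion limit,
    // mirroring the C++ change above. CodedInputStream's default limit is
    // 100 nested messages; setRecursionLimit lifts it for this stream only.
    def parsePlan(bytes: Array[Byte]): Plan = {
      val codedIn = CodedInputStream.newInstance(bytes)
      codedIn.setRecursionLimit(100000)
      Plan.parseFrom(codedIn)
    }

Raising the limit on a single input stream, as both versions do, keeps protobuf's default protection against maliciously nested messages everywhere else; only the trusted plan channel gets the relaxed bound.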
