[Gluten-1315][CH] Enable tpcds fixed tests (#1646)
Enable tpcds fixed tests
loneylee authored May 16, 2023
1 parent 52c267e commit bfbd42c
Showing 2 changed files with 39 additions and 37 deletions.
@@ -24,10 +24,12 @@ import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseLog
+import org.apache.spark.sql.types.{StructField, StructType}
 
 import org.apache.commons.io.FileUtils
 
 import java.io.File
+import java.util
 
 import scala.io.Source
 import scala.language.postfixOps
@@ -63,41 +65,27 @@ abstract class GlutenClickHouseTPCDSAbstractSuite extends WholeStageTransformerS
   })
 
   protected val excludedTpcdsQueries: Set[String] = Set(
-    "q4",
-    "q5",
-    "q8",
-    "q14a",
-    "q14b",
-    "q16",
-    "q17",
-    "q18",
-    "q24a",
-    "q24b",
-    "q27",
-    "q31",
-    "q32",
-    "q36",
-    "q39a",
-    "q39b",
-    "q47",
-    "q49",
-    "q57",
-    "q61",
-    "q64",
-    "q67",
-    "q70",
-    "q71",
-    "q74",
-    "q75",
-    "q77",
-    "q78",
-    "q80",
-    "q83",
-    "q86",
-    "q90",
-    "q92",
-    "q94",
-    "q99"
+    "q5", // attribute binding failed.
+    "q14a", // inconsistent results
+    "q14b", // inconsistent results
+    "q17", // inconsistent results
+    "q18", // inconsistent results
+    "q31", // inconsistent results
+    "q32", // attribute binding failed.
+    "q36", // attribute binding failed.
+    "q49", // inconsistent results
+    "q61", // inconsistent results
+    "q64", // fatal
+    "q67", // inconsistent results
+    "q70", // attribute binding failed.
+    "q71", // inconsistent results, unstable order
+    "q75", // attribute binding failed.
+    "q77", // inconsistent results
+    "q78", // inconsistent results
+    "q80", // fatal
+    "q83", // decimal error
+    "q90", // inconsistent results (decimal)
+    "q92" // attribute binding failed.
   )
 
   protected val independentTestTpcdsQueries: Set[String] = Set("q9", "q21")
@@ -208,12 +196,19 @@ abstract class GlutenClickHouseTPCDSAbstractSuite extends WholeStageTransformerS
     val df = spark.sql(Source.fromFile(new File(sqlFile), "UTF-8").mkString)
 
     if (compareResult) {
+      val fields = new util.ArrayList[StructField]()
+      for (elem <- df.schema) {
+        fields.add(
+          StructField
+            .apply(elem.name + fields.size().toString, elem.dataType, elem.nullable, elem.metadata))
+      }
+
       var expectedAnswer: Seq[Row] = null
       withSQLConf(vanillaSparkConfs(): _*) {
         expectedAnswer = spark.read
           .option("delimiter", "|-|")
           .option("nullValue", "null")
-          .schema(df.schema)
+          .schema(StructType.apply(fields))
           .csv(queriesResults + "/" + queryNum + ".out")
           .toDF()
           .collect()
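Note on the schema change above: the new `fields` loop rebuilds the DataFrame's schema with each field's positional index appended to its name before that schema is handed to the CSV reader, presumably so that TPC-DS queries projecting duplicate column names still yield a schema with unique field names. A minimal sketch of the same transformation in more idiomatic Scala (only the standard Spark StructType API is assumed; `uniquifiedSchema` is a hypothetical helper, not part of this commit):

    import org.apache.spark.sql.types.{StructField, StructType}

    // Rebuild a schema so every field name is unique: keep each field's type,
    // nullability and metadata, but suffix the name with its positional index
    // (e.g. "ss_ticket_number" -> "ss_ticket_number3" for the 4th column).
    def uniquifiedSchema(schema: StructType): StructType =
      StructType(schema.fields.zipWithIndex.map { case (f, i) =>
        f.copy(name = f.name + i.toString)
      })

This produces the same StructType as the java.util.ArrayList loop in the diff. Renaming at read time is harmless here because the collected rows are compared positionally, not by column name.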
9 changes: 8 additions & 1 deletion cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -2117,7 +2117,14 @@ const ActionsDAG::Node * SerializedPlanParser::parseExpression(ActionsDAGPtr act
 QueryPlanPtr SerializedPlanParser::parse(const std::string & plan)
 {
     auto plan_ptr = std::make_unique<substrait::Plan>();
-    auto ok = plan_ptr->ParseFromString(plan);
+    /// https://stackoverflow.com/questions/52028583/getting-error-parsing-protobuf-data
+    /// Parsing may fail when the message nesting depth is large.
+    /// Set a recursion limit large enough to avoid the problem.
+    /// This failure is hard to troubleshoot, because protobuf's C++ runtime reports no useful error information.
+    google::protobuf::io::CodedInputStream coded_in(reinterpret_cast<const uint8_t *>(plan.data()), plan.size());
+    coded_in.SetRecursionLimit(100000);
+
+    auto ok = plan_ptr->ParseFromCodedStream(&coded_in);
     if (!ok)
         throw Exception(ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, "Parse substrait::Plan from string failed");

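The same pitfall exists on the JVM side: protobuf-java also caps message nesting at 100 levels by default, and a deeply nested Substrait expression tree then fails to parse with an equally unhelpful error. A sketch of the equivalent guard in Scala, assuming the substrait-java generated Plan class (the class and the helper are illustrative, not part of this commit):

    import com.google.protobuf.CodedInputStream
    import io.substrait.proto.Plan // assumed: substrait-java's generated message class

    // Parse a serialized substrait.Plan with a raised recursion limit,
    // mirroring the C++ change above. CodedInputStream's default limit is
    // 100 nested messages; setRecursionLimit lifts it for this stream only.
    def parsePlan(bytes: Array[Byte]): Plan = {
      val codedIn = CodedInputStream.newInstance(bytes)
      codedIn.setRecursionLimit(100000)
      Plan.parseFrom(codedIn)
    }

Raising the limit on a single input stream, as both versions do, keeps protobuf's default protection against maliciously nested messages everywhere else; only the trusted plan channel gets the relaxed bound.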
