From 3c419b0ed59c591739333c8adb139bfbd47af09c Mon Sep 17 00:00:00 2001 From: xiarixiaoyao Date: Sat, 25 Jun 2022 17:16:32 +0800 Subject: [PATCH] [HUDI-4296] Fix flaky test TestHoodieSparkSqlWriter.testSchemaEvolutionForTableType --- .../main/scala/org/apache/hudi/BaseFileOnlyRelation.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala index 4160c34b0ce6..d6ec645920db 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala @@ -166,7 +166,9 @@ class BaseFileOnlyRelation(sqlContext: SQLContext, DataSource.apply( sparkSession = sparkSession, paths = extraReadPaths, - userSpecifiedSchema = userSchema, + // If the user did not specify a schema, fall back to the latest commit schema, since + // the table schema may have evolved. + userSpecifiedSchema = userSchema.orElse(Some(tableStructSchema)), className = formatClassName, // Since we're reading the table as just collection of files we have to make sure // we only read the latest version of every Hudi's file-group, which might be compacted, clustered, etc. @@ -175,8 +177,7 @@ class BaseFileOnlyRelation(sqlContext: SQLContext, // We rely on [[HoodieROTablePathFilter]], to do proper filtering to assure that options = optParams ++ Map( "mapreduce.input.pathFilter.class" -> classOf[HoodieROTablePathFilter].getName - ), - partitionColumns = partitionColumns + ) ) .resolveRelation() .asInstanceOf[HadoopFsRelation]