From 5664403bb960b91e2f0fa87f2630b96e4c124701 Mon Sep 17 00:00:00 2001 From: jackierwzhang Date: Tue, 1 Mar 2022 19:44:02 -0800 Subject: [PATCH] [SPARK-38094][SQL][FOLLOWUP] Fix exception message and add a test case ### What changes were proposed in this pull request? Minor follow ups on https://github.com/apache/spark/pull/35385: 1. Add a nested schema test 2. Fixed an error message. ### Why are the changes needed? Better observability. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests Closes #35700 from jackierwzhang/SPARK-38094-minor. Authored-by: jackierwzhang Signed-off-by: Dongjoon Hyun --- .../parquet/ParquetReadSupport.scala | 2 +- .../parquet/ParquetFieldIdIOSuite.scala | 31 ++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 97e691ff7c66c..69684f9466f98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -140,7 +140,7 @@ object ParquetReadSupport extends Logging { "Spark read schema expects field Ids, " + "but Parquet file schema doesn't contain any field Ids.\n" + "Please remove the field ids from Spark schema or ignore missing ids by " + - "setting `spark.sql.parquet.fieldId.ignoreMissing = true`\n" + + s"setting `${SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID.key} = true`\n" + s""" |Spark read schema: |${catalystRequestedSchema.prettyJson} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala index ff0bb2f92d208..5e01d3f447c96 100644 
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{IntegerType, Metadata, MetadataBuilder, StringType, StructType} +import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, Metadata, MetadataBuilder, StringType, StructType} class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkSession { @@ -107,6 +107,35 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS } } + test("SPARK-38094: absence of field ids: reading nested schema") { + withTempDir { dir => + // now with nested schema/complex type + val readSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + .add("b", ArrayType(StringType), true, withId(2)) + .add("c", new StructType().add("c1", IntegerType, true, withId(6)), true, withId(3)) + .add("d", MapType(StringType, StringType), true, withId(4)) + .add("e", IntegerType, true, withId(5)) + + val writeSchema = + new StructType() + .add("a", IntegerType, true, withId(5)) + .add("randomName", StringType, true) + + val writeData = Seq(Row(100, "text"), Row(200, "more")) + + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + + withAllParquetReaders { + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath), + // a, b, c, d all couldn't be found + Row(null, null, null, null, 100) :: Row(null, null, null, null, 200) :: Nil) + } + } + } + test("multiple id matches") { withTempDir { dir => val readSchema =