PARQUET-2415: Reuse hadoop file status and footer in ParquetRecordReader #1242

Open: wants to merge 9 commits into base: master

Changes from all commits
@@ -24,6 +24,7 @@
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -64,13 +65,19 @@ public int run() throws IOException {
return 0;
}

abstract class MixIn {
@JsonIgnore
abstract int getInputFile();
Author: #1242 (comment)

I'm sorry, the UT failed and I don't know why.

Member: You mean this is a workaround to get rid of the test failure at the cost of a new dependency?

Author: I mean the UT will fail if we only annotate the getInputFile method; creating a MixIn class here (in the parquet-cli module) works around that.

The Parquet project already depends on the jackson-annotations library in some other modules, so I don't think this PR adds a new dependency to the parquet-hadoop module.

}

private String readFooter(InputFile inputFile) throws JsonProcessingException, IOException {
String json;
try (ParquetFileReader reader = ParquetFileReader.open(inputFile)) {
ParquetMetadata footer = reader.getFooter();
ObjectMapper mapper = RawUtils.createObjectMapper();
mapper.setVisibility(PropertyAccessor.ALL, Visibility.NONE);
mapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
mapper.addMixIn(ParquetMetadata.class, MixIn.class);
json = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(footer);
}
return json;
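As a side note for readers, here is a minimal, self-contained sketch of the Jackson mix-in technique discussed above: a mix-in class attaches annotations (here @JsonIgnore) to a target type without modifying the target class itself. The Footer class, the MixInDemo name, and all values below are hypothetical, purely for illustration.

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.ObjectMapper;

public class MixInDemo {
    // Hypothetical target class that we cannot (or do not want to) annotate directly.
    public static class Footer {
        public String getSchema() { return "message root {}"; }
        public String getInputFile() { return "file:///tmp/example.parquet"; }
    }

    // The mix-in declares a member with the same name and signature and carries
    // the annotation; Jackson then treats Footer as if it were annotated.
    abstract static class FooterMixIn {
        @JsonIgnore
        abstract String getInputFile();
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        mapper.addMixIn(Footer.class, FooterMixIn.class);
        // Prints {"schema":"message root {}"}; inputFile is suppressed by the mix-in.
        System.out.println(mapper.writeValueAsString(new Footer()));
    }
}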
parquet-hadoop/pom.xml: 5 additions & 0 deletions
@@ -95,6 +95,11 @@
<artifactId>jackson-databind</artifactId>
<version>${jackson-databind.version}</version>
</dependency>
<dependency>
<groupId>${jackson.groupId}</groupId>
Contributor: Can we avoid adding a dependency?

Author: The jackson-annotations dependency is used in parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java so that the InputFile field is not serialized to JSON, keeping the same behavior as before. I'm sorry, I'm not familiar with the Jackson library and I'm not sure whether there is another way to do this.

Member: I happened to find that we have a parquet-jackson module which shades jackson-core and jackson-databind, but parquet-hadoop (and other modules) also explicitly depends on parquet-jackson and the jackson-xxx artifacts at the same time. I'm not familiar with this history; do you know why? @gszadovszky @Fokko @shangxinli

Contributor: @wgtmac, the README of parquet-jackson describes how it works. This is only for doing the shading once (and having one shaded jar) instead of in every module that requires Jackson.

Member: Thanks! Sorry for missing that.

<artifactId>jackson-annotations</artifactId>
<version>${jackson-databind.version}</version>
</dependency>
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
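For context on the shading discussion above, here is a rough sketch of the "shade once" pattern: a single helper module relocates Jackson with the maven-shade-plugin, and downstream modules depend on that one shaded artifact instead of each shading Jackson themselves. The relocation pattern and plugin configuration below are assumptions for illustration, not the actual parquet-jackson configuration.

<!-- Hypothetical build section of the single shading module's pom.xml -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-shade-plugin</artifactId>
  <executions>
    <execution>
      <phase>package</phase>
      <goals>
        <goal>shade</goal>
      </goals>
      <configuration>
        <artifactSet>
          <includes>
            <include>com.fasterxml.jackson.core:jackson-core</include>
            <include>com.fasterxml.jackson.core:jackson-databind</include>
            <include>com.fasterxml.jackson.core:jackson-annotations</include>
          </includes>
        </artifactSet>
        <relocations>
          <relocation>
            <pattern>com.fasterxml.jackson</pattern>
            <shadedPattern>shaded.example.com.fasterxml.jackson</shadedPattern>
          </relocation>
        </relocations>
      </configuration>
    </execution>
  </executions>
</plugin>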
@@ -613,8 +613,10 @@ private static final ParquetMetadata readFooter(

// Regular file, or encrypted file with plaintext footer
if (!encryptedFooterMode) {
- return converter.readParquetMetadata(
-     footerBytesStream, options.getMetadataFilter(), fileDecryptor, false, fileMetadataLength);
+ ParquetMetadata parquetMetadata = converter.readParquetMetadata(
+     footerBytesStream, options.getMetadataFilter(), fileDecryptor, false, fileMetadataLength);
+ parquetMetadata.setInputFile(file);
+ return parquetMetadata;
}

// Encrypted file with encrypted footer
@@ -625,7 +627,10 @@
fileDecryptor.setFileCryptoMetaData(
fileCryptoMetaData.getEncryption_algorithm(), true, fileCryptoMetaData.getKey_metadata());
// footer length is required only for signed plaintext footers
- return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter(), fileDecryptor, true, 0);
+ ParquetMetadata parquetMetadata =
+     converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter(), fileDecryptor, true, 0);
+ parquetMetadata.setInputFile(file);
+ return parquetMetadata;
}

/**
@@ -824,12 +829,19 @@ public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer)
}

public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException {
this(file, options, null);
}

public ParquetFileReader(InputFile file, ParquetReadOptions options, ParquetMetadata footer) throws IOException {
this.converter = new ParquetMetadataConverter(options);
this.file = file;
this.f = file.newStream();
this.options = options;
try {
- this.footer = readFooter(file, options, f, converter);
+ if (footer == null) {
+     footer = readFooter(file, options, f, converter);
+ }
+ this.footer = footer;
} catch (Exception e) {
// In case that reading footer throws an exception in the constructor, the new stream
// should be closed. Otherwise, there's no way to close this outside.
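To make the constructor change above concrete, here is a minimal usage sketch, assuming the new three-argument constructor introduced by this PR; the helper class and method names are invented for illustration.

import java.io.IOException;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.InputFile;

public class FooterReuseDemo {
    // Open a reader that reuses an already-parsed footer instead of re-reading it.
    public static ParquetFileReader openReusingFooter(InputFile file, ParquetMetadata cachedFooter)
            throws IOException {
        ParquetReadOptions options = ParquetReadOptions.builder().build();
        // When the footer argument is non-null, the constructor above skips
        // readFooter(...) and the extra I/O it implies.
        return new ParquetFileReader(file, options, cachedFooter);
    }
}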
@@ -18,6 +18,7 @@
*/
package org.apache.parquet.hadoop;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
@@ -36,6 +37,7 @@
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

@@ -55,6 +57,9 @@ public class ParquetInputSplit extends FileSplit implements Writable {
private long end;
private long[] rowGroupOffsets;

@JsonIgnore
private volatile ParquetMetadata footer;

/**
* Writables must have a parameterless constructor
*/
@@ -222,6 +227,14 @@ public long[] getRowGroupOffsets() {
return rowGroupOffsets;
}

public ParquetMetadata getFooter() {
return footer;
}

public void setFooter(ParquetMetadata footer) {
Contributor (@ConeyLiu, Jan 15, 2024): The ParquetInputSplit is marked as deprecated, and the recommended replacement is FileSplit. How does Spark set the footer after ParquetInputSplit is removed?

Author: Currently Parquet converts the input split to a ParquetInputSplit and builds the reader with it. If ParquetInputSplit were removed from the ParquetFileReader class, Spark would need a shim class to work with different Parquet versions. That would be a big change.

Member: I think this PR can be a good reason to push the Spark community to migrate. Or could we fix this only in Spark 4.x?

Author: I've already filed a WIP ticket, apache/spark#44853, for Spark 4 and will discuss this change on the Spark side in that PR after this PR is merged.

Member: IIUC, other comments have suggested that we should not build on a deprecated interface, so I don't expect this PR to be merged as is. It would be good to figure out the final solution on the Spark side before taking any action here.

this.footer = footer;
}
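A sketch of the end-to-end flow these accessors enable, as described in this PR. The planner-side code is hypothetical; inputFile and split are assumed to be in scope, and ParquetInputSplit remains deprecated as noted above.

// Planning side: read the footer once and cache it on the split.
ParquetMetadata footer;
try (ParquetFileReader reader = ParquetFileReader.open(inputFile)) {
    footer = reader.getFooter();
}
split.setFooter(footer);

// Task side (e.g., in ParquetRecordReader): reuse the cached footer rather
// than re-reading it from storage.
ParquetMetadata cachedFooter = split.getFooter();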

@Override
public String toString() {
String hosts;
@@ -46,6 +46,7 @@
import org.apache.parquet.hadoop.util.ContextUtil;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;
import org.apache.parquet.io.InputFile;
import org.apache.parquet.io.ParquetDecodingException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -155,8 +156,13 @@ private void initializeInternalReader(ParquetInputSplit split, Configuration con
}

// open a reader with the metadata filter
- ParquetFileReader reader =
-     ParquetFileReader.open(HadoopInputFile.fromPath(path, configuration), optionsBuilder.build());
+ InputFile inputFile;
+ if (split.getFooter() != null && split.getFooter().getInputFile() != null) {
+     inputFile = split.getFooter().getInputFile();
+ } else {
+     inputFile = HadoopInputFile.fromPath(path, configuration);
Contributor: If the FileStatus (or at least the file length) can get down here, then it becomes possible to skip a HEAD request when opening a file against cloud storage. The API you need is in Hadoop 3.3.0 and is not very reflection-friendly; we could add something to assist there. What is key is to get as much info as possible into HadoopInputFile, especially the expected length.

+ }
+ ParquetFileReader reader = new ParquetFileReader(inputFile, optionsBuilder.build(), split.getFooter());

if (rowGroupOffsets != null) {
// verify a row group was found for each offset
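Returning to the reviewer's point above about skipping a HEAD request: a sketch of one way to carry the file length into HadoopInputFile, assuming the planner already holds a FileStatus (fs, path, and configuration are assumed to be in scope).

// Reuse the FileStatus fetched at planning time instead of a bare Path.
FileStatus status = fs.getFileStatus(path);
InputFile inputFile = HadoopInputFile.fromStatus(status, configuration);
// On an object store, fromPath(...) triggers a fresh getFileStatus (a HEAD
// request); fromStatus(...) reuses the metadata the planner already has.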
@@ -18,11 +18,13 @@
*/
package org.apache.parquet.hadoop.metadata;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.List;
import org.apache.parquet.io.InputFile;

/**
* Meta Data block stored in the footer of the file
@@ -84,6 +86,9 @@ public static ParquetMetadata fromJSON(String json) {
private final FileMetaData fileMetaData;
private final List<BlockMetaData> blocks;

@JsonIgnore
Member: Why is this annotation required?

Author: With this annotation, the Jackson mapper will not serialize this field to JSON, which keeps the same behavior as before.

private volatile InputFile inputFile;

/**
* @param fileMetaData file level metadata
* @param blocks block level metadata
@@ -107,6 +112,22 @@ public FileMetaData getFileMetaData() {
return fileMetaData;
}

/**
* Reuse the inputFile in ParquetFileReader if it is not null.
*
* @return the cached input file, or null if it has not been set
*/
public InputFile getInputFile() {
return inputFile;
}

/**
* @param inputFile the input file cached in the readFooter method, to be reused in ParquetFileReader
*/
public void setInputFile(InputFile inputFile) {
this.inputFile = inputFile;
}
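A small sketch of what the @JsonIgnore annotation means for these accessors in practice (footer and inputFile assumed in scope):

footer.setInputFile(inputFile);
// Because the inputFile field is annotated with @JsonIgnore, the serialized
// form is unchanged: no "inputFile" key appears in the JSON output.
String json = ParquetMetadata.toJSON(footer);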

@Override
public String toString() {
return "ParquetMetaData{" + fileMetaData + ", blocks: " + blocks + "}";