Add PARQUET_FIELD_ID_WRITE_ENABLED to Hudi
CTTY committed Jul 11, 2022
1 parent 10558b6 commit b99eb59
Showing 3 changed files with 16 additions and 0 deletions.
@@ -130,6 +130,12 @@ public class HoodieStorageConfig extends HoodieConfig {
.defaultValue("TIMESTAMP_MICROS")
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");

public static final ConfigProperty<String> PARQUET_FIELD_ID_WRITE_ENABLED = ConfigProperty
.key("hoodie.parquet.fieldId.write.enabled")
.defaultValue("true")
.withDocumentation("Sets spark.sql.parquet.fieldId.write.enabled. "
+ "If enabled, Spark will write out parquet native field ids that are stored inside StructField's metadata as parquet.field.id to parquet files.");

public static final ConfigProperty<String> HFILE_COMPRESSION_ALGORITHM_NAME = ConfigProperty
.key("hoodie.hfile.compression.algorithm")
.defaultValue("GZ")
@@ -337,6 +343,11 @@ public Builder parquetOutputTimestampType(String parquetOutputTimestampType) {
return this;
}

public Builder parquetFieldIdWrite(String parquetFieldIdWrite) {
storageConfig.setValue(PARQUET_FIELD_ID_WRITE_ENABLED, parquetFieldIdWrite);
return this;
}

public Builder hfileCompressionAlgorithm(String hfileCompressionAlgorithm) {
storageConfig.setValue(HFILE_COMPRESSION_ALGORITHM_NAME, hfileCompressionAlgorithm);
return this;
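For context, a minimal sketch of how the new builder option could be used, assuming the usual HoodieStorageConfig.newBuilder()/build() pattern, the getString accessor inherited from HoodieConfig, and the org.apache.hudi.config import path:

import org.apache.hudi.config.HoodieStorageConfig;

public class StorageConfigSketch {
  public static void main(String[] args) {
    // Override the default ("true") to turn native parquet field id writing off.
    HoodieStorageConfig storageConfig = HoodieStorageConfig.newBuilder()
        .parquetFieldIdWrite("false")
        .build();

    // Resolves to "false"; left unset, it would fall back to the default "true".
    System.out.println(storageConfig.getString(HoodieStorageConfig.PARQUET_FIELD_ID_WRITE_ENABLED));
  }
}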
@@ -1682,6 +1682,10 @@ public String parquetOutputTimestampType() {
return getString(HoodieStorageConfig.PARQUET_OUTPUT_TIMESTAMP_TYPE);
}

public String parquetFieldIdWriteEnabled() {
return getString(HoodieStorageConfig.PARQUET_FIELD_ID_WRITE_ENABLED);
}

public Option<HoodieLogBlock.HoodieLogBlockType> getLogDataBlockFormat() {
return Option.ofNullable(getString(HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT))
.map(HoodieLogBlock.HoodieLogBlockType::fromId);
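A hedged sketch of how the new accessor surfaces the setting, assuming HoodieWriteConfig's newBuilder()/withPath()/forTable()/withProperties() builder methods and a hypothetical base path and table name:

import java.util.Properties;
import org.apache.hudi.config.HoodieWriteConfig;

public class WriteConfigSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    // Override the default of "true" for this writer.
    props.setProperty("hoodie.parquet.fieldId.write.enabled", "false");

    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hudi_table")   // hypothetical base path
        .forTable("hudi_table")        // hypothetical table name
        .withProperties(props)
        .build();

    // The new accessor simply returns the resolved string value.
    System.out.println(writeConfig.parquetFieldIdWriteEnabled()); // "false"
  }
}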
@@ -48,6 +48,7 @@ public HoodieRowParquetWriteSupport(Configuration conf, StructType structType, B
Configuration hadoopConf = new Configuration(conf);
hadoopConf.set("spark.sql.parquet.writeLegacyFormat", writeConfig.parquetWriteLegacyFormatEnabled());
hadoopConf.set("spark.sql.parquet.outputTimestampType", writeConfig.parquetOutputTimestampType());
hadoopConf.set("spark.sql.parquet.fieldId.write.enabled", writeConfig.parquetFieldIdWriteEnabled());
this.hadoopConf = hadoopConf;
setSchema(structType, hadoopConf);
this.bloomFilter = bloomFilter;
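To illustrate what the forwarded flag controls, a small sketch (not part of this commit) that attaches a parquet.field.id entry to a StructField's metadata; when spark.sql.parquet.fieldId.write.enabled is true on the writer's Hadoop configuration, Spark's Parquet write support emits that id as the native Parquet field id:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.MetadataBuilder;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class FieldIdSchemaSketch {
  public static void main(String[] args) {
    // Store the field id under the "parquet.field.id" metadata key, as the config documentation describes.
    Metadata idMeta = new MetadataBuilder().putLong("parquet.field.id", 1L).build();
    StructField keyField = new StructField("record_key", DataTypes.StringType, false, idMeta);
    StructType schema = new StructType(new StructField[] { keyField });

    // Printing the schema shows the metadata the Parquet writer reads the id from.
    System.out.println(schema.prettyJson());
  }
}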
