Patch summary (free text ahead of the first "diff --git" header):
  * Adds SinglePartPartitionValueExtractor, which returns the whole relative
    partition path as one partition value with every '/' replaced by '-'.
  * HoodieSyncConfig: when exactly one partition field is configured, the
    inferred extractor is HiveStylePartitionValueExtractor if
    HIVE_STYLE_PARTITIONING_ENABLE is "true", otherwise
    SinglePartPartitionValueExtractor. Previously the latter case fell through
    to MultiPartKeysValueExtractor.
  * Adds tests for both changes and renames testInferPartitonExtractorClass
    to testInferPartitionExtractorClass.
  NOTE(review): leading whitespace of the Java lines was reconstructed
  (2-space indent, 4-space continuation); confirm the context lines against
  the target tree before applying.

diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SinglePartPartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SinglePartPartitionValueExtractor.java
new file mode 100644
index 0000000000000..abbccfcc53763
--- /dev/null
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/SinglePartPartitionValueExtractor.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.hive;
+
+import org.apache.hudi.sync.common.model.PartitionValueExtractor;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Extractor for a partition path from a single column.
+ * <p>
+ * This implementation extracts the partition value from the partition path as a single part
+ * even if the relative partition path contains slashes, e.g., the `TimestampBasedKeyGenerator`
+ * transforms the timestamp column into the partition path in the format of "yyyyMM/dd/HH".
+ * The slash (`/`) is replaced with dash (`-`), e.g., `202210/01/20` -> `202210-01-20`.
+ */
+public class SinglePartPartitionValueExtractor implements PartitionValueExtractor {
+  @Override
+  public List<String> extractPartitionValuesInPath(String partitionPath) {
+    return Collections.singletonList(partitionPath.replace('/', '-'));
+  }
+}
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
index ba5a544af18b8..075542d596717 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
@@ -18,8 +18,12 @@
 
 package org.apache.hudi.hive;
 
+import org.apache.hudi.sync.common.model.PartitionValueExtractor;
+
 import org.junit.jupiter.api.Test;
+
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -46,4 +50,12 @@ public void testHiveStylePartition() {
         IllegalArgumentException.class,
         () -> hiveStylePartition.extractPartitionValuesInPath("2021/04/02"));
   }
+
+  @Test
+  public void testSinglePartPartition() {
+    PartitionValueExtractor extractor = new SinglePartPartitionValueExtractor();
+    assertEquals(
+        Collections.singletonList("202210-01-20"),
+        extractor.extractPartitionValuesInPath("202210/01/20"));
+  }
 }
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
index 43502f612f92b..b927cdb0c316e 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
@@ -104,10 +104,13 @@ public class HoodieSyncConfig extends HoodieConfig {
         String partitionFields = partitionFieldsOpt.get();
         if (StringUtils.nonEmpty(partitionFields)) {
           int numOfPartFields = partitionFields.split(",").length;
-          if (numOfPartFields == 1
-              && cfg.contains(HIVE_STYLE_PARTITIONING_ENABLE)
-              && cfg.getString(HIVE_STYLE_PARTITIONING_ENABLE).equals("true")) {
-            return Option.of("org.apache.hudi.hive.HiveStylePartitionValueExtractor");
+          if (numOfPartFields == 1) {
+            if (cfg.contains(HIVE_STYLE_PARTITIONING_ENABLE)
+                && cfg.getString(HIVE_STYLE_PARTITIONING_ENABLE).equals("true")) {
+              return Option.of("org.apache.hudi.hive.HiveStylePartitionValueExtractor");
+            } else {
+              return Option.of("org.apache.hudi.hive.SinglePartPartitionValueExtractor");
+            }
           } else {
             return Option.of("org.apache.hudi.hive.MultiPartKeysValueExtractor");
           }
diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java
index f8e4eff30a5af..aef283e595ca2 100644
--- a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java
+++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java
@@ -104,7 +104,7 @@ void testInferPartitionFields() {
   }
 
   @Test
-  void testInferPartitonExtractorClass() {
+  void testInferPartitionExtractorClass() {
     Properties props0 = new Properties();
     HoodieSyncConfig config0 = new HoodieSyncConfig(props0, new Configuration());
     assertEquals("org.apache.hudi.hive.MultiPartKeysValueExtractor",
@@ -140,6 +140,13 @@ void testInferPartitonExtractorClass() {
     HoodieSyncConfig config4 = new HoodieSyncConfig(props4, new Configuration());
     assertEquals("org.apache.hudi.hive.HiveStylePartitionValueExtractor",
         config4.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS));
+
+    Properties props5 = new Properties();
+    props5.setProperty(HoodieTableConfig.PARTITION_FIELDS.key(), "foo");
+    props5.setProperty(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE.key(), "false");
+    HoodieSyncConfig config5 = new HoodieSyncConfig(props5, new Configuration());
+    assertEquals("org.apache.hudi.hive.SinglePartPartitionValueExtractor",
+        config5.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS));
   }
 
   @Test