diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java index dca0c25775133..39cefa07527f0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java @@ -21,8 +21,7 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; -import java.util.Arrays; -import java.util.stream.Collectors; +import java.util.Collections; /** * Avro complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs. @@ -32,14 +31,10 @@ public class ComplexAvroKeyGenerator extends BaseKeyGenerator { public ComplexAvroKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()).split(",")) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()).split(",")) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .collect(Collectors.toList()); + this.recordKeyFields = props.getStringList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), ",", + Collections.singletonList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue())); + this.partitionPathFields = props.getStringList(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), ",", + Collections.emptyList()); } @Override diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java index 370b57b5308c4..659c7a3485e9b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java @@ -25,8 +25,7 @@ import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import java.io.IOException; -import java.util.Arrays; -import java.util.stream.Collectors; +import java.util.Collections; /** * This is a generic implementation of KeyGenerator where users can configure record key as a single field or a combination of fields. Similarly partition path can be configured to have multiple @@ -55,8 +54,10 @@ public enum PartitionKeyType { public CustomAvroKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()).split(",")).map(String::trim).collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()).split(",")).map(String::trim).collect(Collectors.toList()); + this.recordKeyFields = props.getStringList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), ",", + Collections.singletonList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue())); + this.partitionPathFields = props.getStringList(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), ",", + Collections.emptyList()); } @Override @@ -69,7 +70,7 @@ public String getPartitionPath(GenericRecord record) { StringBuilder partitionPath = new StringBuilder(); //Corresponds to no partition case - if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) { + if (getPartitionPathFields().isEmpty()) { return ""; } for (String field : getPartitionPathFields()) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java index 2e2167f9379f0..afa6f89b908ba 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java @@ -25,8 +25,7 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.StructType; -import java.util.Arrays; -import java.util.stream.Collectors; +import java.util.Collections; /** * Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs. @@ -37,14 +36,10 @@ public class ComplexKeyGenerator extends BuiltinKeyGenerator { public ComplexKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()).split(",")) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()).split(",")) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .collect(Collectors.toList()); + this.recordKeyFields = props.getStringList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), ",", + Collections.singletonList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue())); + this.partitionPathFields = props.getStringList(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), ",", + Collections.emptyList()); complexAvroKeyGenerator = new ComplexAvroKeyGenerator(props); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java index c43892af451c5..b86f97218fc2c 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java @@ -31,8 +31,7 @@ import org.apache.spark.sql.types.StructType; import java.io.IOException; -import java.util.Arrays; -import java.util.stream.Collectors; +import java.util.Collections; /** * This is a generic implementation of KeyGenerator where users can configure record key as a single field or a combination of fields. Similarly partition path can be configured to have multiple @@ -53,8 +52,10 @@ public class CustomKeyGenerator extends BuiltinKeyGenerator { public CustomKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()).split(",")).map(String::trim).collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()).split(",")).map(String::trim).collect(Collectors.toList()); + this.recordKeyFields = props.getStringList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), ",", + Collections.singletonList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue())); + this.partitionPathFields = props.getStringList(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), ",", + Collections.emptyList()); customAvroKeyGenerator = new CustomAvroKeyGenerator(props); } @@ -95,7 +96,7 @@ private String getPartitionPath(Option record, Option row, O StringBuilder partitionPath = new StringBuilder(); //Corresponds to no partition case - if (getPartitionPathFields().size() == 1 && getPartitionPathFields().get(0).isEmpty()) { + if (getPartitionPathFields().isEmpty()) { return ""; } for (String field : getPartitionPathFields()) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java index 032c750f03240..52d85a6eeff2f 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java @@ -26,10 +26,8 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.StructType; -import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.stream.Collectors; /** * Simple Key generator for non-partitioned Hive Tables. @@ -40,8 +38,8 @@ public class NonpartitionedKeyGenerator extends BuiltinKeyGenerator { public NonpartitionedKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()) - .split(",")).map(String::trim).collect(Collectors.toList()); + this.recordKeyFields = props.getStringList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), ",", + Collections.singletonList(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue())); this.partitionPathFields = Collections.emptyList(); nonpartitionedAvroKeyGenerator = new NonpartitionedAvroKeyGenerator(props); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java b/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java index 09671ba2a3577..4d35c4576d266 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java @@ -73,7 +73,7 @@ public List getStringList(String property, String delimiter, List !s.isEmpty()).collect(Collectors.toList()); } public int getInteger(String property) { diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java index 735277d959ee4..5a3cb83c8bfe7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java @@ -71,12 +71,15 @@ private TypedProperties getProps() { @Test public void testNullPartitionPathFields() { - Assertions.assertThrows(IllegalArgumentException.class, () -> new ComplexKeyGenerator(getPropertiesWithoutPartitionPathProp())); + ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(getPropertiesWithoutPartitionPathProp()); + assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 0); } @Test public void testNullRecordKeyFields() { - Assertions.assertThrows(IllegalArgumentException.class, () -> new ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp())); + ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp()); + assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1); + assertEquals(compositeKeyGenerator.getRecordKeyFields().get(0), KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue()); } @Test diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java index 26a2b439abfb2..424bc22dcd532 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java @@ -240,59 +240,22 @@ public void testInvalidPartitionKeyType(TypedProperties props) { } @Test - public void testNoRecordKeyFieldPropWithKeyGeneratorClass() { + public void testNoRecordKeyFieldPropWithKeyGeneratorClass() throws IOException { testNoRecordKeyFieldProp(true); } @Test - public void testNoRecordKeyFieldPropWithKeyGeneratorType() { + public void testNoRecordKeyFieldPropWithKeyGeneratorType() throws IOException { testNoRecordKeyFieldProp(false); } - public void testNoRecordKeyFieldProp(boolean useKeyGeneratorClassName) { + public void testNoRecordKeyFieldProp(boolean useKeyGeneratorClassName) throws IOException { TypedProperties propsWithoutRecordKeyFieldProps = getPropsWithoutRecordKeyFieldProps(useKeyGeneratorClassName); - try { - BuiltinKeyGenerator keyGenerator = - (BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(propsWithoutRecordKeyFieldProps); - - keyGenerator.getKey(getRecord()); - Assertions.fail("should fail when record key field is not provided!"); - } catch (Exception e) { - if (useKeyGeneratorClassName) { - // "Property hoodie.datasource.write.recordkey.field not found" exception cause CustomKeyGenerator init fail - Assertions.assertTrue(e - .getCause() - .getCause() - .getCause() - .getMessage() - .contains("Property hoodie.datasource.write.recordkey.field not found")); - } else { - Assertions.assertTrue(stackTraceToString(e).contains("Property hoodie.datasource.write.recordkey.field not found")); - } - - } - - try { - BuiltinKeyGenerator keyGenerator = - (BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(propsWithoutRecordKeyFieldProps); + BuiltinKeyGenerator keyGenerator = + (BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(propsWithoutRecordKeyFieldProps); - GenericRecord record = getRecord(); - Row row = KeyGeneratorTestUtilities.getRow(record); - keyGenerator.getRecordKey(row); - Assertions.fail("should fail when record key field is not provided!"); - } catch (Exception e) { - if (useKeyGeneratorClassName) { - // "Property hoodie.datasource.write.recordkey.field not found" exception cause CustomKeyGenerator init fail - Assertions.assertTrue(e - .getCause() - .getCause() - .getCause() - .getMessage() - .contains("Property hoodie.datasource.write.recordkey.field not found")); - } else { - Assertions.assertTrue(stackTraceToString(e).contains("Property hoodie.datasource.write.recordkey.field not found")); - } - } + Assertions.assertEquals(keyGenerator.getRecordKeyFields().size(), 1); + Assertions.assertEquals(keyGenerator.getRecordKeyFields().get(0), KeyGeneratorOptions.RECORDKEY_FIELD_NAME.defaultValue()); } @Test diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala index 7fc7d318d362f..ffa5863ecf51a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala @@ -277,50 +277,19 @@ class TestDataSourceDefaults { assertEquals("field1/name1", keyGen.getPartitionPath(baseRow)) // partition path field not specified - try { - val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") - new ComplexKeyGenerator(props).getKey(baseRecord) - fail("Should have errored out") - } catch { - case e: IllegalArgumentException => - // do nothing - } - - // partition path field not specified using Row - try { + keyGen = { val props = new TypedProperties() props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") - val keyGen = new ComplexKeyGenerator(props) - keyGen.getRecordKey(baseRow) - fail("Should have errored out") - } catch { - case e: IllegalArgumentException => - // do nothing + new ComplexKeyGenerator(props) } - // recordkey field not specified - try { - val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") - new ComplexKeyGenerator(props).getKey(baseRecord) - fail("Should have errored out") - } catch { - case e: IllegalArgumentException => - // do nothing - } + val nonPartitionedHk = keyGen.getKey(baseRecord) + assertEquals("field1:field1", nonPartitionedHk.getRecordKey) + assertEquals("", nonPartitionedHk.getPartitionPath) - // recordkey field not specified - try { - val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") - val keyGen = new ComplexKeyGenerator(props) - keyGen.getPartitionPath(baseRow) - fail("Should have errored out") - } catch { - case e: IllegalArgumentException => - // do nothing - } + // partition path field not specified using Row + assertEquals("field1:field1", keyGen.getRecordKey(baseRow)) + assertEquals("", keyGen.getPartitionPath(baseRow)) // nested field as record key and partition path keyGen = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId,testNestedRecord.isAdmin", "testNestedRecord.userId,testNestedRecord.isAdmin", "false"))