From b12ec7987dc191bdfe346261c25d64530e228ddf Mon Sep 17 00:00:00 2001 From: yufeigu Date: Thu, 9 Sep 2021 10:55:34 -0700 Subject: [PATCH 01/11] Consolidate write.folder-storage.path and write.folder-storage.path to write.data.path. --- .../org/apache/iceberg/LocationProviders.java | 26 ++++++++++++------- .../org/apache/iceberg/TableProperties.java | 18 ++++++++++--- .../apache/iceberg/TestLocationProvider.java | 10 ++++++- site/docs/aws.md | 7 +++-- .../spark/source/IcebergSourceBenchmark.java | 5 ++-- .../actions/TestRemoveOrphanFilesAction.java | 4 +-- .../spark/source/TestDataFrameWrites.java | 6 ++--- .../actions/BaseSnapshotTableSparkAction.java | 2 +- 8 files changed, 50 insertions(+), 28 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 19385e1f545c..7d6a49d682b4 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -29,8 +29,6 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.util.PropertyUtil; -import static org.apache.iceberg.TableProperties.OBJECT_STORE_PATH; - public class LocationProviders { private LocationProviders() { @@ -68,16 +66,11 @@ public static LocationProvider locationsFor(String location, Map } } - private static String defaultDataLocation(String tableLocation, Map properties) { - return properties.getOrDefault(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, - String.format("%s/data", tableLocation)); - } - static class DefaultLocationProvider implements LocationProvider { private final String dataLocation; DefaultLocationProvider(String tableLocation, Map properties) { - this.dataLocation = stripTrailingSlash(defaultDataLocation(tableLocation, properties)); + this.dataLocation = stripTrailingSlash(dataLocation(properties, tableLocation)); } @Override @@ -99,8 +92,7 @@ static class ObjectStoreLocationProvider implements LocationProvider { private final String context; ObjectStoreLocationProvider(String tableLocation, Map properties) { - this.storageLocation = stripTrailingSlash(properties.getOrDefault(OBJECT_STORE_PATH, - defaultDataLocation(tableLocation, properties))); + this.storageLocation = stripTrailingSlash(dataLocation(properties, tableLocation)); this.context = pathContext(tableLocation); } @@ -141,4 +133,18 @@ private static String stripTrailingSlash(String path) { } return result; } + + public static String dataLocation(Map properties, String tableLocation) { + String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); + if(dataLocation == null) { + dataLocation = properties.get(TableProperties.OBJECT_STORE_PATH); + if (dataLocation == null) { + dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + if (dataLocation == null) { + dataLocation = String.format("%s/data", tableLocation); + } + } + } + return dataLocation; + } } diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index dcbab13cd70d..1ccb2102abf8 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -135,21 +135,31 @@ private TableProperties() { public static final String OBJECT_STORE_ENABLED = "write.object-storage.enabled"; public static final boolean OBJECT_STORE_ENABLED_DEFAULT = false; + /** + * @deprecated will be removed in 0.14.0, use {@link #WRITE_DATA_LOCATION} instead + */ + @Deprecated public static final String OBJECT_STORE_PATH = "write.object-storage.path"; public static final String WRITE_LOCATION_PROVIDER_IMPL = "write.location-provider.impl"; - // This only applies to files written after this property is set. Files previously written aren't - // relocated to reflect this parameter. - // If not set, defaults to a "data" folder underneath the root path of the table. + /** + * @deprecated will be removed in 0.14.0, use {@link #WRITE_DATA_LOCATION} instead + */ + @Deprecated public static final String WRITE_FOLDER_STORAGE_LOCATION = "write.folder-storage.path"; /** - * @deprecated will be removed in 0.14.0, use {@link #WRITE_FOLDER_STORAGE_LOCATION} instead + * @deprecated will be removed in 0.14.0, use {@link #WRITE_DATA_LOCATION} instead */ @Deprecated public static final String WRITE_NEW_DATA_LOCATION = "write.folder-storage.path"; + // This only applies to files written after this property is set. Files previously written aren't + // relocated to reflect this parameter. + // If not set, defaults to a "data" folder underneath the root path of the table. + public static final String WRITE_DATA_LOCATION = "write.data.path"; + // This only applies to files written after this property is set. Files previously written aren't // relocated to reflect this parameter. // If not set, defaults to a "metadata" folder underneath the root path of the table. diff --git a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java index e47b0e30f9ea..62d4f78ab49a 100644 --- a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java +++ b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java @@ -114,7 +114,7 @@ public void testDefaultLocationProvider() { @Test public void testDefaultLocationProviderWithCustomDataLocation() { this.table.updateProperties() - .set(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, "new_location") + .set(TableProperties.WRITE_DATA_LOCATION, "new_location") .commit(); this.table.locationProvider().newDataLocation("my_file"); @@ -237,5 +237,13 @@ public void testObjectStorageLocationProviderPathResolution() { Assert.assertTrue("object storage path should be used when set", table.locationProvider().newDataLocation("file").contains(objectPath)); + + String dataPath = "s3://random/data/location"; + table.updateProperties() + .set(TableProperties.WRITE_DATA_LOCATION, dataPath) + .commit(); + + Assert.assertTrue("object storage path should be used when set", + table.locationProvider().newDataLocation("file").contains(dataPath)); } } diff --git a/site/docs/aws.md b/site/docs/aws.md index ab37962e2505..1b8e16ce5d02 100644 --- a/site/docs/aws.md +++ b/site/docs/aws.md @@ -344,7 +344,7 @@ Data stored in S3 with a traditional Hive storage layout can face S3 request thr Iceberg by default uses the Hive storage layout, but can be switched to use the `ObjectStoreLocationProvider`. With `ObjectStoreLocationProvider`, a determenistic hash is generated for each stored file, with the hash appended -directly after the `write.object-storage.path`. This ensures files written to s3 are equally distributed across multiple [prefixes](https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/) in the S3 bucket. Resulting in minimized throttling and maximized throughput for S3-related IO operations. When using `ObjectStoreLocationProvider` having a shared and short `write.object-storage.path` across your Iceberg tables will improve performance. +directly after the `write.data.path`. This ensures files written to s3 are equally distributed across multiple [prefixes](https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/) in the S3 bucket. Resulting in minimized throttling and maximized throughput for S3-related IO operations. When using `ObjectStoreLocationProvider` having a shared and short `write.data.path` across your Iceberg tables will improve performance. For more information on how S3 scales API QPS, checkout the 2018 re:Invent session on [Best Practices for Amazon S3 and Amazon S3 Glacier]( https://youtu.be/rHeTn9pHNKo?t=3219). At [53:39](https://youtu.be/rHeTn9pHNKo?t=3219) it covers how S3 scales/partitions & at [54:50](https://youtu.be/rHeTn9pHNKo?t=3290) it discusses the 30-60 minute wait time before new partitions are created. @@ -358,7 +358,7 @@ CREATE TABLE my_catalog.my_ns.my_table ( USING iceberg OPTIONS ( 'write.object-storage.enabled'=true, - 'write.object-storage.path'='s3://my-table-data-bucket') + 'write.data.path'='s3://my-table-data-bucket') PARTITIONED BY (category); ``` @@ -373,8 +373,7 @@ s3://my-table-data-bucket/2d3905f8/my_ns.db/my_table/category=orders/00000-0-5af ``` Note, the path resolution logic for `ObjectStoreLocationProvider` is as follows: -- if `write.object-storage.path` is set, use it -- if not found, fallback to `write.folder-storage.path` +- if `write.data.path` is set, use it - if not found, use `/data` For more details, please refer to the [LocationProvider Configuration](../custom-catalog/#custom-location-provider-implementation) section. diff --git a/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java b/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java index cb25074b4d96..75a28782d4b4 100644 --- a/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java +++ b/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java @@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.iceberg.LocationProviders; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; import org.apache.iceberg.UpdateProperties; @@ -78,9 +79,7 @@ protected String newTableLocation() { } protected String dataLocation() { - Map properties = table.properties(); - return properties.getOrDefault(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, - String.format("%s/data", table.location())); + return LocationProviders.dataLocation(table().properties(), table.location()); } protected void cleanupFiles() throws IOException { diff --git a/spark/src/test/java/org/apache/iceberg/actions/TestRemoveOrphanFilesAction.java b/spark/src/test/java/org/apache/iceberg/actions/TestRemoveOrphanFilesAction.java index 7c2a325eb783..cf85bc850420 100644 --- a/spark/src/test/java/org/apache/iceberg/actions/TestRemoveOrphanFilesAction.java +++ b/spark/src/test/java/org/apache/iceberg/actions/TestRemoveOrphanFilesAction.java @@ -285,7 +285,7 @@ public void testWapFilesAreKept() throws InterruptedException { public void testMetadataFolderIsIntact() throws InterruptedException { // write data directly to the table location Map props = Maps.newHashMap(); - props.put(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, tableLocation); + props.put(TableProperties.WRITE_DATA_LOCATION, tableLocation); Table table = TABLES.create(SCHEMA, SPEC, props, tableLocation); List records = Lists.newArrayList( @@ -357,7 +357,7 @@ public void testOlderThanTimestamp() throws InterruptedException { @Test public void testRemoveUnreachableMetadataVersionFiles() throws InterruptedException { Map props = Maps.newHashMap(); - props.put(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, tableLocation); + props.put(TableProperties.WRITE_DATA_LOCATION, tableLocation); props.put(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "1"); Table table = TABLES.create(SCHEMA, SPEC, props, tableLocation); diff --git a/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index 8ad8e2e2b802..36c0541f46c5 100644 --- a/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -145,7 +145,7 @@ public void testWriteWithCustomDataLocation() throws IOException { File tablePropertyDataLocation = temp.newFolder("test-table-property-data-dir"); Table table = createTable(new Schema(SUPPORTED_PRIMITIVES.fields()), location); table.updateProperties().set( - TableProperties.WRITE_FOLDER_STORAGE_LOCATION, tablePropertyDataLocation.getAbsolutePath()).commit(); + TableProperties.WRITE_DATA_LOCATION, tablePropertyDataLocation.getAbsolutePath()).commit(); writeAndValidateWithLocations(table, location, tablePropertyDataLocation); } @@ -271,7 +271,7 @@ public void testNullableWithWriteOption() throws IOException { String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); - tableProperties = ImmutableMap.of(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, targetPath); + tableProperties = ImmutableMap.of(TableProperties.WRITE_DATA_LOCATION, targetPath); // read this and append to iceberg dataset spark @@ -312,7 +312,7 @@ public void testNullableWithSparkSqlOption() throws IOException { String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); - tableProperties = ImmutableMap.of(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, targetPath); + tableProperties = ImmutableMap.of(TableProperties.WRITE_DATA_LOCATION, targetPath); // read this and append to iceberg dataset spark diff --git a/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java b/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java index fdc5bf09ce4d..7b7ee754f586 100644 --- a/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java +++ b/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java @@ -167,7 +167,7 @@ protected Map destTableProps() { // remove any possible location properties from origin properties properties.remove(LOCATION); properties.remove(TableProperties.WRITE_METADATA_LOCATION); - properties.remove(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + properties.remove(TableProperties.WRITE_DATA_LOCATION); // set default and user-provided props properties.put(TableCatalog.PROP_PROVIDER, "iceberg"); From abcf95082d399c1ca6c3b4f71827f8417d5cd135 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Thu, 9 Sep 2021 11:18:33 -0700 Subject: [PATCH 02/11] Fix the style issue. --- core/src/main/java/org/apache/iceberg/LocationProviders.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 7d6a49d682b4..a2dd146a31ca 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -136,7 +136,7 @@ private static String stripTrailingSlash(String path) { public static String dataLocation(Map properties, String tableLocation) { String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); - if(dataLocation == null) { + if (dataLocation == null) { dataLocation = properties.get(TableProperties.OBJECT_STORE_PATH); if (dataLocation == null) { dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); From 85c9f663b0e2ec4a94c90ea3dc1ff35fc595f43e Mon Sep 17 00:00:00 2001 From: yufeigu Date: Fri, 10 Sep 2021 10:42:55 -0700 Subject: [PATCH 03/11] Resolve comments. --- .../org/apache/iceberg/LocationProviders.java | 27 ++++++++++------ .../org/apache/iceberg/TableProperties.java | 4 +-- .../apache/iceberg/TestLocationProvider.java | 32 +++++++++++++++---- .../spark/source/IcebergSourceBenchmark.java | 4 +-- 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index a2dd146a31ca..3352fba76108 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -28,8 +28,11 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.PropertyUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class LocationProviders { + private static final Logger LOG = LoggerFactory.getLogger(LocationProviders.class); private LocationProviders() { } @@ -54,10 +57,12 @@ public static LocationProvider locationsFor(String location, Map return ctor.newInstance(location, properties); } catch (ClassCastException e) { throw new IllegalArgumentException( - String.format("Provided implementation for dynamic instantiation should implement %s.", + String.format( + "Provided implementation for dynamic instantiation should implement %s.", LocationProvider.class), e); } - } else if (PropertyUtil.propertyAsBoolean(properties, + } else if (PropertyUtil.propertyAsBoolean( + properties, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_ENABLED_DEFAULT)) { return new ObjectStoreLocationProvider(location, properties); @@ -70,7 +75,8 @@ static class DefaultLocationProvider implements LocationProvider { private final String dataLocation; DefaultLocationProvider(String tableLocation, Map properties) { - this.dataLocation = stripTrailingSlash(dataLocation(properties, tableLocation)); + this.dataLocation = + stripTrailingSlash(dataLocation(properties, tableLocation, TableProperties.WRITE_FOLDER_STORAGE_LOCATION)); } @Override @@ -92,7 +98,8 @@ static class ObjectStoreLocationProvider implements LocationProvider { private final String context; ObjectStoreLocationProvider(String tableLocation, Map properties) { - this.storageLocation = stripTrailingSlash(dataLocation(properties, tableLocation)); + this.storageLocation = + stripTrailingSlash(dataLocation(properties, tableLocation, TableProperties.OBJECT_STORE_PATH)); this.context = pathContext(tableLocation); } @@ -134,14 +141,16 @@ private static String stripTrailingSlash(String path) { return result; } - public static String dataLocation(Map properties, String tableLocation) { + private static String dataLocation(Map properties, String tableLocation, String deprecatedProperty) { String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); if (dataLocation == null) { - dataLocation = properties.get(TableProperties.OBJECT_STORE_PATH); + dataLocation = properties.get(deprecatedProperty); if (dataLocation == null) { - dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); - if (dataLocation == null) { - dataLocation = String.format("%s/data", tableLocation); + dataLocation = String.format("%s/data", tableLocation); + } else { + if (deprecatedProperty != null) { + LOG.warn("Table property {} is deprecated, please use {} instead.", deprecatedProperty, + TableProperties.WRITE_DATA_LOCATION); } } } diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index 1ccb2102abf8..cfe668a69703 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -136,7 +136,7 @@ private TableProperties() { public static final boolean OBJECT_STORE_ENABLED_DEFAULT = false; /** - * @deprecated will be removed in 0.14.0, use {@link #WRITE_DATA_LOCATION} instead + * @deprecated Use {@link #WRITE_DATA_LOCATION} instead. */ @Deprecated public static final String OBJECT_STORE_PATH = "write.object-storage.path"; @@ -144,7 +144,7 @@ private TableProperties() { public static final String WRITE_LOCATION_PROVIDER_IMPL = "write.location-provider.impl"; /** - * @deprecated will be removed in 0.14.0, use {@link #WRITE_DATA_LOCATION} instead + * @deprecated Use {@link #WRITE_DATA_LOCATION} instead. */ @Deprecated public static final String WRITE_FOLDER_STORAGE_LOCATION = "write.folder-storage.path"; diff --git a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java index 62d4f78ab49a..2a4b6cf5952b 100644 --- a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java +++ b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java @@ -222,21 +222,39 @@ public void testObjectStorageLocationProviderPathResolution() { Assert.assertTrue("default data location should be used when object storage path not set", table.locationProvider().newDataLocation("file").contains(table.location() + "/data")); - String folderPath = "s3://random/folder/location"; + String objectPath = "s3://random/object/location"; table.updateProperties() - .set(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, folderPath) + .set(TableProperties.OBJECT_STORE_PATH, objectPath) .commit(); - Assert.assertTrue("folder storage path should be used when set", - table.locationProvider().newDataLocation("file").contains(folderPath)); + Assert.assertTrue("object storage path should be used when set", + table.locationProvider().newDataLocation("file").contains(objectPath)); - String objectPath = "s3://random/object/location"; + String dataPath = "s3://random/data/location"; table.updateProperties() - .set(TableProperties.OBJECT_STORE_PATH, objectPath) + .set(TableProperties.WRITE_DATA_LOCATION, dataPath) .commit(); Assert.assertTrue("object storage path should be used when set", - table.locationProvider().newDataLocation("file").contains(objectPath)); + table.locationProvider().newDataLocation("file").contains(dataPath)); + } + + @Test + public void testDefaultStorageLocationProviderPathResolution() { + table.updateProperties() + .set(TableProperties.OBJECT_STORE_ENABLED, "false") + .commit(); + + Assert.assertTrue("default data location should be used when object storage path not set", + table.locationProvider().newDataLocation("file").contains(table.location() + "/data")); + + String folderPath = "s3://random/folder/location"; + table.updateProperties() + .set(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, folderPath) + .commit(); + + Assert.assertTrue("folder storage path should be used when set", + table.locationProvider().newDataLocation("file").contains(folderPath)); String dataPath = "s3://random/data/location"; table.updateProperties() diff --git a/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java b/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java index 75a28782d4b4..8c6e225fa1bf 100644 --- a/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java +++ b/spark/src/jmh/java/org/apache/iceberg/spark/source/IcebergSourceBenchmark.java @@ -26,7 +26,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.iceberg.LocationProviders; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; import org.apache.iceberg.UpdateProperties; @@ -79,7 +78,8 @@ protected String newTableLocation() { } protected String dataLocation() { - return LocationProviders.dataLocation(table().properties(), table.location()); + Map properties = table.properties(); + return properties.getOrDefault(TableProperties.WRITE_DATA_LOCATION, String.format("%s/data", table.location())); } protected void cleanupFiles() throws IOException { From 45a79a08b58a37fec4d97dd966fff8a495d749e7 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Mon, 13 Sep 2021 10:33:14 -0700 Subject: [PATCH 04/11] Resolve comments. --- .../test/java/org/apache/iceberg/TestLocationProvider.java | 4 ++-- .../iceberg/spark/actions/BaseSnapshotTableSparkAction.java | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java index 2a4b6cf5952b..2b2a4300d461 100644 --- a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java +++ b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java @@ -235,7 +235,7 @@ public void testObjectStorageLocationProviderPathResolution() { .set(TableProperties.WRITE_DATA_LOCATION, dataPath) .commit(); - Assert.assertTrue("object storage path should be used when set", + Assert.assertTrue("write data path should be used when set", table.locationProvider().newDataLocation("file").contains(dataPath)); } @@ -261,7 +261,7 @@ public void testDefaultStorageLocationProviderPathResolution() { .set(TableProperties.WRITE_DATA_LOCATION, dataPath) .commit(); - Assert.assertTrue("object storage path should be used when set", + Assert.assertTrue("write data path should be used when set", table.locationProvider().newDataLocation("file").contains(dataPath)); } } diff --git a/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java b/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java index 7b7ee754f586..1ccb448f1dcc 100644 --- a/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java +++ b/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java @@ -167,6 +167,8 @@ protected Map destTableProps() { // remove any possible location properties from origin properties properties.remove(LOCATION); properties.remove(TableProperties.WRITE_METADATA_LOCATION); + properties.remove(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + properties.remove(TableProperties.OBJECT_STORE_PATH); properties.remove(TableProperties.WRITE_DATA_LOCATION); // set default and user-provided props From e4517b7bf2cafe51bf37f834f88d8e23790ec447 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Mon, 13 Sep 2021 14:15:36 -0700 Subject: [PATCH 05/11] Resolve comments. --- .../src/main/java/org/apache/iceberg/LocationProviders.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 3352fba76108..6162f4e1e3f7 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -57,12 +57,10 @@ public static LocationProvider locationsFor(String location, Map return ctor.newInstance(location, properties); } catch (ClassCastException e) { throw new IllegalArgumentException( - String.format( - "Provided implementation for dynamic instantiation should implement %s.", + String.format("Provided implementation for dynamic instantiation should implement %s.", LocationProvider.class), e); } - } else if (PropertyUtil.propertyAsBoolean( - properties, + } else if (PropertyUtil.propertyAsBoolean(properties, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_ENABLED_DEFAULT)) { return new ObjectStoreLocationProvider(location, properties); From 9579578041e99ecf3d53c46368c135a511c38f68 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Tue, 14 Sep 2021 16:38:40 -0700 Subject: [PATCH 06/11] Resolve comments. --- .../org/apache/iceberg/LocationProviders.java | 30 ++++++++++++++----- .../apache/iceberg/TestLocationProvider.java | 8 +++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 6162f4e1e3f7..3279a1fc17f6 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -74,7 +74,7 @@ static class DefaultLocationProvider implements LocationProvider { DefaultLocationProvider(String tableLocation, Map properties) { this.dataLocation = - stripTrailingSlash(dataLocation(properties, tableLocation, TableProperties.WRITE_FOLDER_STORAGE_LOCATION)); + stripTrailingSlash(dataLocation(properties, tableLocation, false)); } @Override @@ -97,7 +97,7 @@ static class ObjectStoreLocationProvider implements LocationProvider { ObjectStoreLocationProvider(String tableLocation, Map properties) { this.storageLocation = - stripTrailingSlash(dataLocation(properties, tableLocation, TableProperties.OBJECT_STORE_PATH)); + stripTrailingSlash(dataLocation(properties, tableLocation, true)); this.context = pathContext(tableLocation); } @@ -139,19 +139,33 @@ private static String stripTrailingSlash(String path) { return result; } - private static String dataLocation(Map properties, String tableLocation, String deprecatedProperty) { + private static String dataLocation(Map properties, String tableLocation, boolean isObjectStore) { String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); if (dataLocation == null) { + String deprecatedProperty = null; + if (isObjectStore) { + deprecatedProperty = TableProperties.OBJECT_STORE_PATH; + } + dataLocation = properties.get(deprecatedProperty); if (dataLocation == null) { - dataLocation = String.format("%s/data", tableLocation); - } else { - if (deprecatedProperty != null) { - LOG.warn("Table property {} is deprecated, please use {} instead.", deprecatedProperty, - TableProperties.WRITE_DATA_LOCATION); + dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + if (dataLocation == null) { + dataLocation = String.format("%s/data", tableLocation); + } else { + logWarnForDeprecatedProperty(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); } + } else { + logWarnForDeprecatedProperty(deprecatedProperty); } } return dataLocation; } + + private static void logWarnForDeprecatedProperty(String deprecatedProperty) { + if (deprecatedProperty != null) { + LOG.warn("Table property {} is deprecated, please use {} instead.", deprecatedProperty, + TableProperties.WRITE_DATA_LOCATION); + } + } } diff --git a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java index 2b2a4300d461..c20a4df44b07 100644 --- a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java +++ b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java @@ -222,6 +222,14 @@ public void testObjectStorageLocationProviderPathResolution() { Assert.assertTrue("default data location should be used when object storage path not set", table.locationProvider().newDataLocation("file").contains(table.location() + "/data")); + String folderPath = "s3://random/folder/location"; + table.updateProperties() + .set(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, folderPath) + .commit(); + + Assert.assertTrue("folder storage path should be used when set", + table.locationProvider().newDataLocation("file").contains(folderPath)); + String objectPath = "s3://random/object/location"; table.updateProperties() .set(TableProperties.OBJECT_STORE_PATH, objectPath) From 5402407c23fe608f4384227312caa45e3b2602c1 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Wed, 15 Sep 2021 10:32:28 -0700 Subject: [PATCH 07/11] Resolve comments. --- .../org/apache/iceberg/LocationProviders.java | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 3279a1fc17f6..71f28fe5d737 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -73,8 +73,7 @@ static class DefaultLocationProvider implements LocationProvider { private final String dataLocation; DefaultLocationProvider(String tableLocation, Map properties) { - this.dataLocation = - stripTrailingSlash(dataLocation(properties, tableLocation, false)); + this.dataLocation = stripTrailingSlash(dataLocation(properties, tableLocation, false)); } @Override @@ -96,8 +95,7 @@ static class ObjectStoreLocationProvider implements LocationProvider { private final String context; ObjectStoreLocationProvider(String tableLocation, Map properties) { - this.storageLocation = - stripTrailingSlash(dataLocation(properties, tableLocation, true)); + this.storageLocation = stripTrailingSlash(dataLocation(properties, tableLocation, true)); this.context = pathContext(tableLocation); } @@ -139,33 +137,40 @@ private static String stripTrailingSlash(String path) { return result; } + /** + * Get the data file location. For the {@link DefaultLocationProvider}, the priority level are + * "write.data.path" -> "write.folder-storage.path" -> "table-location/data". + * For the {@link ObjectStoreLocationProvider}, the priority level are + * "write.data.path" -> "write.object-storage.path" -> "write.folder-storage.path" -> "table-location/data". + */ private static String dataLocation(Map properties, String tableLocation, boolean isObjectStore) { String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); if (dataLocation == null) { - String deprecatedProperty = null; - if (isObjectStore) { - deprecatedProperty = TableProperties.OBJECT_STORE_PATH; - } - - dataLocation = properties.get(deprecatedProperty); + dataLocation = deprecatedDataLocation(properties, isObjectStore); if (dataLocation == null) { - dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); - if (dataLocation == null) { - dataLocation = String.format("%s/data", tableLocation); - } else { - logWarnForDeprecatedProperty(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); - } - } else { - logWarnForDeprecatedProperty(deprecatedProperty); + dataLocation = String.format("%s/data", tableLocation); } } return dataLocation; } - private static void logWarnForDeprecatedProperty(String deprecatedProperty) { - if (deprecatedProperty != null) { - LOG.warn("Table property {} is deprecated, please use {} instead.", deprecatedProperty, - TableProperties.WRITE_DATA_LOCATION); + private static String deprecatedDataLocation(Map properties, boolean isObjectStore) { + String deprecatedProperty = isObjectStore ? + TableProperties.OBJECT_STORE_PATH : TableProperties.WRITE_FOLDER_STORAGE_LOCATION; + + String dataLocation = properties.get(deprecatedProperty); + + final String warnMsg = "Table property {} is deprecated, please use " + TableProperties.WRITE_DATA_LOCATION + + " instead."; + if (dataLocation == null) { + dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + if (dataLocation != null) { + LOG.warn(warnMsg, TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + } + } else { + LOG.warn(warnMsg, deprecatedProperty); } + + return dataLocation; } } From 0c3209b01378075e0e83364414907a5337afeef4 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Wed, 15 Sep 2021 10:40:53 -0700 Subject: [PATCH 08/11] Resolve comments. --- core/src/main/java/org/apache/iceberg/LocationProviders.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 71f28fe5d737..b013662cb966 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -162,7 +162,7 @@ private static String deprecatedDataLocation(Map properties, boo final String warnMsg = "Table property {} is deprecated, please use " + TableProperties.WRITE_DATA_LOCATION + " instead."; - if (dataLocation == null) { + if (dataLocation == null && !deprecatedProperty.equals(TableProperties.WRITE_FOLDER_STORAGE_LOCATION)) { dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); if (dataLocation != null) { LOG.warn(warnMsg, TableProperties.WRITE_FOLDER_STORAGE_LOCATION); From 6018496a3a42696f1b2b9bb9bf51266962033787 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Wed, 15 Sep 2021 11:39:21 -0700 Subject: [PATCH 09/11] Resolve comments. --- .../src/main/java/org/apache/iceberg/LocationProviders.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index b013662cb966..594f84853415 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -162,13 +162,13 @@ private static String deprecatedDataLocation(Map properties, boo final String warnMsg = "Table property {} is deprecated, please use " + TableProperties.WRITE_DATA_LOCATION + " instead."; - if (dataLocation == null && !deprecatedProperty.equals(TableProperties.WRITE_FOLDER_STORAGE_LOCATION)) { + if (dataLocation != null) { + LOG.warn(warnMsg, deprecatedProperty); + } else if (deprecatedProperty.equals(TableProperties.OBJECT_STORE_PATH)) { dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); if (dataLocation != null) { LOG.warn(warnMsg, TableProperties.WRITE_FOLDER_STORAGE_LOCATION); } - } else { - LOG.warn(warnMsg, deprecatedProperty); } return dataLocation; From df709648d054b45fc10db0fda9db5fbe1a900890 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Wed, 15 Sep 2021 15:50:43 -0700 Subject: [PATCH 10/11] Resolve comments. --- site/docs/aws.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/site/docs/aws.md b/site/docs/aws.md index 1b8e16ce5d02..7e8588c7be91 100644 --- a/site/docs/aws.md +++ b/site/docs/aws.md @@ -372,9 +372,10 @@ Which will write the data to S3 with a hash (`2d3905f8`) appended directly after s3://my-table-data-bucket/2d3905f8/my_ns.db/my_table/category=orders/00000-0-5affc076-96a4-48f2-9cd2-d5efbc9f0c94-00001.parquet ``` -Note, the path resolution logic for `ObjectStoreLocationProvider` is as follows: -- if `write.data.path` is set, use it -- if not found, use `/data` +Note, the path resolution logic for `ObjectStoreLocationProvider` is `write.data.path` then `/data`. +However, for the older versions up to 0.12.0, the logic is as follows: +- before 0.12.0, `write.object-storage.path` must be set. +- at 0.12.0, `write.object-storage.path` then `write.folder-storage.path` then `/data`. For more details, please refer to the [LocationProvider Configuration](../custom-catalog/#custom-location-provider-implementation) section. From 884cbf800e93292a5dea3853093b705dbe772a32 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Mon, 20 Sep 2021 11:06:06 -0700 Subject: [PATCH 11/11] Resolve comments. --- .../org/apache/iceberg/LocationProviders.java | 69 ++++++++----------- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/LocationProviders.java b/core/src/main/java/org/apache/iceberg/LocationProviders.java index 594f84853415..f735d9bbaba5 100644 --- a/core/src/main/java/org/apache/iceberg/LocationProviders.java +++ b/core/src/main/java/org/apache/iceberg/LocationProviders.java @@ -28,11 +28,8 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.PropertyUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class LocationProviders { - private static final Logger LOG = LoggerFactory.getLogger(LocationProviders.class); private LocationProviders() { } @@ -73,7 +70,18 @@ static class DefaultLocationProvider implements LocationProvider { private final String dataLocation; DefaultLocationProvider(String tableLocation, Map properties) { - this.dataLocation = stripTrailingSlash(dataLocation(properties, tableLocation, false)); + this.dataLocation = stripTrailingSlash(dataLocation(properties, tableLocation)); + } + + private static String dataLocation(Map properties, String tableLocation) { + String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); + if (dataLocation == null) { + dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + if (dataLocation == null) { + dataLocation = String.format("%s/data", tableLocation); + } + } + return dataLocation; } @Override @@ -95,10 +103,24 @@ static class ObjectStoreLocationProvider implements LocationProvider { private final String context; ObjectStoreLocationProvider(String tableLocation, Map properties) { - this.storageLocation = stripTrailingSlash(dataLocation(properties, tableLocation, true)); + this.storageLocation = stripTrailingSlash(dataLocation(properties, tableLocation)); this.context = pathContext(tableLocation); } + private static String dataLocation(Map properties, String tableLocation) { + String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); + if (dataLocation == null) { + dataLocation = properties.get(TableProperties.OBJECT_STORE_PATH); + if (dataLocation == null) { + dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); + if (dataLocation == null) { + dataLocation = String.format("%s/data", tableLocation); + } + } + } + return dataLocation; + } + @Override public String newDataLocation(PartitionSpec spec, StructLike partitionData, String filename) { return newDataLocation(String.format("%s/%s", spec.partitionToPath(partitionData), filename)); @@ -136,41 +158,4 @@ private static String stripTrailingSlash(String path) { } return result; } - - /** - * Get the data file location. For the {@link DefaultLocationProvider}, the priority level are - * "write.data.path" -> "write.folder-storage.path" -> "table-location/data". - * For the {@link ObjectStoreLocationProvider}, the priority level are - * "write.data.path" -> "write.object-storage.path" -> "write.folder-storage.path" -> "table-location/data". - */ - private static String dataLocation(Map properties, String tableLocation, boolean isObjectStore) { - String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); - if (dataLocation == null) { - dataLocation = deprecatedDataLocation(properties, isObjectStore); - if (dataLocation == null) { - dataLocation = String.format("%s/data", tableLocation); - } - } - return dataLocation; - } - - private static String deprecatedDataLocation(Map properties, boolean isObjectStore) { - String deprecatedProperty = isObjectStore ? - TableProperties.OBJECT_STORE_PATH : TableProperties.WRITE_FOLDER_STORAGE_LOCATION; - - String dataLocation = properties.get(deprecatedProperty); - - final String warnMsg = "Table property {} is deprecated, please use " + TableProperties.WRITE_DATA_LOCATION + - " instead."; - if (dataLocation != null) { - LOG.warn(warnMsg, deprecatedProperty); - } else if (deprecatedProperty.equals(TableProperties.OBJECT_STORE_PATH)) { - dataLocation = properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION); - if (dataLocation != null) { - LOG.warn(warnMsg, TableProperties.WRITE_FOLDER_STORAGE_LOCATION); - } - } - - return dataLocation; - } }