From 85af13a6117358d3470053f99cd80859a5c4fca8 Mon Sep 17 00:00:00 2001 From: Mayank Vadariya <48036907+mayankvadariya@users.noreply.github.com> Date: Tue, 10 Sep 2024 13:27:34 -0400 Subject: [PATCH] Deduce correct partition location in Hive partition projection --- .../hive/projection/PartitionProjection.java | 10 ++++- .../hive/TestHiveS3AndGlueMetastoreTest.java | 45 +++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java index cd6a79f72d39..b6c22d7d8143 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java @@ -102,12 +102,20 @@ private Partition buildPartitionObject(Table table, String partitionName) table.getPartitionColumns().stream() .map(Column::getName).collect(Collectors.toList()), partitionValues)) - .orElseGet(() -> format("%s/%s/", table.getStorage().getLocation(), partitionName))) + .orElseGet(() -> getPartitionLocation(table.getStorage().getLocation(), partitionName))) .setBucketProperty(table.getStorage().getBucketProperty()) .setSerdeParameters(table.getStorage().getSerdeParameters())) .build(); } + private static String getPartitionLocation(String tableLocation, String partitionName) + { + if (tableLocation.endsWith("/")) { + return format("%s%s/", tableLocation, partitionName); + } + return format("%s/%s/", tableLocation, partitionName); + } + private static String expandStorageLocationTemplate(String template, List partitionColumns, List partitionValues) { Matcher matcher = PROJECTION_LOCATION_TEMPLATE_PLACEHOLDER_PATTERN.matcher(template); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java index 6c3a7499f0a6..11b4c2567417 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java @@ -35,6 +35,7 @@ import static io.trino.testing.MaterializedResult.resultBuilder; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.TestingSession.testSessionBuilder; +import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -61,6 +62,7 @@ protected QueryRunner createQueryRunner() .addHiveProperty("hive.metastore.glue.default-warehouse-dir", schemaPath()) .addHiveProperty("hive.security", "allow-all") .addHiveProperty("hive.non-managed-table-writes-enabled", "true") + .addHiveProperty("hive.partition-projection-enabled", "true") .addHiveProperty("fs.hadoop.enabled", "false") .addHiveProperty("fs.native-s3.enabled", "true") .build(); @@ -299,6 +301,49 @@ private void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationP } } + @Test + public void testPartitionProjectionWithProvidedTableLocation() + { + for (LocationPattern locationPattern : LocationPattern.values()) { + if (locationPattern == DOUBLE_SLASH || locationPattern == TRIPLE_SLASH || locationPattern == TWO_TRAILING_SLASHES) { + assertThatThrownBy(() -> testPartitionProjectionWithProvidedTableLocation(locationPattern)) + .hasMessageStartingWith("Unsupported location that cannot be internally represented: ") + .hasStackTraceContaining("SQL: CREATE TABLE"); + continue; + } + testPartitionProjectionWithProvidedTableLocation(locationPattern); + } + } + + private void testPartitionProjectionWithProvidedTableLocation(LocationPattern locationPattern) + { + String tableName = "test_partition_projection_" + randomNameSuffix(); + String tableLocation = locationPattern.locationForTable(bucketName, schemaName, tableName); + + computeActual(format(""" + CREATE TABLE %s ( + name varchar(25), + short_name varchar WITH ( + partition_projection_type='date', + partition_projection_format='yyyy-MM-dd HH', + partition_projection_range=ARRAY['2001-01-22 00', '2001-01-22 06'], + partition_projection_interval=1, + partition_projection_interval_unit='HOURS' + ) + ) + WITH ( + partitioned_by=ARRAY['short_name'], + partition_projection_enabled=true, + external_location = '%s' + )""", + tableName, + tableLocation)); + + assertUpdate("INSERT INTO " + tableName + " VALUES ('name1', '2001-01-22 00')", 1); + + assertQuery(format("SELECT name FROM %s", tableName), "VALUES ('name1')"); + } + @Test public void testInvalidSchemaNameLocation() {