Add test for hive.metastore.thrift.delete-files-on-drop
ebyhr committed Oct 12, 2022
1 parent ee76797 commit 3198397
Showing 6 changed files with 237 additions and 1 deletion.
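
For context, hive.metastore.thrift.delete-files-on-drop is a Hive catalog property that makes Trino delete a table's files from storage on DROP TABLE when the Thrift metastore itself does not. A minimal sketch of a catalog properties file with the toggle enabled (the file name and metastore address are illustrative placeholders, not part of this commit):

# etc/catalog/hive.properties (hypothetical example)
connector.name=hive
hive.metastore=thrift
hive.metastore.uri=thrift://example-metastore:9083
# Property exercised by the new test
hive.metastore.thrift.delete-files-on-drop=true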
1 change: 1 addition & 0 deletions plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
@@ -92,6 +92,7 @@ set +e
-Dhive.hadoop2.metastoreHost=localhost \
-Dhive.hadoop2.metastorePort=9083 \
-Dhive.hadoop2.databaseName=default \
-Dhive.hadoop2.s3.endpoint="${S3_BUCKET_ENDPOINT}" \
-Dhive.hadoop2.s3.awsAccessKey="${AWS_ACCESS_KEY_ID}" \
-Dhive.hadoop2.s3.awsSecretKey="${AWS_SECRET_ACCESS_KEY}" \
-Dhive.hadoop2.s3.writableBucket="${S3_BUCKET}" \
20 changes: 20 additions & 0 deletions plugin/trino-hive-hadoop2/pom.xml
@@ -82,6 +82,18 @@
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-core</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>com.qubole.rubix</groupId>
<artifactId>rubix-presto-shaded</artifactId>
@@ -146,6 +158,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-testing-containers</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>testing</artifactId>
@@ -180,6 +198,7 @@
<excludes>
<exclude>**/TestHive.java</exclude>
<exclude>**/TestHiveAlluxioMetastore.java</exclude>
<exclude>**/TestHiveThriftMetastoreWithS3.java</exclude>
<exclude>**/TestHiveFileSystemS3.java</exclude>
<exclude>**/TestHiveFileSystemS3SelectPushdown.java</exclude>
<exclude>**/TestHiveFileSystemS3SelectJsonPushdown.java</exclude>
@@ -221,6 +240,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>**/TestHiveThriftMetastoreWithS3.java</include>
<include>**/TestHiveFileSystemS3.java</include>
<include>**/TestHiveFileSystemS3SelectPushdown.java</include>
<include>**/TestHiveFileSystemS3SelectCsvPushdownWithSplits.java</include>
plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java (new file)
@@ -0,0 +1,154 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive;

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import io.trino.plugin.hive.containers.HiveHadoop;
import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig;
import io.trino.plugin.hive.s3.S3HiveQueryRunner;
import io.trino.testing.AbstractTestQueryFramework;
import io.trino.testing.QueryRunner;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.List;

import static io.trino.testing.sql.TestTable.randomTableSuffix;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;
import static org.assertj.core.api.Assertions.assertThat;

public class TestHiveThriftMetastoreWithS3
extends AbstractTestQueryFramework
{
private final String s3endpoint;
private final String awsAccessKey;
private final String awsSecretKey;
private final String writableBucket;
private final String schemaName;
private final Path hadoopCoreSiteXmlTempFile;
private final AmazonS3 s3Client;

@Parameters({
"hive.hadoop2.s3.endpoint",
"hive.hadoop2.s3.awsAccessKey",
"hive.hadoop2.s3.awsSecretKey",
"hive.hadoop2.s3.writableBucket",
})
public TestHiveThriftMetastoreWithS3(
String s3endpoint,
String awsAccessKey,
String awsSecretKey,
String writableBucket)
throws IOException
{
this.s3endpoint = requireNonNull(s3endpoint, "s3endpoint is null");
this.awsAccessKey = requireNonNull(awsAccessKey, "awsAccessKey is null");
this.awsSecretKey = requireNonNull(awsSecretKey, "awsSecretKey is null");
this.writableBucket = requireNonNull(writableBucket, "writableBucket is null");
this.schemaName = "test_thrift_s3_" + randomTableSuffix();

String coreSiteXmlContent = Resources.toString(Resources.getResource("s3/hive-core-site.template.xml"), UTF_8)
.replace("%S3_BUCKET_ENDPOINT%", s3endpoint)
.replace("%AWS_ACCESS_KEY_ID%", awsAccessKey)
.replace("%AWS_SECRET_ACCESS_KEY%", awsSecretKey);

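// Render the core-site template into a temp file; createQueryRunner later mounts it into the Hadoop container as /etc/hadoop/conf/core-site.xml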
hadoopCoreSiteXmlTempFile = Files.createTempFile("core-site", ".xml", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
hadoopCoreSiteXmlTempFile.toFile().deleteOnExit();
Files.writeString(hadoopCoreSiteXmlTempFile, coreSiteXmlContent);

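// S3 client used by the test itself to check which objects remain under the table location after DROP TABLE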
s3Client = AmazonS3Client.builder()
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3endpoint, null))
.withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey)))
.build();
}

@Override
protected QueryRunner createQueryRunner()
throws Exception
{
HiveHadoop hiveHadoop = HiveHadoop.builder()
.withFilesToMount(ImmutableMap.of("/etc/hadoop/conf/core-site.xml", hadoopCoreSiteXmlTempFile.normalize().toAbsolutePath().toString()))
.build();
hiveHadoop.start();

return S3HiveQueryRunner.builder()
.setHiveMetastoreEndpoint(hiveHadoop.getHiveMetastoreEndpoint())
.setS3Endpoint(s3endpoint)
.setS3AccessKey(awsAccessKey)
.setS3SecretKey(awsSecretKey)
.setBucketName(writableBucket)
.setCreateTpchSchemas(false)
.setThriftMetastoreConfig(new ThriftMetastoreConfig().setDeleteFilesOnDrop(true))
.setHiveProperties(ImmutableMap.of("hive.allow-register-partition-procedure", "true"))
.build();
}

@BeforeClass
public void setUp()
{
String schemaLocation = "s3a://%s/%s".formatted(writableBucket, schemaName);
assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')");
}

@AfterClass(alwaysRun = true)
public void tearDown()
{
assertUpdate("DROP SCHEMA IF EXISTS " + schemaName);
}

@Test
public void testRecreateTable()
{
String tableName = "test_recreate_table_" + randomTableSuffix();
String schemaTableName = "%s.%s".formatted(schemaName, tableName);
String tableLocation = "%s/%s".formatted(schemaName, tableName);

// Creating a new table generates a special empty file on S3 (not on MinIO)
assertUpdate("CREATE TABLE " + schemaTableName + "(col int)");
try {
assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1)", 1);
assertThat(getS3ObjectSummaries(tableLocation)).hasSize(2); // directory + file

// DROP TABLE with the Thrift metastore on S3 (not MinIO) leaves some files behind
// when the 'hive.metastore.thrift.delete-files-on-drop' config property is false.
// The subsequent CREATE TABLE then throws "Target directory for table 'xxx' already exists"
assertUpdate("DROP TABLE " + schemaTableName);
assertThat(getS3ObjectSummaries(tableLocation)).hasSize(0);

assertUpdate("CREATE TABLE " + schemaTableName + "(col int)");
}
finally {
assertUpdate("DROP TABLE IF EXISTS " + schemaTableName);
}
}

private List<S3ObjectSummary> getS3ObjectSummaries(String prefix)
{
return s3Client.listObjectsV2(writableBucket, prefix).getObjectSummaries();
}
}
plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml (new file)
@@ -0,0 +1,43 @@
<?xml version="1.0"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9000</value>
</property>

<property>
<name>fs.s3a.endpoint</name>
<value>%S3_BUCKET_ENDPOINT%</value>
</property>

<property>
<name>fs.s3.awsAccessKeyId</name>
<value>%AWS_ACCESS_KEY_ID%</value>
</property>

<property>
<name>fs.s3.awsSecretAccessKey</name>
<value>%AWS_SECRET_ACCESS_KEY%</value>
</property>

<property>
<name>fs.s3a.access.key</name>
<value>%AWS_ACCESS_KEY_ID%</value>
</property>

<property>
<name>fs.s3a.secret.key</name>
<value>%AWS_SECRET_ACCESS_KEY%</value>
</property>

<!-- Hive impersonation -->
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>

<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
</configuration>
plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java
@@ -117,6 +117,7 @@ public static class Builder<SELF extends Builder<?>>
private Optional<DirectoryLister> directoryLister = Optional.empty();
private boolean tpcdsCatalogEnabled;
private String security = SQL_STANDARD;
private boolean createTpchSchemas = true;
private ColumnNaming tpchColumnNaming = SIMPLIFIED;
private DecimalTypeMapping tpchDecimalTypeMapping = DOUBLE;

@@ -197,6 +198,12 @@ public SELF setSecurity(String security)
return self();
}

public SELF setCreateTpchSchemas(boolean createTpchSchemas)
{
this.createTpchSchemas = createTpchSchemas;
return self();
}

public SELF setTpchColumnNaming(ColumnNaming tpchColumnNaming)
{
this.tpchColumnNaming = requireNonNull(tpchColumnNaming, "tpchColumnNaming is null");
@@ -256,7 +263,9 @@ public DistributedQueryRunner build()
queryRunner.createCatalog(HIVE_CATALOG, "hive", hiveProperties);
queryRunner.createCatalog(HIVE_BUCKETED_CATALOG, "hive", hiveBucketedProperties);

populateData(queryRunner, metastore);
if (createTpchSchemas) {
populateData(queryRunner, metastore);
}

return queryRunner;
}
plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/S3HiveQueryRunner.java
@@ -21,6 +21,7 @@
import io.trino.plugin.hive.containers.HiveMinioDataLake;
import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore;
import io.trino.plugin.hive.metastore.thrift.TestingTokenAwareMetastoreClientFactory;
import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig;
import io.trino.testing.DistributedQueryRunner;
import io.trino.tpch.TpchTable;

@@ -85,6 +86,7 @@ public static class Builder
{
private HostAndPort hiveMetastoreEndpoint;
private Duration thriftMetastoreTimeout = TestingTokenAwareMetastoreClientFactory.TIMEOUT;
private ThriftMetastoreConfig thriftMetastoreConfig = new ThriftMetastoreConfig();
private String s3Endpoint;
private String s3AccessKey;
private String s3SecretKey;
@@ -102,6 +104,12 @@ public Builder setThriftMetastoreTimeout(Duration thriftMetastoreTimeout)
return this;
}

public Builder setThriftMetastoreConfig(ThriftMetastoreConfig thriftMetastoreConfig)
{
this.thriftMetastoreConfig = requireNonNull(thriftMetastoreConfig, "thriftMetastoreConfig is null");
return this;
}

public Builder setS3Endpoint(String s3Endpoint)
{
this.s3Endpoint = requireNonNull(s3Endpoint, "s3Endpoint is null");
@@ -145,6 +153,7 @@ public DistributedQueryRunner build()
setMetastore(distributedQueryRunner -> new BridgingHiveMetastore(
testingThriftHiveMetastoreBuilder()
.metastoreClient(hiveMetastoreEndpoint, thriftMetastoreTimeout)
.thriftMetastoreConfig(thriftMetastoreConfig)
.build()));
setInitialSchemasLocationBase("s3a://" + bucketName); // cannot use s3:// as Hive metastore is not configured to accept it
return super.build();
