From 6e7a84706e68ad47d713824fec7564f85e57f25f Mon Sep 17 00:00:00 2001
From: David Phillips
Date: Wed, 6 Dec 2023 14:05:03 +0800
Subject: [PATCH] Remove legacy Hive S3 tests

---
 .github/workflows/ci.yml                      |  16 --
 .../bin/run_hive_s3_tests.sh                  |  75 ------
 .../conf/files/hadoop-put.sh                  |   8 -
 plugin/trino-hive-hadoop2/pom.xml             |  17 --
 .../hive/TestHiveThriftMetastoreWithS3.java   | 202 ------------------
 .../resources/s3/hive-core-site.template.xml  |  43 ----
 6 files changed, 361 deletions(-)
 delete mode 100755 plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
 delete mode 100755 plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh
 delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java
 delete mode 100644 plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e3d7b3dbfa5c..8fd233feea2b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -306,22 +306,6 @@ jobs:
         run: |
           source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
             plugin/trino-hive-hadoop2/bin/run_hive_tests.sh
-      - name: Run Hive S3 Tests
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.TRINO_AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ${{ vars.TRINO_AWS_REGION }}
-          S3_BUCKET: ${{ vars.TRINO_S3_BUCKET }}
-          S3_BUCKET_ENDPOINT: "https://s3.${{ vars.TRINO_AWS_REGION }}.amazonaws.com"
-        run: |
-          if [ "${AWS_ACCESS_KEY_ID}" != "" ]; then
-            source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
-              plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
-            if [ matrix.config == 'config-hdp3' ]; then
-              # JsonSerde class needed for the S3 Select JSON tests is only available on hdp3.
-              plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh
-            fi
-          fi
       - name: Run Hive AWS Tests
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }}
diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
deleted file mode 100755
index 82fa2b7fd6e6..000000000000
--- a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-
-set -euo pipefail -x
-
-. "${BASH_SOURCE%/*}/common.sh"
-
-abort_if_not_gib_impacted
-
-check_vars S3_BUCKET S3_BUCKET_ENDPOINT AWS_REGION \
-    AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY
-
-cleanup_hadoop_docker_containers
-start_hadoop_docker_containers
-
-test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)"
-
-# insert AWS credentials
-deploy_core_site_xml core-site.xml.s3-template \
-    AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY S3_BUCKET_ENDPOINT
-
-# create test tables
-# can't use create_test_tables because the first table is created with different commands
-table_path="s3a://${S3_BUCKET}/${test_directory}/trino_test_external_fs/"
-exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}"
-exec_in_hadoop_master_container /docker/files/hadoop-put.sh /docker/files/test_table.csv{,.gz,.bz2,.lz4} "${table_path}"
-exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -e "
-    CREATE EXTERNAL TABLE trino_test_external_fs(t_bigint bigint)
-    STORED AS TEXTFILE
-    LOCATION '${table_path}'"
-
-table_path="s3a://${S3_BUCKET}/${test_directory}/trino_test_external_fs_with_header/"
-exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}"
-exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table_with_header.csv{,.gz,.bz2,.lz4} "${table_path}"
-exec_in_hadoop_master_container /usr/bin/hive -e "
-    CREATE EXTERNAL TABLE trino_test_external_fs_with_header(t_bigint bigint)
-    STORED AS TEXTFILE
-    LOCATION '${table_path}'
-    TBLPROPERTIES ('skip.header.line.count'='1')"
-
-table_path="s3a://${S3_BUCKET}/${test_directory}/trino_test_external_fs_with_header_and_footer/"
-exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}"
-exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table_with_header_and_footer.csv{,.gz,.bz2,.lz4} "${table_path}"
-exec_in_hadoop_master_container /usr/bin/hive -e "
-    CREATE EXTERNAL TABLE trino_test_external_fs_with_header_and_footer(t_bigint bigint)
-    STORED AS TEXTFILE
-    LOCATION '${table_path}'
-    TBLPROPERTIES ('skip.header.line.count'='2', 'skip.footer.line.count'='2')"
-
-stop_unnecessary_hadoop_services
-
-# restart hive-metastore to apply S3 changes in core-site.xml
-docker exec "$(hadoop_master_container)" supervisorctl restart hive-metastore
-retry check_hadoop
-
-# run product tests
-pushd "${PROJECT_ROOT}"
-set +e
-./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-s3 \
-    -DHADOOP_USER_NAME=hive \
-    -Dhive.hadoop2.metastoreHost=localhost \
-    -Dhive.hadoop2.metastorePort=9083 \
-    -Dhive.hadoop2.databaseName=default \
-    -Dhive.hadoop2.s3.endpoint="${S3_BUCKET_ENDPOINT}" \
-    -Dhive.hadoop2.s3.region="${AWS_REGION}" \
-    -Dhive.hadoop2.s3.awsAccessKey="${AWS_ACCESS_KEY_ID}" \
-    -Dhive.hadoop2.s3.awsSecretKey="${AWS_SECRET_ACCESS_KEY}" \
-    -Dhive.hadoop2.s3.writableBucket="${S3_BUCKET}" \
-    -Dhive.hadoop2.s3.testDirectory="${test_directory}"
-EXIT_CODE=$?
-set -e
-popd
-
-cleanup_hadoop_docker_containers
-
-exit "${EXIT_CODE}"
diff --git a/plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh b/plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh
deleted file mode 100755
index 33a7431af06e..000000000000
--- a/plugin/trino-hive-hadoop2/conf/files/hadoop-put.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-set -xeuo pipefail
-
-# Hadoop 3 without -d (don't create _COPYING_ temporary file) requires additional S3 permissions
-# Hadoop 2 doesn't have '-d' switch
-hadoop fs -put -f -d "$@" ||
-    hadoop fs -put -f "$@"
diff --git a/plugin/trino-hive-hadoop2/pom.xml b/plugin/trino-hive-hadoop2/pom.xml
index 535dca91b021..eea081792b71 100644
--- a/plugin/trino-hive-hadoop2/pom.xml
+++ b/plugin/trino-hive-hadoop2/pom.xml
@@ -231,7 +231,6 @@
                 <configuration>
                     <excludes>
                         <exclude>**/TestHive.java</exclude>
-                        <exclude>**/TestHiveThriftMetastoreWithS3.java</exclude>
                     </excludes>
                 </configuration>
             </plugin>
@@ -254,21 +253,5 @@
             </plugins>
         </build>
     </profile>
-    <profile>
-        <id>test-hive-hadoop2-s3</id>
-        <build>
-            <plugins>
-                <plugin>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-surefire-plugin</artifactId>
-                    <configuration>
-                        <includes>
-                            <include>**/TestHiveThriftMetastoreWithS3.java</include>
-                        </includes>
-                    </configuration>
-                </plugin>
-            </plugins>
-        </build>
-    </profile>
 </profiles>
 </project>
diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java
deleted file mode 100644
index d46a24a7e515..000000000000
--- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.plugin.hive;
-
-import com.amazonaws.auth.AWSStaticCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.client.builder.AwsClientBuilder;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3Client;
-import com.amazonaws.services.s3.model.S3ObjectSummary;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.io.Resources;
-import io.trino.plugin.hive.containers.HiveHadoop;
-import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig;
-import io.trino.plugin.hive.s3.S3HiveQueryRunner;
-import io.trino.testing.AbstractTestQueryFramework;
-import io.trino.testing.QueryRunner;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInstance;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.attribute.PosixFilePermissions;
-import java.util.List;
-
-import static io.trino.testing.TestingNames.randomNameSuffix;
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static java.util.Objects.requireNonNull;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
-
-@TestInstance(PER_CLASS)
-public class TestHiveThriftMetastoreWithS3
-        extends AbstractTestQueryFramework
-{
-    private final String s3endpoint;
-    private final String s3Region;
-    private final String awsAccessKey;
-    private final String awsSecretKey;
-    private final String writableBucket;
-    private final String schemaName;
-    private final Path hadoopCoreSiteXmlTempFile;
-    private final AmazonS3 s3Client;
-
-    public TestHiveThriftMetastoreWithS3()
-            throws IOException
-    {
-        this.s3endpoint = requireNonNull(System.getProperty("hive.hadoop2.s3.endpoint"), "hive.hadoop2.s3.endpoint is null");
-        this.s3Region = requireNonNull(System.getProperty("hive.hadoop2.s3.region"), "hive.hadoop2.s3.region is null");
-        this.awsAccessKey = requireNonNull(System.getProperty("hive.hadoop2.s3.awsAccessKey"), "hive.hadoop2.s3.awsAccessKey is null");
-        this.awsSecretKey = requireNonNull(System.getProperty("hive.hadoop2.s3.awsSecretKey"), "hive.hadoop2.s3.awsSecretKey is null");
-        this.writableBucket = requireNonNull(System.getProperty("hive.hadoop2.s3.writableBucket"), "hive.hadoop2.s3.writableBucket is null");
-        this.schemaName = "test_thrift_s3_" + randomNameSuffix();
-
-        String coreSiteXmlContent = Resources.toString(Resources.getResource("s3/hive-core-site.template.xml"), UTF_8)
-                .replace("%S3_BUCKET_ENDPOINT%", s3endpoint)
-                .replace("%AWS_ACCESS_KEY_ID%", awsAccessKey)
-                .replace("%AWS_SECRET_ACCESS_KEY%", awsSecretKey);
-
-        hadoopCoreSiteXmlTempFile = Files.createTempFile("core-site", ".xml", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
-        hadoopCoreSiteXmlTempFile.toFile().deleteOnExit();
-        Files.writeString(hadoopCoreSiteXmlTempFile, coreSiteXmlContent);
-
-        s3Client = AmazonS3Client.builder()
-                .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3endpoint, null))
-                .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey)))
-                .build();
-    }
-
-    @Override
-    protected QueryRunner createQueryRunner()
-            throws Exception
-    {
-        HiveHadoop hiveHadoop = HiveHadoop.builder()
-                .withFilesToMount(ImmutableMap.of("/etc/hadoop/conf/core-site.xml", hadoopCoreSiteXmlTempFile.normalize().toAbsolutePath().toString()))
-                .build();
-        hiveHadoop.start();
-
-        return S3HiveQueryRunner.builder()
-                .setHiveMetastoreEndpoint(hiveHadoop.getHiveMetastoreEndpoint())
-                .setS3Endpoint(s3endpoint)
-                .setS3Region(s3Region)
-                .setS3AccessKey(awsAccessKey)
-                .setS3SecretKey(awsSecretKey)
-                .setBucketName(writableBucket)
-                .setCreateTpchSchemas(false)
-                .setThriftMetastoreConfig(new ThriftMetastoreConfig().setDeleteFilesOnDrop(true))
-                .setHiveProperties(ImmutableMap.of("hive.allow-register-partition-procedure", "true"))
-                .build();
-    }
-
-    @BeforeAll
-    public void setUp()
-    {
-        String schemaLocation = "s3a://%s/%s".formatted(writableBucket, schemaName);
-        assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')");
-    }
-
-    @AfterAll
-    public void tearDown()
-    {
-        assertUpdate("DROP SCHEMA IF EXISTS " + schemaName);
-    }
-
-    @Test
-    public void testRecreateTable()
-    {
-        String tableName = "test_recreate_table_" + randomNameSuffix();
-        String schemaTableName = "%s.%s".formatted(schemaName, tableName);
-        String tableLocation = "%s/%s".formatted(schemaName, tableName);
-
-        // Creating a new table generates special empty file on S3 (not MinIO)
-        assertUpdate("CREATE TABLE " + schemaTableName + "(col int)");
-        try {
-            assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1)", 1);
-            assertThat(getS3ObjectSummaries(tableLocation)).hasSize(2); // directory + file
-
-            // DROP TABLE with Thrift metastore on S3 (not MinIO) leaves some files
-            // when 'hive.metastore.thrift.delete-files-on-drop' config property is false.
-            // Then, the subsequent CREATE TABLE throws "Target directory for table 'xxx' already exists"
-            assertUpdate("DROP TABLE " + schemaTableName);
-            assertThat(getS3ObjectSummaries(tableLocation)).hasSize(0);
-
-            assertUpdate("CREATE TABLE " + schemaTableName + "(col int)");
-        }
-        finally {
-            assertUpdate("DROP TABLE IF EXISTS " + schemaTableName);
-        }
-    }
-
-    @Test
-    public void testRecreatePartition()
-    {
-        String tableName = "test_recreate_partition_" + randomNameSuffix();
-        String schemaTableName = "%s.%s".formatted(schemaName, tableName);
-        String partitionLocation = "%s/%s/part=1".formatted(schemaName, tableName);
-
-        assertUpdate("CREATE TABLE " + schemaTableName + "(col int, part int) WITH (partitioned_by = ARRAY['part'])");
-        try {
-            // Creating an empty partition generates special empty file on S3 (not MinIO)
-            assertUpdate("CALL system.create_empty_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName));
-            assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1, 1)", 1);
-            assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)");
-
-            assertThat(getS3ObjectSummaries(partitionLocation)).hasSize(2); // directory + file
-
-            // DELETE with Thrift metastore on S3 (not MinIO) leaves some files
-            // when 'hive.metastore.thrift.delete-files-on-drop' config property is false.
-            // Then, the subsequent SELECT doesn't return an empty row
-            assertUpdate("DELETE FROM " + schemaTableName);
-            assertThat(getS3ObjectSummaries(partitionLocation)).hasSize(0);
-
-            assertUpdate("CALL system.create_empty_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName));
-            assertQueryReturnsEmptyResult("SELECT * FROM " + schemaTableName);
-        }
-        finally {
-            assertUpdate("DROP TABLE " + schemaTableName);
-        }
-    }
-
-    @Test
-    public void testUnregisterPartitionNotRemoveData()
-    {
-        // Verify unregister_partition procedure doesn't remove physical data even when 'hive.metastore.thrift.delete-files-on-drop' config property is true
-        String tableName = "test_recreate_partition_" + randomNameSuffix();
-        String schemaTableName = "%s.%s".formatted(schemaName, tableName);
-
-        assertUpdate("CREATE TABLE " + schemaTableName + "(col int, part int) WITH (partitioned_by = ARRAY['part'])");
-        try {
-            assertUpdate("INSERT INTO " + schemaTableName + " VALUES (1, 1)", 1);
-            assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)");
-
-            assertUpdate("CALL system.unregister_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName));
-            assertQueryReturnsEmptyResult("SELECT * FROM " + schemaTableName);
-
-            assertUpdate("CALL system.register_partition('%s', '%s', ARRAY['part'], ARRAY['1'])".formatted(schemaName, tableName));
-            assertQuery("SELECT * FROM " + schemaTableName, "VALUES (1, 1)");
-        }
-        finally {
-            assertUpdate("DROP TABLE " + schemaTableName);
-        }
-    }
-
-    private List<S3ObjectSummary> getS3ObjectSummaries(String prefix)
-    {
-        return s3Client.listObjectsV2(writableBucket, prefix).getObjectSummaries();
-    }
-}
diff --git a/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml b/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml
deleted file mode 100644
index a3dc6ad47d4b..000000000000
--- a/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version="1.0"?>
-<configuration>
-    <property>
-        <name>fs.defaultFS</name>
-        <value>hdfs://hadoop-master:9000</value>
-    </property>
-
-    <property>
-        <name>fs.s3a.endpoint</name>
-        <value>%S3_BUCKET_ENDPOINT%</value>
-    </property>
-
-    <property>
-        <name>fs.s3.awsAccessKeyId</name>
-        <value>%AWS_ACCESS_KEY_ID%</value>
-    </property>
-
-    <property>
-        <name>fs.s3.awsSecretAccessKey</name>
-        <value>%AWS_SECRET_ACCESS_KEY%</value>
-    </property>
-
-    <property>
-        <name>fs.s3a.access.key</name>
-        <value>%AWS_ACCESS_KEY_ID%</value>
-    </property>
-
-    <property>
-        <name>fs.s3a.secret.key</name>
-        <value>%AWS_SECRET_ACCESS_KEY%</value>
-    </property>
-
-
-    <property>
-        <name>hadoop.proxyuser.hive.hosts</name>
-        <value>*</value>
-    </property>
-
-    <property>
-        <name>hadoop.proxyuser.hive.groups</name>
-        <value>*</value>
-    </property>
-</configuration>