From 7ed8ffbcf8df942ff2636a6e6dc2aa3b734d742b Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:58:52 +0200
Subject: [PATCH 01/17] Use scientific notation for doubles in HQL

Despite
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-FloatingPointTypes
saying "Floating point literals are assumed to be DOUBLE. Scientific notation
is not yet supported.":

- Hive 1.2 supports scientific notation
- Hive 3 interprets decimal literals as decimals
---
 presto-hive/src/test/sql/create-test.sql | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/presto-hive/src/test/sql/create-test.sql b/presto-hive/src/test/sql/create-test.sql
index bb95cf78fd5b..666b8aa1f64f 100644
--- a/presto-hive/src/test/sql/create-test.sql
+++ b/presto-hive/src/test/sql/create-test.sql
@@ -278,13 +278,13 @@ SELECT
 , CASE WHEN n % 27 = 0 THEN NULL ELSE map('test key', 'test value') END
 , CASE WHEN n % 29 = 0 THEN NULL ELSE array('abc', 'xyz', 'data') END
 , CASE WHEN n % 31 = 0 THEN NULL ELSE
-    array(named_struct('s_string', 'test abc', 's_double', 0.1),
-          named_struct('s_string' , 'test xyz', 's_double', 0.2)) END
+    array(named_struct('s_string', 'test abc', 's_double', 1e-1),
+          named_struct('s_string' , 'test xyz', 's_double', 2e-1)) END
 , CASE WHEN n % 31 = 0 THEN NULL ELSE
-    named_struct('s_string', 'test abc', 's_double', 0.1) END
+    named_struct('s_string', 'test abc', 's_double', 1e-1) END
 , CASE WHEN n % 33 = 0 THEN NULL ELSE
-    map(1, array(named_struct('s_string', 'test abc', 's_double', 0.1),
-          named_struct('s_string' , 'test xyz', 's_double', 0.2))) END
+    map(1, array(named_struct('s_string', 'test abc', 's_double', 1e-1),
+          named_struct('s_string' , 'test xyz', 's_double', 2e-1))) END
 FROM presto_test_sequence
 LIMIT 100
;
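Editorial note (not part of the patch series): the switch to scientific notation matters because Hive 3 types a bare literal such as 0.1 as DECIMAL(1,1), while 1e-1 always denotes a DOUBLE. A minimal Java sketch of the difference between those two values (class and variable names are ours):

import java.math.BigDecimal;

public class LiteralInterpretation
{
    public static void main(String[] args)
    {
        // What '1e-1' denotes: the double closest to one tenth
        double fromDoubleLiteral = 1e-1;
        // What '0.1' denotes in Hive 3: the exact decimal value
        BigDecimal fromDecimalLiteral = new BigDecimal("0.1");

        // The two are not the same number once the double is expanded exactly
        System.out.println(new BigDecimal(fromDoubleLiteral));
        // 0.1000000000000000055511151231257827021181583404541015625
        System.out.println(fromDecimalLiteral);
        // 0.1
    }
}

Writing 1e-1 in create-test.sql therefore keeps the s_double fields typed and valued identically on Hive 1.2 and Hive 3.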
From 0312661a672422fec0edf3183b1f09c9ab59abf0 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:58:53 +0200
Subject: [PATCH 02/17] Temporarily force bucketing v1 in test

---
 presto-hive/src/test/sql/create-test.sql                     | 3 +++
 .../java/io/prestosql/tests/hive/TestHiveBucketedTables.java | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/presto-hive/src/test/sql/create-test.sql b/presto-hive/src/test/sql/create-test.sql
index 666b8aa1f64f..9e00b9feef50 100644
--- a/presto-hive/src/test/sql/create-test.sql
+++ b/presto-hive/src/test/sql/create-test.sql
@@ -63,6 +63,7 @@ COMMENT 'Presto test bucketed table'
 PARTITIONED BY (ds STRING)
 CLUSTERED BY (t_string, t_int) INTO 32 BUCKETS
 STORED AS RCFILE
+TBLPROPERTIES ('bucketing_version'='1') -- TODO https://github.com/prestosql/presto/issues/538 remove
 ;

 CREATE TABLE presto_test_bucketed_by_bigint_boolean (
@@ -79,6 +80,7 @@ COMMENT 'Presto test bucketed table'
 PARTITIONED BY (ds STRING)
 CLUSTERED BY (t_bigint, t_boolean) INTO 32 BUCKETS
 STORED AS RCFILE
+TBLPROPERTIES ('bucketing_version'='1') -- TODO https://github.com/prestosql/presto/issues/538 remove
 ;

 CREATE TABLE presto_test_bucketed_by_double_float (
@@ -95,6 +97,7 @@ COMMENT 'Presto test bucketed table'
 PARTITIONED BY (ds STRING)
 CLUSTERED BY (t_double, t_float) INTO 32 BUCKETS
 STORED AS RCFILE
+TBLPROPERTIES ('bucketing_version'='1') -- TODO https://github.com/prestosql/presto/issues/538 remove
 ;

 CREATE TABLE presto_test_partition_schema_change (
diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBucketedTables.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBucketedTables.java
index 8fd7fbc7b3b8..578c3d448677 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBucketedTables.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBucketedTables.java
@@ -73,7 +73,8 @@ private static HiveTableDefinition bucketTableDefinition(String tableName, boole
                         "CLUSTERED BY (n_regionkey) " +
                         (sorted ? "SORTED BY (n_regionkey) " : " ") +
                         "INTO 2 BUCKETS " +
-                        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'")
+                        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' " +
+                        "TBLPROPERTIES ('bucketing_version'='1')")
                 .setNoData()
                 .build();
     }
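Editorial note (a sketch under assumptions, not Hive's exact code): Hive 3 defaults to bucketing version 2, which hashes keys with a Murmur3-style hash, whereas version 1 uses the legacy hash (an int key hashes to itself). The same key can therefore land in different buckets under the two versions, which is why these test tables pin 'bucketing_version'='1' until Presto understands v2 (issue #538). Guava's Murmur3 is used below only to show the shape of the difference; Hive's exact seed and byte order may differ:

import com.google.common.hash.Hashing;

public class BucketingVersionSketch
{
    public static void main(String[] args)
    {
        int buckets = 32;
        int key = 42;

        int v1Hash = key; // bucketing v1: an int's hash is the value itself
        int v2Hash = Hashing.murmur3_32().hashInt(key).asInt(); // v2 is Murmur3-based (sketch)

        System.out.println("v1 bucket: " + Math.floorMod(v1Hash, buckets));
        System.out.println("v2 bucket: " + Math.floorMod(v2Hash, buckets));
    }
}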
From 4e75fc05dc62635b6fbf3fff1ae4e680d9c264af Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:58:56 +0200
Subject: [PATCH 03/17] Disable offline partition test on Hive 2/3

`ALTER TABLE .. ENABLE OFFLINE` was removed in Hive 2.0
---
 presto-hive-hadoop2/bin/run_hive_tests.sh     |  6 +++
 .../io/prestosql/plugin/hive/TestHive.java    | 39 ++++++++++++++++++-
 .../src/test/sql/create-test-hive-1.sql       |  2 +
 presto-hive/src/test/sql/create-test.sql      |  2 -
 4 files changed, 45 insertions(+), 4 deletions(-)
 create mode 100644 presto-hive/src/test/sql/create-test-hive-1.sql

diff --git a/presto-hive-hadoop2/bin/run_hive_tests.sh b/presto-hive-hadoop2/bin/run_hive_tests.sh
index f8bf5463b0c1..7f47ca91a405 100755
--- a/presto-hive-hadoop2/bin/run_hive_tests.sh
+++ b/presto-hive-hadoop2/bin/run_hive_tests.sh
@@ -9,6 +9,11 @@ start_docker_containers

 # generate test data
 exec_in_hadoop_master_container su hive -c '/usr/bin/hive -f /files/sql/create-test.sql'
+hive_version=$(exec_in_hadoop_master_container su hive -c '/usr/bin/hive --version 2>/dev/null | sed -ne '\''s/Hive \(.*\)/\1/p'\')
+hive_version_major="${hive_version/.*}"
+if test "${hive_version_major}" -lt 2; then
+    exec_in_hadoop_master_container su hive -c '/usr/bin/hive -f /files/sql/create-test-hive-1.sql'
+fi

 stop_unnecessary_hadoop_services

@@ -24,6 +29,7 @@ set +e
     -Dhive.hadoop2.metastorePort=9083 \
     -Dhive.hadoop2.databaseName=default \
     -Dhive.hadoop2.metastoreHost=hadoop-master \
+    -Dhive.hadoop2.hiveVersion="${hive_version}" \
    -Dhive.hadoop2.timeZone=Asia/Kathmandu \
     -Dhive.metastore.thrift.client.socks-proxy=${PROXY}:1180 \
     -Dhive.hdfs.socks-proxy=${PROXY}:1180 \
diff --git a/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java b/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java
index 3f5418decef1..17463aab11cb 100644
--- a/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java
+++ b/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java
@@ -14,17 +14,26 @@
 package io.prestosql.plugin.hive;

 import org.apache.hadoop.net.NetUtils;
+import org.testng.SkipException;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Parameters;

 import java.net.UnknownHostException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static com.google.common.base.Preconditions.checkState;
+import static java.lang.Integer.parseInt;
+import static java.util.Objects.requireNonNull;

 public class TestHive
         extends AbstractTestHive
 {
-    @Parameters({"hive.hadoop2.metastoreHost", "hive.hadoop2.metastorePort", "hive.hadoop2.databaseName", "hive.hadoop2.timeZone"})
+    private String hiveVersion;
+
+    @Parameters({"hive.hadoop2.metastoreHost", "hive.hadoop2.metastorePort", "hive.hadoop2.databaseName", "hive.hadoop2.timeZone", "hive.hadoop2.hiveVersion"})
     @BeforeClass
-    public void initialize(String host, int port, String databaseName, String timeZone)
+    public void initialize(String host, int port, String databaseName, String timeZone, String hiveVersion)
             throws UnknownHostException
     {
         String hadoopMasterIp = System.getProperty("hadoop-master-ip");
@@ -36,5 +45,31 @@ public void initialize(String host, int port, String databaseName, String timeZo
         }

         setup(host, port, databaseName, timeZone);
+
+        this.hiveVersion = requireNonNull(hiveVersion, "hiveVersion is null");
+    }
+
+    private String getHiveVersion()
+    {
+        checkState(hiveVersion != null, "hiveVersion not set");
+        return hiveVersion;
+    }
+
+    private int getHiveVersionMajor()
+    {
+        String hiveVersion = getHiveVersion();
+        Matcher matcher = Pattern.compile("^(\\d+)\\.").matcher(hiveVersion);
+        checkState(matcher.lookingAt(), "Invalid Hive version: %s", hiveVersion);
+        return parseInt(matcher.group(1));
+    }
+
+    @Override
+    public void testGetPartitionSplitsTableOfflinePartition()
+    {
+        if (getHiveVersionMajor() >= 2) {
+            throw new SkipException("ALTER TABLE .. ENABLE OFFLINE was removed in Hive 2.0 and this is a prerequisite for this test");
+        }
+
+        super.testGetPartitionSplitsTableOfflinePartition();
     }
 }
diff --git a/presto-hive/src/test/sql/create-test-hive-1.sql b/presto-hive/src/test/sql/create-test-hive-1.sql
new file mode 100644
index 000000000000..024942d85c91
--- /dev/null
+++ b/presto-hive/src/test/sql/create-test-hive-1.sql
@@ -0,0 +1,2 @@
+-- ALTER TABLE .. ENABLE OFFLINE was removed in Hive 2.0
+ALTER TABLE presto_test_offline_partition PARTITION (ds='2012-12-30') ENABLE OFFLINE;
diff --git a/presto-hive/src/test/sql/create-test.sql b/presto-hive/src/test/sql/create-test.sql
index 9e00b9feef50..1834d5fb2c0d 100644
--- a/presto-hive/src/test/sql/create-test.sql
+++ b/presto-hive/src/test/sql/create-test.sql
@@ -202,8 +202,6 @@ SELECT 'test' FROM presto_test_sequence LIMIT 100;
 INSERT INTO TABLE presto_test_offline_partition PARTITION (ds='2012-12-30')
 SELECT 'test' FROM presto_test_sequence LIMIT 100;

-ALTER TABLE presto_test_offline_partition PARTITION (ds='2012-12-30') ENABLE OFFLINE;
-
 SET hive.enforce.bucketing = true;

 INSERT OVERWRITE TABLE presto_test_bucketed_by_string_int
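Editorial note (illustration only): the shell pipeline above reduces the full Hive version string to its major component the same way the new TestHive code does with the regex ^(\d+)\. — the sample version strings below are hypothetical:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HiveVersionMajor
{
    public static void main(String[] args)
    {
        Pattern pattern = Pattern.compile("^(\\d+)\\."); // same pattern as in TestHive above
        for (String version : new String[] {"1.2.1000.2.6.5.0-292", "3.1.0"}) {
            Matcher matcher = pattern.matcher(version);
            if (!matcher.lookingAt()) {
                throw new IllegalStateException("Invalid Hive version: " + version);
            }
            System.out.println(version + " -> major " + matcher.group(1)); // prints 1, then 3
        }
    }
}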
From 4404aa98dc944ef8dc3bf25b3b5cd16b77738996 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:58:57 +0200
Subject: [PATCH 04/17] Disable stats auto-gathering in Hive

---
 .../conf/tempto/tempto-configuration-for-hdp3.yaml | 7 +++++++
 .../src/main/resources/tempto-configuration.yaml   | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml b/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml
index 586655832419..3be838b33579 100644
--- a/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml
+++ b/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml
@@ -1,3 +1,10 @@
 hdfs:
   webhdfs:
     uri: http://${databases.hive.host}:9870
+
+databases:
+  hive:
+    prepare_statement:
+      - USE ${databases.hive.schema}
+      # Hive 3 gathers stats by default. For test purposes we need to disable this behavior.
+      - SET hive.stats.column.autogather=false
diff --git a/presto-product-tests/src/main/resources/tempto-configuration.yaml b/presto-product-tests/src/main/resources/tempto-configuration.yaml
index e6fcaf576015..b008ce153c50 100644
--- a/presto-product-tests/src/main/resources/tempto-configuration.yaml
+++ b/presto-product-tests/src/main/resources/tempto-configuration.yaml
@@ -15,7 +15,8 @@ databases:
     jdbc_password: na
     jdbc_pooling: false
     schema: default
-    prepare_statement: USE ${databases.hive.schema}
+    prepare_statement:
+      - USE ${databases.hive.schema}
     table_manager_type: hive
     warehouse_directory_path: /user/hive/warehouse
     path: /user/hive/warehouse
From 5c77281b036207c8325c0d6802db577113d14a92 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Thu, 22 Aug 2019 09:35:39 +0200
Subject: [PATCH 05/17] Allow partitioned table to have basic statistics after write

Hive 3's metastore returns basic statistics for a table even when we don't
write them.
---
 .../hive/TestHiveBasicTableStatistics.java    | 27 ++++++++++++-------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java
index c0424c108270..07731fb48e37 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java
@@ -14,7 +14,6 @@
 package io.prestosql.tests.hive;

 import com.google.common.primitives.Longs;
-import io.prestosql.tempto.ProductTest;
 import io.prestosql.tempto.Requires;
 import io.prestosql.tempto.fulfillment.table.hive.tpch.ImmutableTpchTablesRequirements.ImmutableNationTable;
 import io.prestosql.tempto.query.QueryExecutor;
@@ -35,7 +34,7 @@

 @Requires(ImmutableNationTable.class)
 public class TestHiveBasicTableStatistics
-        extends ProductTest
+        extends HiveProductTest
 {
     @Test(groups = {HIVE_TABLE_STATISTICS})
     public void testCreateUnpartitioned()
@@ -127,8 +126,10 @@ public void testCreatePartitioned()
                 "WHERE n_nationkey <> 23", tableName));

         try {
-            BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName);
-            assertThatStatisticsAreNotPresent(tableStatistics);
+            if (getHiveVersionMajor() < 3) {
+                BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName);
+                assertThatStatisticsAreNotPresent(tableStatistics);
+            }

             BasicStatistics firstPartitionStatistics = getBasicStatisticsForPartition(onHive(), tableName, "n_regionkey=1");
             assertThatStatisticsAreNonZero(firstPartitionStatistics);
@@ -162,8 +163,10 @@ public void testAnalyzePartitioned()
                 "WHERE n_regionkey = 1", tableName));

         try {
-            BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName);
-            assertThatStatisticsAreNotPresent(tableStatistics);
+            if (getHiveVersionMajor() < 3) {
+                BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName);
+                assertThatStatisticsAreNotPresent(tableStatistics);
+            }

             BasicStatistics partitionStatisticsBefore = getBasicStatisticsForPartition(onHive(), tableName, "n_regionkey=1");
             assertThatStatisticsArePresent(partitionStatisticsBefore);
@@ -235,8 +238,10 @@ public void testInsertPartitioned()
                 ") ", tableName));

         try {
-            BasicStatistics tableStatisticsAfterCreate = getBasicStatisticsForTable(onHive(), tableName);
-            assertThatStatisticsAreNotPresent(tableStatisticsAfterCreate);
+            if (getHiveVersionMajor() < 3) {
+                BasicStatistics tableStatisticsAfterCreate = getBasicStatisticsForTable(onHive(), tableName);
+                assertThatStatisticsAreNotPresent(tableStatisticsAfterCreate);
+            }

             insertNationData(onPresto(), tableName);

@@ -313,8 +318,10 @@ public void testInsertBucketedPartitioned()
                 "WHERE n_regionkey = 1", tableName));

         try {
-            BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName);
-            assertThatStatisticsAreNotPresent(tableStatistics);
+            if (getHiveVersionMajor() < 3) {
+                BasicStatistics tableStatistics = getBasicStatisticsForTable(onHive(), tableName);
+                assertThatStatisticsAreNotPresent(tableStatistics);
+            }

             BasicStatistics firstPartitionStatistics = getBasicStatisticsForPartition(onHive(), tableName, "n_regionkey=1");
             assertThatStatisticsAreNonZero(firstPartitionStatistics);

From 8bbdb4cbfeb68ceee34dadf9f4b9df6582ce6683 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:58:59 +0200
Subject: [PATCH 06/17] Workaround new schema HDFS permission issue

On HDP 3.1 a new schema location is owned by `hive` on HDFS with `rwxr-xr-x`
permissions. The `alice` user cannot create a new table under the schema when
HDFS impersonation is enabled.
---
 .../src/main/java/io/prestosql/tests/hive/TestRoles.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestRoles.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestRoles.java
index b7cb3ce99a7d..5e9ad302a3af 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestRoles.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestRoles.java
@@ -701,7 +701,7 @@ public void testAdminCanShowGrantsOnlyFromCurrentSchema()
         onPresto().executeQuery("CREATE SCHEMA hive.test");
         onPresto().executeQuery("GRANT admin TO alice");
         onPrestoAlice().executeQuery("SET ROLE ADMIN");
-        onPrestoAlice().executeQuery("CREATE TABLE hive.test.test_table_bob (foo BIGINT)");
+        onPrestoAlice().executeQuery("CREATE TABLE hive.test.test_table_bob (foo BIGINT) with (external_location='/tmp')");

         QueryAssert.assertThat(onPrestoAlice().executeQuery("SHOW GRANTS ON hive.default.test_table_bob"))
                 .containsOnly(ImmutableList.of(
From 6fc3f9ca80a3d9992178f760e3d8d5a906c41289 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:59:00 +0200
Subject: [PATCH 07/17] Enable TestAvroSchemaUrl#testPartitionedTableWithLongColumnType on Hive 3

---
 .../prestosql/tests/hive/HiveProductTest.java | 81 +++++++++++++++++++
 .../tests/hive/TestAvroSchemaUrl.java         |  7 +++----
 2 files changed, 84 insertions(+), 4 deletions(-)
 create mode 100644 presto-product-tests/src/main/java/io/prestosql/tests/hive/HiveProductTest.java

diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/HiveProductTest.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/HiveProductTest.java
new file mode 100644
index 000000000000..4b69d688a7e8
--- /dev/null
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/HiveProductTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.prestosql.tests.hive;
+
+import io.airlift.log.Logger;
+import io.prestosql.tempto.ProductTest;
+import io.prestosql.tempto.query.QueryExecutionException;
+
+import javax.annotation.concurrent.GuardedBy;
+
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.base.Strings.nullToEmpty;
+import static com.google.common.collect.Iterables.getOnlyElement;
+import static io.prestosql.tests.utils.QueryExecutors.onHive;
+import static java.lang.Integer.parseInt;
+
+public class HiveProductTest
+        extends ProductTest
+{
+    private static final Logger log = Logger.get(HiveProductTest.class);
+
+    @GuardedBy("this")
+    private Integer hiveVersionMajor;
+
+    protected synchronized int getHiveVersionMajor()
+    {
+        if (hiveVersionMajor == null) {
+            hiveVersionMajor = readVersionFromHive()
+                    .orElseGet(HiveProductTest::readVersionFromHiveCommandLine);
+        }
+
+        return hiveVersionMajor;
+    }
+
+    private static Optional<Integer> readVersionFromHive()
+    {
+        String hiveVersion;
+        try {
+            // version() is available in e.g. CDH 5 (Hive 1.1) and HDP 3.1 (Hive 3.1)
+            hiveVersion = getOnlyElement(onHive().executeQuery("SELECT version()").column(1));
+        }
+        catch (QueryExecutionException e) {
+            if (nullToEmpty(e.getMessage()).contains("Invalid function 'version'")) {
+                log.info("version() function not found in Hive");
+                return Optional.empty();
+            }
+            throw e;
+        }
+        Matcher matcher = Pattern.compile("^(\\d+)\\.").matcher(hiveVersion);
+        checkState(matcher.lookingAt(), "Invalid Hive version: %s", hiveVersion);
+        int hiveVersionMajor = parseInt(matcher.group(1));
+        log.info("Found Hive version major %s from Hive version '%s'", hiveVersionMajor, hiveVersion);
+        return Optional.of(hiveVersionMajor);
+    }
+
+    // version() is not available in e.g. HDP 2.6 (Hive 1.2)
+    private static int readVersionFromHiveCommandLine()
+    {
+        String hiveServerCommand = getOnlyElement(onHive().executeQuery("SET system:sun.java.command").column(1));
+        Matcher matcher = Pattern.compile("/hive-service-(\\d+)\\.[-0-9a-zA-Z.]+\\.jar").matcher(hiveServerCommand);
+        checkState(matcher.find(), "Cannot find Hive version in Hive command line: %s", hiveServerCommand);
+        int hiveVersionMajor = parseInt(matcher.group(1));
+        log.info("Found Hive version major %s from Hive command line '%s'", hiveVersionMajor, hiveServerCommand);
+        return hiveVersionMajor;
+    }
+}
diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAvroSchemaUrl.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAvroSchemaUrl.java
index a454d4f983e8..c6fea8ea6242 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAvroSchemaUrl.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAvroSchemaUrl.java
@@ -17,7 +17,6 @@
 import com.google.inject.Inject;
 import io.prestosql.tempto.AfterTestWithContext;
 import io.prestosql.tempto.BeforeTestWithContext;
-import io.prestosql.tempto.ProductTest;
 import io.prestosql.tempto.hadoop.hdfs.HdfsClient;
 import io.prestosql.tempto.query.QueryExecutionException;
 import io.prestosql.tempto.query.QueryResult;
@@ -37,7 +36,7 @@
 import static java.lang.String.format;

 public class TestAvroSchemaUrl
-        extends ProductTest
+        extends HiveProductTest
 {
     @Inject
     private HdfsClient hdfsClient;
@@ -187,11 +186,11 @@ public void testTableWithLongColumnType()
     @Test(groups = {STORAGE_FORMATS})
     public void testPartitionedTableWithLongColumnType()
     {
-        if (isOnHdp()) {
+        if (isOnHdp() && getHiveVersionMajor() < 3) {
             // HDP 2.6 won't allow to define a partitioned table with schema having a column with type definition over 2000 characters.
             // It is possible to create table with simpler schema and then alter the schema, but that results in different end state on CDH.
             // To retain proper test coverage on CDH, this test needs to be disabled on HDP.
-            throw new SkipException("Skipping on HDP");
+            throw new SkipException("Skipping on HDP 2");
         }

         onHive().executeQuery("DROP TABLE IF EXISTS test_avro_schema_url_partitioned_long_column");
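Editorial note (illustration only): when version() is unavailable, the HiveProductTest fallback above greps the HiveServer2 command line for the hive-service jar. A standalone demonstration of that regex against a hypothetical HDP 2.6 command line (the path below is made up for the example):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HiveVersionFromCommandLine
{
    public static void main(String[] args)
    {
        // Hypothetical value of 'SET system:sun.java.command' on HDP 2.6
        String command = "org.apache.hadoop.util.RunJar /usr/hdp/2.6.5.0-292/hive/lib/hive-service-1.2.1000.2.6.5.0-292.jar org.apache.hive.service.server.HiveServer2";
        Matcher matcher = Pattern.compile("/hive-service-(\\d+)\\.[-0-9a-zA-Z.]+\\.jar").matcher(command);
        if (!matcher.find()) {
            throw new IllegalStateException("Cannot find Hive version in: " + command);
        }
        System.out.println("major version: " + matcher.group(1)); // prints 1
    }
}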
From ad98f999cccf18c9dcc6637e4462710cec7f7eed Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:59:02 +0200
Subject: [PATCH 08/17] Fix running product tests on HDP 3.1

Previously the `PRODUCT_TESTS_CONFIG=config-hdp3` configurations would still
run on HDP 2.6.
---
 presto-product-tests/conf/product-tests-config-hdp3.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/presto-product-tests/conf/product-tests-config-hdp3.sh b/presto-product-tests/conf/product-tests-config-hdp3.sh
index 0422efd50f67..e1b0b887bada 100644
--- a/presto-product-tests/conf/product-tests-config-hdp3.sh
+++ b/presto-product-tests/conf/product-tests-config-hdp3.sh
@@ -1,2 +1,2 @@
-HADOOP_BASE_IMAGE="prestodev/hdp3.1-hive"
-TEMPTO_ENVIRONMENT_CONFIG_FILE="/docker/volumes/conf/tempto/tempto-configuration-for-hdp3.yaml"
+export HADOOP_BASE_IMAGE="prestodev/hdp3.1-hive"
+export TEMPTO_ENVIRONMENT_CONFIG_FILE="/docker/volumes/conf/tempto/tempto-configuration-for-hdp3.yaml"

From 86cc18a6e3c0052cc22a144a31bb1a7d7a7dfbe5 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:59:04 +0200
Subject: [PATCH 09/17] Run product tests on HDP 3.1 and 2.6

HDP 3.1 gives better test coverage going forward. Tests on HDP 2.6 will be
run in "branch builds".
---
 .travis.yml                                   | 27 +++++++++----------
 .../docker-compose.yml                        |  2 +-
 .../docker-compose.yml                        |  2 +-
 .../conf/product-tests-config-hdp2.6.sh       |  2 ++
 .../conf/product-tests-config-hdp3.sh         |  2 --
 .../conf/product-tests-defaults.sh            |  2 +-
 .../tempto-configuration-for-hadoop2.yaml     |  8 ++++++
 .../tempto/tempto-configuration-for-hdp3.yaml | 10 -------
 .../main/resources/tempto-configuration.yaml  |  4 ++-
 9 files changed, 29 insertions(+), 30 deletions(-)
 create mode 100644 presto-product-tests/conf/product-tests-config-hdp2.6.sh
 delete mode 100644 presto-product-tests/conf/product-tests-config-hdp3.sh
 create mode 100644 presto-product-tests/conf/tempto/tempto-configuration-for-hadoop2.yaml
 delete mode 100644 presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml

diff --git a/.travis.yml b/.travis.yml
index 79be948b3510..03566fddf2dd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -34,20 +34,19 @@ matrix:
     - env: HIVE_TESTS=true
     - env: KUDU_TESTS=true

-# TODO
-#    # HDP 3
-#    - if: type IN (push, cron)
-#      env: PRODUCT_TESTS_SUITE=suite-1 PRODUCT_TESTS_CONFIG=config-hdp3
-#    - if: type IN (push, cron)
-#      env: PRODUCT_TESTS_SUITE=suite-2 PRODUCT_TESTS_CONFIG=config-hdp3
-#    - if: type IN (push, cron)
-#      env: PRODUCT_TESTS_SUITE=suite-3 PRODUCT_TESTS_CONFIG=config-hdp3
-#    - if: type IN (push, cron)
-#      env: PRODUCT_TESTS_SUITE=suite-4 PRODUCT_TESTS_CONFIG=config-hdp3
-#    - if: type IN (push, cron)
-#      env: PRODUCT_TESTS_SUITE=suite-5 PRODUCT_TESTS_CONFIG=config-hdp3
-#    - if: type IN (push, cron)
-#      env: HIVE_TESTS=true PRODUCT_TESTS_CONFIG=config-hdp3
+    # HDP 2.6
+    - if: type IN (push, cron)
+      env: PRODUCT_TESTS_SUITE=suite-1 PRODUCT_TESTS_CONFIG=config-hdp2.6
+    - if: type IN (push, cron)
+      env: PRODUCT_TESTS_SUITE=suite-2 PRODUCT_TESTS_CONFIG=config-hdp2.6
+    - if: type IN (push, cron)
+      env: PRODUCT_TESTS_SUITE=suite-3 PRODUCT_TESTS_CONFIG=config-hdp2.6
+    - if: type IN (push, cron)
+      env: PRODUCT_TESTS_SUITE=suite-4 PRODUCT_TESTS_CONFIG=config-hdp2.6
+    - if: type IN (push, cron)
+      env: PRODUCT_TESTS_SUITE=suite-5 PRODUCT_TESTS_CONFIG=config-hdp2.6
+    - if: type IN (push, cron)
+      env: HIVE_TESTS=true PRODUCT_TESTS_CONFIG=config-hdp2.6

 dist: xenial
 jdk: openjdk8
diff --git a/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-impersonation/docker-compose.yml b/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-impersonation/docker-compose.yml
index 730278afc570..65eb8df722af 100644
--- a/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-impersonation/docker-compose.yml
+++ b/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-impersonation/docker-compose.yml
@@ -16,4 +16,4 @@ services:
   application-runner:
     image: 'prestodev/cdh5.15-hive-kerberized-kms:${DOCKER_IMAGES_VERSION}'
     environment:
-      - TEMPTO_PROFILE_CONFIG_FILE=/docker/volumes/conf/tempto/tempto-configuration-for-docker-kerberos-kms.yaml
+      - TEMPTO_PROFILE_CONFIG_FILE=/docker/volumes/conf/tempto/tempto-configuration-for-hadoop2.yaml,/docker/volumes/conf/tempto/tempto-configuration-for-docker-kerberos-kms.yaml
diff --git a/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-no-impersonation/docker-compose.yml b/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-no-impersonation/docker-compose.yml
index bbbb39754459..88143d6c21d9 100644
--- a/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-no-impersonation/docker-compose.yml
+++ b/presto-product-tests/conf/docker/singlenode-kerberos-kms-hdfs-no-impersonation/docker-compose.yml
@@ -14,4 +14,4 @@ services:
   application-runner:
     image: 'prestodev/cdh5.15-hive-kerberized-kms:${DOCKER_IMAGES_VERSION}'
     environment:
-      - TEMPTO_PROFILE_CONFIG_FILE=/docker/volumes/conf/tempto/tempto-configuration-for-docker-kerberos-kms.yaml
+      - TEMPTO_PROFILE_CONFIG_FILE=/docker/volumes/conf/tempto/tempto-configuration-for-hadoop2.yaml,/docker/volumes/conf/tempto/tempto-configuration-for-docker-kerberos-kms.yaml
diff --git a/presto-product-tests/conf/product-tests-config-hdp2.6.sh b/presto-product-tests/conf/product-tests-config-hdp2.6.sh
new file mode 100644
index 000000000000..e8174d2d512a
--- /dev/null
+++ b/presto-product-tests/conf/product-tests-config-hdp2.6.sh
@@ -0,0 +1,2 @@
+export HADOOP_BASE_IMAGE="prestodev/hdp2.6-hive"
+export TEMPTO_ENVIRONMENT_CONFIG_FILE="/docker/volumes/conf/tempto/tempto-configuration-for-hadoop2.yaml"
diff --git a/presto-product-tests/conf/product-tests-config-hdp3.sh b/presto-product-tests/conf/product-tests-config-hdp3.sh
deleted file mode 100644
index e1b0b887bada..000000000000
--- a/presto-product-tests/conf/product-tests-config-hdp3.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-export HADOOP_BASE_IMAGE="prestodev/hdp3.1-hive"
-export TEMPTO_ENVIRONMENT_CONFIG_FILE="/docker/volumes/conf/tempto/tempto-configuration-for-hdp3.yaml"
diff --git a/presto-product-tests/conf/product-tests-defaults.sh b/presto-product-tests/conf/product-tests-defaults.sh
index a71316628d74..020a0bfca4e3 100644
--- a/presto-product-tests/conf/product-tests-defaults.sh
+++ b/presto-product-tests/conf/product-tests-defaults.sh
@@ -1,2 +1,2 @@
 export DOCKER_IMAGES_VERSION=${DOCKER_IMAGES_VERSION:-16}
-export HADOOP_BASE_IMAGE=${HADOOP_BASE_IMAGE:-"prestodev/hdp2.6-hive"}
+export HADOOP_BASE_IMAGE=${HADOOP_BASE_IMAGE:-"prestodev/hdp3.1-hive"}
diff --git a/presto-product-tests/conf/tempto/tempto-configuration-for-hadoop2.yaml b/presto-product-tests/conf/tempto/tempto-configuration-for-hadoop2.yaml
new file mode 100644
index 000000000000..bdc0e4636039
--- /dev/null
+++ b/presto-product-tests/conf/tempto/tempto-configuration-for-hadoop2.yaml
@@ -0,0 +1,8 @@
+hdfs:
+  webhdfs:
+    uri: http://${databases.hive.host}:50070
+
+databases:
+  hive:
+    prepare_statement:
+      - USE ${databases.hive.schema}
diff --git a/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml b/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml
deleted file mode 100644
index 3be838b33579..000000000000
--- a/presto-product-tests/conf/tempto/tempto-configuration-for-hdp3.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-hdfs:
-  webhdfs:
-    uri: http://${databases.hive.host}:9870
-
-databases:
-  hive:
-    prepare_statement:
-      - USE ${databases.hive.schema}
-      # Hive 3 gathers stats by default. For test purposes we need to disable this behavior.
-      - SET hive.stats.column.autogather=false
diff --git a/presto-product-tests/src/main/resources/tempto-configuration.yaml b/presto-product-tests/src/main/resources/tempto-configuration.yaml
index b008ce153c50..2abef2f1f66a 100644
--- a/presto-product-tests/src/main/resources/tempto-configuration.yaml
+++ b/presto-product-tests/src/main/resources/tempto-configuration.yaml
@@ -1,7 +1,7 @@
 hdfs:
   username: hive
   webhdfs:
-    uri: http://${databases.hive.host}:50070
+    uri: http://${databases.hive.host}:9870

 databases:
   default:
@@ -17,6 +17,8 @@ databases:
     schema: default
     prepare_statement:
       - USE ${databases.hive.schema}
+      # Hive 3 gathers stats by default. For test purposes we need to disable this behavior.
+      - SET hive.stats.column.autogather=false
     table_manager_type: hive
     warehouse_directory_path: /user/hive/warehouse
     path: /user/hive/warehouse
From 0560c4f956fe18003338ff04ecae3427a2d9eb16 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:59:06 +0200
Subject: [PATCH 10/17] TMP Bump PT version

---
 presto-product-tests/conf/product-tests-defaults.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/presto-product-tests/conf/product-tests-defaults.sh b/presto-product-tests/conf/product-tests-defaults.sh
index 020a0bfca4e3..a2b468a5572c 100644
--- a/presto-product-tests/conf/product-tests-defaults.sh
+++ b/presto-product-tests/conf/product-tests-defaults.sh
@@ -1,2 +1,2 @@
-export DOCKER_IMAGES_VERSION=${DOCKER_IMAGES_VERSION:-16}
+export DOCKER_IMAGES_VERSION=${DOCKER_IMAGES_VERSION:-17}
 export HADOOP_BASE_IMAGE=${HADOOP_BASE_IMAGE:-"prestodev/hdp3.1-hive"}
From 74ccc2c4cdfa52c70db82edd4c03de553fbe3a97 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:59:08 +0200
Subject: [PATCH 11/17] Workaround weak hashing of floating point in Hive
 bucketing v1

The test expects all buckets to be populated. However, Hive bucketing v1 uses
a float's bits as its hash, and for small float numbers the last 5 bits
(i.e. the value modulo 32, when there are 32 buckets) are usually zeros. We
still got a few buckets (more than one) in
`presto_test_bucketed_by_double_float`, because the other hashed column was a
double.
---
 .../java/io/prestosql/plugin/hive/AbstractTestHive.java | 6 +++---
 presto-hive/src/test/sql/create-test.sql                | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java
index 4c43ca7ae5fa..dd746552a338 100644
--- a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java
+++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java
@@ -1518,7 +1518,7 @@ public void testBucketedTableDoubleFloat()
                     .build();

             // floats and doubles are not supported, so we should see all splits
-            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(32), Optional.empty());
+            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(11), Optional.empty());
             assertEquals(result.getRowCount(), 100);
         }
     }
@@ -1653,9 +1653,9 @@ private static void assertBucketTableEvolutionResult(MaterializedResult result,

     private void assertTableIsBucketed(ConnectorTableHandle tableHandle)
     {
-        // the bucketed test tables should have exactly 32 splits
+        // the bucketed test tables should have exactly 11 splits
         List<ConnectorSplit> splits = getAllSplits(tableHandle);
-        assertEquals(splits.size(), 32);
+        assertEquals(splits.size(), 11);

         // verify all paths are unique
         Set<String> paths = new HashSet<>();
diff --git a/presto-hive/src/test/sql/create-test.sql b/presto-hive/src/test/sql/create-test.sql
index 1834d5fb2c0d..0d65830bbc94 100644
--- a/presto-hive/src/test/sql/create-test.sql
+++ b/presto-hive/src/test/sql/create-test.sql
@@ -61,7 +61,7 @@ CREATE TABLE presto_test_bucketed_by_string_int (
 )
 COMMENT 'Presto test bucketed table'
 PARTITIONED BY (ds STRING)
-CLUSTERED BY (t_string, t_int) INTO 32 BUCKETS
+CLUSTERED BY (t_string, t_int) INTO 11 BUCKETS -- TODO https://github.com/prestosql/presto/issues/538 restore 32 buckets
 STORED AS RCFILE
 TBLPROPERTIES ('bucketing_version'='1') -- TODO https://github.com/prestosql/presto/issues/538 remove
 ;
@@ -78,7 +78,7 @@ CREATE TABLE presto_test_bucketed_by_bigint_boolean (
 )
 COMMENT 'Presto test bucketed table'
 PARTITIONED BY (ds STRING)
-CLUSTERED BY (t_bigint, t_boolean) INTO 32 BUCKETS
+CLUSTERED BY (t_bigint, t_boolean) INTO 11 BUCKETS -- TODO https://github.com/prestosql/presto/issues/538 restore 32 buckets
 STORED AS RCFILE
 TBLPROPERTIES ('bucketing_version'='1') -- TODO https://github.com/prestosql/presto/issues/538 remove
 ;
@@ -95,7 +95,7 @@ CREATE TABLE presto_test_bucketed_by_double_float (
 )
 COMMENT 'Presto test bucketed table'
 PARTITIONED BY (ds STRING)
-CLUSTERED BY (t_double, t_float) INTO 32 BUCKETS
+CLUSTERED BY (t_double, t_float) INTO 11 BUCKETS -- TODO https://github.com/prestosql/presto/issues/538 restore 32 buckets
 STORED AS RCFILE
 TBLPROPERTIES ('bucketing_version'='1') -- TODO https://github.com/prestosql/presto/issues/538 remove
 ;
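Editorial note (illustration only): the weak hashing described in the commit message is easy to reproduce. Hive bucketing v1 hashes a FLOAT via its raw IEEE-754 bits, and floats with short binary representations have trailing zero mantissa bits, so modulo a power-of-two bucket count they collapse into bucket 0; a prime count such as 11 spreads them. Bucket assignment below is sketched with floorMod, whereas Hive masks the sign bit instead:

public class FloatBucketingSketch
{
    public static void main(String[] args)
    {
        for (float value : new float[] {0.5f, 1.0f, 2.0f, 3.0f, 4.0f, 8.0f}) {
            int hash = Float.floatToIntBits(value); // bucketing v1 hash of a float
            System.out.printf("%4.1f -> bucket %d of 32, bucket %d of 11%n",
                    value, Math.floorMod(hash, 32), Math.floorMod(hash, 11));
        }
        // All six values land in bucket 0 of 32; the buckets of 11 vary.
    }
}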
From 9d34dff1bccc9eaf0a2c5ddfeaf40a05b26061d5 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 21 Aug 2019 14:59:10 +0200
Subject: [PATCH 12/17] Remove duplicate configuration

There is a second `-Dhive.hadoop2.timeZone` in the same invocation.
---
 presto-hive-hadoop2/bin/run_hive_tests.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/presto-hive-hadoop2/bin/run_hive_tests.sh b/presto-hive-hadoop2/bin/run_hive_tests.sh
index 7f47ca91a405..18c419f37a8f 100755
--- a/presto-hive-hadoop2/bin/run_hive_tests.sh
+++ b/presto-hive-hadoop2/bin/run_hive_tests.sh
@@ -23,7 +23,6 @@ HADOOP_MASTER_IP=$(hadoop_master_ip)
 pushd ${PROJECT_ROOT}
 set +e
 ./mvnw -B -pl presto-hive-hadoop2 test -P test-hive-hadoop2 \
-    -Dhive.hadoop2.timeZone=UTC \
     -DHADOOP_USER_NAME=hive \
     -Dhive.hadoop2.metastoreHost=localhost \
     -Dhive.hadoop2.metastorePort=9083 \

From ddd3bd7aa259d1c7349f126f3f987d127ed5c2aa Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 4 Sep 2019 10:39:29 +0200
Subject: [PATCH 13/17] Pass storage time zone to ORC stream reader

---
 .../io/prestosql/orc/OrcRecordReader.java     |  5 +++--
 .../main/java/io/prestosql/orc/Stripe.java    | 20 +++++++++++++------
 .../java/io/prestosql/orc/StripeReader.java   | 15 +++++++-------
 .../orc/reader/BooleanStreamReader.java       |  2 +-
 .../orc/reader/ByteStreamReader.java          |  2 +-
 .../orc/reader/DecimalStreamReader.java       |  2 +-
 .../orc/reader/DoubleStreamReader.java        |  2 +-
 .../orc/reader/FloatStreamReader.java         |  2 +-
 .../orc/reader/ListStreamReader.java          |  4 ++--
 .../orc/reader/LongStreamReader.java          |  2 +-
 .../prestosql/orc/reader/MapStreamReader.java |  6 +++---
 .../reader/SliceDictionaryStreamReader.java   |  2 +-
 .../orc/reader/SliceDirectStreamReader.java   |  2 +-
 .../orc/reader/SliceStreamReader.java         |  4 ++--
 .../io/prestosql/orc/reader/StreamReader.java |  2 +-
 .../orc/reader/StructStreamReader.java        |  4 ++--
 .../orc/reader/TimestampStreamReader.java     |  4 ++--
 17 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java b/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java
index 61bdca4ceeb3..f82014316227 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java
@@ -500,10 +500,11 @@ private void advanceToNextStripe()
             // Give readers access to dictionary streams
             InputStreamSources dictionaryStreamSources = stripe.getDictionaryStreamSources();
             List<ColumnEncoding> columnEncodings = stripe.getColumnEncodings();
-            ZoneId timeZone = stripe.getTimeZone();
+            ZoneId fileTimeZone = stripe.getFileTimeZone();
+            ZoneId storageTimeZone = stripe.getStorageTimeZone();
             for (StreamReader column : streamReaders) {
                 if (column != null) {
-                    column.startStripe(timeZone, dictionaryStreamSources, columnEncodings);
+                    column.startStripe(fileTimeZone, storageTimeZone, dictionaryStreamSources, columnEncodings);
                 }
             }
"defaultTimeZone is null"); + this.storageTimeZone = requireNonNull(storageTimeZone, "storageTimeZone is null"); this.decompressor = requireNonNull(decompressor, "decompressor is null"); this.types = ImmutableList.copyOf(requireNonNull(types, "types is null")); this.includedOrcColumns = getIncludedOrcColumns(types, requireNonNull(includedColumns, "includedColumns is null")); @@ -116,7 +117,7 @@ public Stripe readStripe(StripeInformation stripe, AggregatedMemoryContext syste if (writeValidation.isPresent()) { writeValidation.get().validateTimeZone(orcDataSource.getId(), stripeFooter.getTimeZone().orElse(null)); } - ZoneId timeZone = stripeFooter.getTimeZone().orElse(defaultTimeZone); + ZoneId fileTimeZone = stripeFooter.getTimeZone().orElse(storageTimeZone); // get streams for selected columns Map streams = new HashMap<>(); @@ -171,7 +172,7 @@ public Stripe readStripe(StripeInformation stripe, AggregatedMemoryContext syste selectedRowGroups, columnEncodings); - return new Stripe(stripe.getNumberOfRows(), timeZone, columnEncodings, rowGroups, dictionaryStreamSources); + return new Stripe(stripe.getNumberOfRows(), fileTimeZone, storageTimeZone, columnEncodings, rowGroups, dictionaryStreamSources); } catch (InvalidCheckpointException e) { // The ORC file contains a corrupt checkpoint stream treat the stripe as a single row group. @@ -225,7 +226,7 @@ public Stripe readStripe(StripeInformation stripe, AggregatedMemoryContext syste } RowGroup rowGroup = new RowGroup(0, 0, stripe.getNumberOfRows(), minAverageRowBytes, new InputStreamSources(builder.build())); - return new Stripe(stripe.getNumberOfRows(), timeZone, columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources); + return new Stripe(stripe.getNumberOfRows(), fileTimeZone, storageTimeZone, columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources); } private static boolean isSupportedStreamType(Stream stream, OrcTypeKind orcTypeKind) diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/BooleanStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/BooleanStreamReader.java index 6a95f9fa6b3e..2c151d34a564 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/reader/BooleanStreamReader.java +++ b/presto-orc/src/main/java/io/prestosql/orc/reader/BooleanStreamReader.java @@ -175,7 +175,7 @@ private void openRowGroup() } @Override - public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List encoding) + public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List encoding) { presentStreamSource = missingStreamSource(BooleanInputStream.class); dataStreamSource = missingStreamSource(BooleanInputStream.class); diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/ByteStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/ByteStreamReader.java index bcbeaa33f679..5018b5b5c7fc 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/reader/ByteStreamReader.java +++ b/presto-orc/src/main/java/io/prestosql/orc/reader/ByteStreamReader.java @@ -176,7 +176,7 @@ private void openRowGroup() } @Override - public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List encoding) + public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List encoding) { presentStreamSource = missingStreamSource(BooleanInputStream.class); dataStreamSource = missingStreamSource(ByteInputStream.class); diff --git 
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/DecimalStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/DecimalStreamReader.java
index a4e749f1d7a4..77cb634333a6 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/DecimalStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/DecimalStreamReader.java
@@ -306,7 +306,7 @@ private void seekToOffset()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         decimalStreamSource = missingStreamSource(DecimalInputStream.class);
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/DoubleStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/DoubleStreamReader.java
index fcb64d58fbd5..cab48bd1f81a 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/DoubleStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/DoubleStreamReader.java
@@ -178,7 +178,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         dataStreamSource = missingStreamSource(DoubleInputStream.class);
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/FloatStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/FloatStreamReader.java
index df8f11aecca4..a312f8ed234c 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/FloatStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/FloatStreamReader.java
@@ -177,7 +177,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         dataStreamSource = missingStreamSource(FloatInputStream.class);
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/ListStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/ListStreamReader.java
index fd3cf8ea4126..6a666e3ab01f 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/ListStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/ListStreamReader.java
@@ -162,7 +162,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
             throws IOException
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
@@ -176,7 +176,7 @@ public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSour
         rowGroupOpen = false;

-        elementStreamReader.startStripe(timeZone, dictionaryStreamSources, encoding);
+        elementStreamReader.startStripe(fileTimeZone, storageTimeZone, dictionaryStreamSources, encoding);
     }

     @Override
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/LongStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/LongStreamReader.java
index c115e7b1ab99..272f17521fa5 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/LongStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/LongStreamReader.java
@@ -251,7 +251,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         dataStreamSource = missingStreamSource(LongInputStream.class);
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/MapStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/MapStreamReader.java
index b578db71f4f5..d26e0eedd1e2 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/MapStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/MapStreamReader.java
@@ -221,7 +221,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
             throws IOException
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
@@ -235,8 +235,8 @@ public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSour
         rowGroupOpen = false;

-        keyStreamReader.startStripe(timeZone, dictionaryStreamSources, encoding);
-        valueStreamReader.startStripe(timeZone, dictionaryStreamSources, encoding);
+        keyStreamReader.startStripe(fileTimeZone, storageTimeZone, dictionaryStreamSources, encoding);
+        valueStreamReader.startStripe(fileTimeZone, storageTimeZone, dictionaryStreamSources, encoding);
     }

     @Override
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDictionaryStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDictionaryStreamReader.java
index a9a061cacbaf..8e1dfee0b01b 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDictionaryStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDictionaryStreamReader.java
@@ -313,7 +313,7 @@ private static void readDictionary(
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
         dictionaryDataStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, ByteArrayInputStream.class);
         dictionaryLengthStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, LENGTH, LongInputStream.class);
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDirectStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDirectStreamReader.java
index ad503d1860e4..51d930a7e7f6 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDirectStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/SliceDirectStreamReader.java
@@ -246,7 +246,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         lengthStreamSource = missingStreamSource(LongInputStream.class);
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/SliceStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/SliceStreamReader.java
index 89fe236e163b..e1affb74e6ee 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/SliceStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/SliceStreamReader.java
@@ -84,7 +84,7 @@ public void prepareNextRead(int batchSize)
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
             throws IOException
     {
         ColumnEncodingKind columnEncodingKind = encoding.get(streamDescriptor.getStreamId()).getColumnEncodingKind();
@@ -98,7 +98,7 @@ else if (columnEncodingKind == DICTIONARY || columnEncodingKind == DICTIONARY_V2
             throw new IllegalArgumentException("Unsupported encoding " + columnEncodingKind);
         }

-        currentReader.startStripe(timeZone, dictionaryStreamSources, encoding);
+        currentReader.startStripe(fileTimeZone, storageTimeZone, dictionaryStreamSources, encoding);
     }

     @Override
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/StreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/StreamReader.java
index a9221244fab6..b13635dc13a9 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/StreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/StreamReader.java
@@ -28,7 +28,7 @@ Block readBlock()

     void prepareNextRead(int batchSize);

-    void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
             throws IOException;

     void startRowGroup(InputStreamSources dataStreamSources)
diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/StructStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/StructStreamReader.java
index 2affc273b2c1..375812b5c707 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/StructStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/StructStreamReader.java
@@ -170,7 +170,7 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
             throws IOException
     {
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
@@ -183,7 +183,7 @@ public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSour
         rowGroupOpen = false;

         for (StreamReader structField : structFields.values()) {
-            structField.startStripe(timeZone, dictionaryStreamSources, encoding);
+            structField.startStripe(fileTimeZone, storageTimeZone, dictionaryStreamSources, encoding);
         }
     }

diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java
index 8e727d2529f6..240cd8b3f15c 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java
@@ -201,9 +201,9 @@ private void openRowGroup()
     }

     @Override
-    public void startStripe(ZoneId timeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
+    public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
     {
-        baseTimestampInSeconds = ZonedDateTime.of(2015, 1, 1, 0, 0, 0, 0, timeZone).toEpochSecond();
+        baseTimestampInSeconds = ZonedDateTime.of(2015, 1, 1, 0, 0, 0, 0, fileTimeZone).toEpochSecond();

         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         secondsStreamSource = missingStreamSource(LongInputStream.class);

From edb0913e806d5d10791e180eaacd512f77cbe2f5 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Wed, 4 Sep 2019 10:40:23 +0200
Subject: [PATCH 14/17] Add compatibility with Hive 3.1 ORC timestamps

---
 .../orc/reader/TimestampStreamReader.java     | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java b/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java
index 240cd8b3f15c..a298d9f50d52 100644
--- a/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java
+++ b/presto-orc/src/main/java/io/prestosql/orc/reader/TimestampStreamReader.java
@@ -24,17 +24,21 @@
 import io.prestosql.spi.block.Block;
 import io.prestosql.spi.block.LongArrayBlock;
 import io.prestosql.spi.block.RunLengthEncodedBlock;
+import io.prestosql.spi.type.TimeZoneKey;
 import io.prestosql.spi.type.TimestampType;
 import io.prestosql.spi.type.Type;
+import org.joda.time.DateTimeZone;
 import org.openjdk.jol.info.ClassLayout;

 import javax.annotation.Nullable;

 import java.io.IOException;
+import java.time.Instant;
 import java.time.ZoneId;
 import java.time.ZonedDateTime;
 import java.util.List;
 import java.util.Optional;
+import java.util.function.LongUnaryOperator;

 import static com.google.common.base.MoreObjects.toStringHelper;
 import static io.airlift.slice.SizeOf.sizeOf;
@@ -43,6 +47,7 @@
 import static io.prestosql.orc.metadata.Stream.StreamKind.SECONDARY;
 import static io.prestosql.orc.reader.ReaderUtils.verifyStreamType;
 import static io.prestosql.orc.stream.MissingInputStreamSource.missingStreamSource;
+import static io.prestosql.spi.type.TimeZoneKey.UTC_KEY;
 import static io.prestosql.spi.type.TimestampType.TIMESTAMP;
 import static java.util.Objects.requireNonNull;

@@ -56,6 +61,7 @@ public class TimestampStreamReader
     private final StreamDescriptor streamDescriptor;

     private long baseTimestampInSeconds;
+    private Optional<LongUnaryOperator> timestampConversion;

     private int readOffset;
     private int nextBatchSize;
@@ -168,6 +174,11 @@ private Block readNonNullBlock()
         for (int i = 0; i < nextBatchSize; i++) {
             values[i] = decodeTimestamp(secondsStream.next(), nanosStream.next(), baseTimestampInSeconds);
         }
+        if (timestampConversion.isPresent()) {
+            for (int i = 0; i < nextBatchSize; i++) {
+                values[i] = timestampConversion.get().applyAsLong(values[i]);
+            }
+        }
         return new LongArrayBlock(nextBatchSize, Optional.empty(), values);
     }

@@ -187,6 +198,13 @@ private Block readNullBlock(boolean[] isNull)
                 values[i] = decodeTimestamp(secondsStream.next(), nanosStream.next(), baseTimestampInSeconds);
             }
         }
+        if (timestampConversion.isPresent()) {
+            for (int i = 0; i < isNull.length; i++) {
+                if (!isNull[i]) {
+                    values[i] = timestampConversion.get().applyAsLong(values[i]);
+                }
+            }
+        }
         return new LongArrayBlock(isNull.length, Optional.of(isNull), values);
     }

@@ -205,6 +223,25 @@ public void startStripe(ZoneId fileTimeZone, ZoneId storageTimeZone, InputStream
     {
         baseTimestampInSeconds = ZonedDateTime.of(2015, 1, 1, 0, 0, 0, 0, fileTimeZone).toEpochSecond();

+        /*
+         * In legacy semantics, timestamp represents a point in time. ORC effectively stores millis and zone (like ZonedDateTime).
+         * Hive interprets the ORC value as local date/time in file time zone.
+         * We need to calculate point in time corresponding to (local date/time read from ORC) at Hive warehouse time zone.
+         *
+         * TODO support new timestamp semantics
+         */
+        DateTimeZone storageDateTimeZone = DateTimeZone.forID(storageTimeZone.getId());
+        if (fileTimeZone.equals(storageTimeZone)) {
+            timestampConversion = Optional.empty();
+        }
+        else if (TimeZoneKey.getTimeZoneKey(fileTimeZone.getId()).equals(UTC_KEY)) { // getTimeZoneKey identifies UTC-equivalent zones
+            timestampConversion = Optional.of(value -> storageDateTimeZone.convertLocalToUTC(value, false));
+        }
+        else {
+            DateTimeZone fileDateTimeZone = DateTimeZone.forID(fileTimeZone.getId());
+            timestampConversion = Optional.of(value -> fileDateTimeZone.getMillisKeepLocal(storageDateTimeZone, value));
+        }
+
         presentStreamSource = missingStreamSource(BooleanInputStream.class);
         secondsStreamSource = missingStreamSource(LongInputStream.class);
         nanosStreamSource = missingStreamSource(LongInputStream.class);
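Editorial note (a standalone sketch of the joda-time calls the patch relies on; the zone pair below is an arbitrary example, though Asia/Kathmandu does appear in the test configuration earlier in this series):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class LegacyTimestampConversionSketch
{
    public static void main(String[] args)
    {
        DateTimeZone fileZone = DateTimeZone.forID("UTC");
        DateTimeZone storageZone = DateTimeZone.forID("Asia/Kathmandu");

        // Wall-clock 2019-09-04 12:00 as read from the ORC file, rendered in the file zone
        long localMillis = new DateTime(2019, 9, 4, 12, 0, fileZone).getMillis();

        // convertLocalToUTC: treat those wall-clock fields as storage-zone local time and
        // find the instant; Kathmandu is UTC+05:45, so the instant is 5h45m earlier
        long instant = storageZone.convertLocalToUTC(localMillis, false);
        System.out.println(new DateTime(instant, DateTimeZone.UTC)); // 2019-09-04T06:15:00.000Z

        // getMillisKeepLocal: the same wall-clock fields re-anchored from file zone to storage zone
        long keepLocal = fileZone.getMillisKeepLocal(storageZone, localMillis);
        System.out.println(keepLocal == instant); // true
    }
}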
From 80fd4b156c5d1be7fb2e1b1ce161b11946f2f36a Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Thu, 22 Aug 2019 13:28:19 +0200
Subject: [PATCH 15/17] Update statistics tests for Hive 3

---
 .../io/prestosql/tests/hive/TestHiveTableStatistics.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java
index 516759282c5a..10f4ee59e708 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveTableStatistics.java
@@ -201,8 +201,8 @@ public void testStatisticsForUnpartitionedTable()
         onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS");

         assertThat(query(showStatsWholeTable)).containsOnly(
-                row("n_nationkey", null, 19.0, 0.0, null, "0", "24"),
-                row("n_name", 177.0, 24.0, 0.0, null, null, null),
+                row("n_nationkey", null, anyOf(19., 25.), 0.0, null, "0", "24"),
+                row("n_name", 177.0, anyOf(24., 25.), 0.0, null, null, null),
                 row("n_regionkey", null, 5.0, 0.0, null, "0", "4"),
                 row("n_comment", 1857.0, 25.0, 0.0, null, null, null),
                 row(null, null, null, null, 25.0, null, null));
@@ -328,7 +328,7 @@ public void testStatisticsForTablePartitionedByBigint()
                 row(null, null, null, null, 5.0, null, null));

         assertThat(query(showStatsPartitionTwo)).containsOnly(
-                row("p_nationkey", null, 4.0, 0.0, null, "8", "21"),
+                row("p_nationkey", null, anyOf(4., 5.), 0.0, null, "8", "21"),
                 row("p_name", 31.0, 5.0, 0.0, null, null, null),
                 row("p_regionkey", null, 1.0, 0.0, null, "2", "2"),
                 row("p_comment", 351.0, 5.0, 0.0, null, null, null),
@@ -455,7 +455,7 @@ public void testStatisticsForTablePartitionedByVarchar()
                 row(null, null, null, null, 5.0, null, null));

         assertThat(query(showStatsPartitionTwo)).containsOnly(
-                row("p_nationkey", null, 4.0, 0.0, null, "8", "21"),
+                row("p_nationkey", null, anyOf(4., 5.), 0.0, null, "8", "21"),
                 row("p_name", 31.0, 5.0, 0.0, null, null, null),
                 row("p_regionkey", 20.0, 1.0, 0.0, null, null, null),
                 row("p_comment", 351.0, 5.0, 0.0, null, null, null),
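Hive 3 computes number-of-distinct-values estimates differently than Hive 1.2 (hence 19 vs 25 for n_nationkey above), so the expectations now accept either value. The anyOf helper is presumably a small matcher along these lines (an assumed sketch, not the project's actual implementation):

    import java.util.Arrays;
    import java.util.List;

    // Sketch of a cell matcher accepting any of several expected values. Row
    // comparison is assumed to call expected.equals(actual), so a deliberately
    // asymmetric equals() is enough for use inside test assertions.
    public final class AnyOfMatcher
    {
        private final List<Object> alternatives;

        private AnyOfMatcher(Object... alternatives)
        {
            this.alternatives = Arrays.asList(alternatives);
        }

        public static AnyOfMatcher anyOf(Object... alternatives)
        {
            return new AnyOfMatcher(alternatives);
        }

        @Override
        public boolean equals(Object actual)
        {
            // accept the actual cell if it matches any listed alternative
            return alternatives.stream().anyMatch(alternative ->
                    alternative == null ? actual == null : alternative.equals(actual));
        }

        @Override
        public int hashCode()
        {
            return 0; // constant: only equals() matters for assertions
        }
    }

With such a matcher, row("n_nationkey", null, anyOf(19., 25.), ...) passes against both the Hive 1.2 and the Hive 3 statistics.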
From 545e586555feb12e3c2131bc0d712fcc5833c406 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Mon, 9 Sep 2019 09:21:46 +0200
Subject: [PATCH 16/17] Disable certain schema evolution tests

Hive 3 is less permissive when it comes to schema evolution.
---
 .../plugin/hive/AbstractTestHive.java         | 24 +++----
 .../tests/hive/TestHiveCoercion.java          | 66 +++++++++----------
 2 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java
index dd746552a338..2155168135a2 100644
--- a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java
+++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHive.java
@@ -341,7 +341,7 @@ public abstract class AbstractTestHive
             .add(new ColumnMetadata("smallint_to_bigint", SMALLINT))
             .add(new ColumnMetadata("integer_to_bigint", INTEGER))
             .add(new ColumnMetadata("integer_to_varchar", INTEGER))
-            .add(new ColumnMetadata("varchar_to_integer", createUnboundedVarcharType()))
+            //.add(new ColumnMetadata("varchar_to_integer", createUnboundedVarcharType())) // this coercion is not permitted in Hive 3. TODO test this on Hive < 3.
             .add(new ColumnMetadata("float_to_double", REAL))
            .add(new ColumnMetadata("varchar_to_drop_in_row", createUnboundedVarcharType()))
             .build();
@@ -363,10 +363,10 @@ private static RowType toRowType(List<ColumnMetadata> columns)

     private static final MaterializedResult MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_BEFORE =
             MaterializedResult.resultBuilder(SESSION, TINYINT, TINYINT, TINYINT, SMALLINT, SMALLINT, INTEGER, INTEGER, createUnboundedVarcharType(), REAL, createUnboundedVarcharType())
-                    .row((byte) -11, (byte) 12, (byte) -13, (short) 14, (short) 15, -16, 17, "2147483647", 18.0f, "2016-08-01")
-                    .row((byte) 21, (byte) -22, (byte) 23, (short) -24, (short) 25, 26, -27, "asdf", -28.0f, "2016-08-02")
-                    .row((byte) -31, (byte) -32, (byte) 33, (short) 34, (short) -35, 36, 37, "-923", 39.5f, "2016-08-03")
-                    .row(null, (byte) 42, (byte) 43, (short) 44, (short) -45, 46, 47, "2147483648", 49.5f, "2016-08-03")
+                    .row((byte) -11, (byte) 12, (byte) -13, (short) 14, (short) 15, -16, 17, /*"2147483647",*/ 18.0f, "2016-08-01")
+                    .row((byte) 21, (byte) -22, (byte) 23, (short) -24, (short) 25, 26, -27, /*"asdf",*/ -28.0f, "2016-08-02")
+                    .row((byte) -31, (byte) -32, (byte) 33, (short) 34, (short) -35, 36, 37, /*"-923",*/ 39.5f, "2016-08-03")
+                    .row(null, (byte) 42, (byte) 43, (short) 44, (short) -45, 46, 47, /*"2147483648",*/ 49.5f, "2016-08-03")
                     .build();

     private static final MaterializedResult MISMATCH_SCHEMA_TABLE_DATA_BEFORE =
@@ -379,7 +379,7 @@ private static RowType toRowType(List<ColumnMetadata> columns)
                         result.add(rowResult);
                         result.add(Arrays.asList(rowResult, null, rowResult));
                         result.add(ImmutableMap.of(rowResult.get(1), rowResult));
-                        result.add(rowResult.get(9));
+                        result.add(rowResult.get(8));
                         return new MaterializedRow(materializedRow.getPrecision(), result);
                     }).collect(toList()))
                     .build();
@@ -392,7 +392,7 @@ private static RowType toRowType(List<ColumnMetadata> columns)
             .add(new ColumnMetadata("smallint_to_bigint", BIGINT))
             .add(new ColumnMetadata("integer_to_bigint", BIGINT))
             .add(new ColumnMetadata("integer_to_varchar", createUnboundedVarcharType()))
-            .add(new ColumnMetadata("varchar_to_integer", INTEGER))
+            //.add(new ColumnMetadata("varchar_to_integer", INTEGER))
             .add(new ColumnMetadata("float_to_double", DOUBLE))
             .add(new ColumnMetadata("varchar_to_drop_in_row", createUnboundedVarcharType()))
             .build();
@@ -413,10 +413,10 @@ private static RowType toRowType(List<ColumnMetadata> columns)
     private static final MaterializedResult MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_AFTER =
             MaterializedResult.resultBuilder(SESSION, SMALLINT, INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, createUnboundedVarcharType(), INTEGER, DOUBLE, createUnboundedVarcharType())
-                    .row((short) -11, 12, -13L, 14, 15L, -16L, "17", 2147483647, 18.0, "2016-08-01")
-                    .row((short) 21, -22, 23L, -24, 25L, 26L, "-27", null, -28.0, "2016-08-02")
-                    .row((short) -31, -32, 33L, 34, -35L, 36L, "37", -923, 39.5, "2016-08-03")
-                    .row(null, 42, 43L, 44, -45L, 46L, "47", null, 49.5, "2016-08-03")
+                    .row((short) -11, 12, -13L, 14, 15L, -16L, "17", /*2147483647,*/ 18.0, "2016-08-01")
+                    .row((short) 21, -22, 23L, -24, 25L, 26L, "-27", /*null,*/ -28.0, "2016-08-02")
+                    .row((short) -31, -32, 33L, 34, -35L, 36L, "37", /*-923,*/ 39.5, "2016-08-03")
+                    .row(null, 42, 43L, 44, -45L, 46L, "47", /*null,*/ 49.5, "2016-08-03")
                     .build();

     private static final MaterializedResult MISMATCH_SCHEMA_TABLE_DATA_AFTER =
@@ -431,7 +431,7 @@ private static RowType toRowType(List<ColumnMetadata> columns)
                         result.add(appendFieldRowResult);
                         result.add(Arrays.asList(appendFieldRowResult, null, appendFieldRowResult));
                         result.add(ImmutableMap.of(result.get(1), dropFieldRowResult));
-                        result.add(result.get(9));
+                        result.add(result.get(8));
                         return new MaterializedRow(materializedRow.getPrecision(), result);
                     }).collect(toList()))
                     .build();
diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveCoercion.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveCoercion.java
index ba65d5329eaa..e45afb480df2 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveCoercion.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveCoercion.java
@@ -115,13 +115,13 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui
                 " int_to_bigint INT," +
                 " bigint_to_varchar BIGINT," +
                 " float_to_double " + floatType + "," +
-                " double_to_float DOUBLE," +
+                //" double_to_float DOUBLE," + // this coercion is not permitted in Hive 3. TODO test this on Hive < 3.
                 " shortdecimal_to_shortdecimal DECIMAL(10,2)," +
                 " shortdecimal_to_longdecimal DECIMAL(10,2)," +
                 " longdecimal_to_shortdecimal DECIMAL(20,12)," +
                 " longdecimal_to_longdecimal DECIMAL(20,12)," +
-                " float_to_decimal " + floatType + "," +
-                " double_to_decimal DOUBLE," +
+                //" float_to_decimal " + floatType + "," + // this coercion is not permitted in Hive 3. TODO test this on Hive < 3.
+                //" double_to_decimal DOUBLE," + // this coercion is not permitted in Hive 3. TODO test this on Hive < 3.
" decimal_to_float DECIMAL(10,5)," + " decimal_to_double DECIMAL(10,5)" + ") " + @@ -288,13 +288,13 @@ private void doTestHiveCoercion(HiveTableDefinition tableDefinition) " INTEGER '2323', " + " 12345, " + " REAL '0.5', " + - " DOUBLE '0.5', " + + //" DOUBLE '0.5', " + " DECIMAL '12345678.12', " + " DECIMAL '12345678.12', " + " DECIMAL '12345678.123456123456', " + " DECIMAL '12345678.123456123456', " + - " %2$s '12345.12345', " + - " DOUBLE '12345.12345', " + + //" %2$s '12345.12345', " + + //" DOUBLE '12345.12345', " + " DECIMAL '12345.12345', " + " DECIMAL '12345.12345', " + " 1), " + @@ -310,13 +310,13 @@ private void doTestHiveCoercion(HiveTableDefinition tableDefinition) " INTEGER '-2323', " + " -12345, " + " REAL '-1.5', " + - " DOUBLE '-1.5', " + + //" DOUBLE '-1.5', " + " DECIMAL '-12345678.12', " + " DECIMAL '-12345678.12', " + " DECIMAL '-12345678.123456123456', " + " DECIMAL '-12345678.123456123456', " + - " %2$s '-12345.12345', " + - " DOUBLE '-12345.12345', " + + //" %2$s '-12345.12345', " + + //" DOUBLE '-12345.12345', " + " DECIMAL '-12345.12345', " + " DECIMAL '-12345.12345', " + " 1)", @@ -344,13 +344,13 @@ private void doTestHiveCoercion(HiveTableDefinition tableDefinition) 2323L, "12345", 0.5, - 0.5, + //0.5, new BigDecimal("12345678.1200"), new BigDecimal("12345678.1200"), new BigDecimal("12345678.12"), new BigDecimal("12345678.12345612345600"), - new BigDecimal(floatToDecimalVal), - new BigDecimal("12345.12345"), + //new BigDecimal(floatToDecimalVal), + //new BigDecimal("12345.12345"), Float.parseFloat(decimalToFloatVal), 12345.12345, 1), @@ -366,13 +366,13 @@ private void doTestHiveCoercion(HiveTableDefinition tableDefinition) -2323L, "-12345", -1.5, - -1.5, + //-1.5, new BigDecimal("-12345678.1200"), new BigDecimal("-12345678.1200"), new BigDecimal("-12345678.12"), new BigDecimal("-12345678.12345612345600"), - new BigDecimal("-" + floatToDecimalVal), - new BigDecimal("-12345.12345"), + //new BigDecimal("-" + floatToDecimalVal), + //new BigDecimal("-12345.12345"), -Float.parseFloat(decimalToFloatVal), -12345.12345, 1)); @@ -391,13 +391,13 @@ else if (usingSimbaJdbcDriver(connection)) { 2323L, "12345", 0.5, - 0.5, + //0.5, new BigDecimal("12345678.1200"), new BigDecimal("12345678.1200"), new BigDecimal("12345678.12"), new BigDecimal("12345678.12345612345600"), - new BigDecimal(floatToDecimalVal), - new BigDecimal("12345.12345"), + //new BigDecimal(floatToDecimalVal), + //new BigDecimal("12345.12345"), Float.parseFloat(decimalToFloatVal), 12345.12345, 1), @@ -413,13 +413,13 @@ else if (usingSimbaJdbcDriver(connection)) { -2323L, "-12345", -1.5, - -1.5, + //-1.5, new BigDecimal("-12345678.1200"), new BigDecimal("-12345678.1200"), new BigDecimal("-12345678.12"), new BigDecimal("-12345678.12345612345600"), - new BigDecimal("-" + floatToDecimalVal), - new BigDecimal("-12345.12345"), + //new BigDecimal("-" + floatToDecimalVal), + //new BigDecimal("-12345.12345"), -Float.parseFloat(decimalToFloatVal), -12345.12345, 1)); @@ -462,13 +462,13 @@ private void assertProperAlteredTableSchema(String tableName) row("int_to_bigint", "bigint"), row("bigint_to_varchar", "varchar"), row("float_to_double", "double"), - row("double_to_float", floatType), + //row("double_to_float", floatType), row("shortdecimal_to_shortdecimal", "decimal(18,4)"), row("shortdecimal_to_longdecimal", "decimal(20,4)"), row("longdecimal_to_shortdecimal", "decimal(12,2)"), row("longdecimal_to_longdecimal", "decimal(38,14)"), - row("float_to_decimal", "decimal(10,5)"), - row("double_to_decimal", "decimal(10,5)"), 
+                //row("float_to_decimal", "decimal(10,5)"),
+                //row("double_to_decimal", "decimal(10,5)"),
                 row("decimal_to_float", floatType),
                 row("decimal_to_double", "double"),
                 row("id", "bigint"));
@@ -492,13 +492,13 @@ private void assertColumnTypes(QueryResult queryResult, String tableName)
                 BIGINT,
                 VARCHAR,
                 DOUBLE,
-                floatType,
-                DECIMAL,
-                DECIMAL,
+                //floatType,
                 DECIMAL,
                 DECIMAL,
                 DECIMAL,
                 DECIMAL,
+                //DECIMAL,
+                //DECIMAL,
                 floatType,
                 DOUBLE,
                 BIGINT);
@@ -516,13 +516,13 @@ else if (usingSimbaJdbcDriver(connection)) {
                 BIGINT,
                 VARCHAR,
                 DOUBLE,
-                floatType,
-                DECIMAL,
-                DECIMAL,
+                //floatType,
                 DECIMAL,
                 DECIMAL,
                 DECIMAL,
                 DECIMAL,
+                //DECIMAL,
+                //DECIMAL,
                 floatType,
                 DOUBLE,
                 BIGINT);
@@ -547,13 +547,13 @@ private static void alterTableColumnTypes(String tableName)
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN int_to_bigint int_to_bigint bigint", tableName));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN bigint_to_varchar bigint_to_varchar string", tableName));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN float_to_double float_to_double double", tableName));
-        onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_float double_to_float %s", tableName, floatType));
+        //onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_float double_to_float %s", tableName, floatType));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN shortdecimal_to_shortdecimal shortdecimal_to_shortdecimal DECIMAL(18,4)", tableName));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN shortdecimal_to_longdecimal shortdecimal_to_longdecimal DECIMAL(20,4)", tableName));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN longdecimal_to_shortdecimal longdecimal_to_shortdecimal DECIMAL(12,2)", tableName));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN longdecimal_to_longdecimal longdecimal_to_longdecimal DECIMAL(38,14)", tableName));
-        onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN float_to_decimal float_to_decimal DECIMAL(10,5)", tableName));
-        onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_decimal double_to_decimal DECIMAL(10,5)", tableName));
+        //onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN float_to_decimal float_to_decimal DECIMAL(10,5)", tableName));
+        //onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_decimal double_to_decimal DECIMAL(10,5)", tableName));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN decimal_to_float decimal_to_float %s", tableName, floatType));
         onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN decimal_to_double decimal_to_double double", tableName));
     }
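The coercion columns above are removed outright rather than made conditional; the TODOs point at the eventual fix, which is to include them only when the metastore is older than Hive 3. A sketch of that gating (a hypothetical helper; getHiveVersionMajor() is the accessor the next patch relies on, and the column list here is abbreviated):

    import java.util.ArrayList;
    import java.util.List;

    // Sketch: build the coercion column list per Hive version instead of
    // commenting columns out of the test table definition.
    public class CoercionColumns
    {
        public static List<String> coercionColumns(int hiveVersionMajor, String floatType)
        {
            List<String> columns = new ArrayList<>();
            columns.add("int_to_bigint INT");
            columns.add("float_to_double " + floatType);
            if (hiveVersionMajor < 3) {
                // Hive 3 no longer permits these narrowing coercions
                columns.add("double_to_float DOUBLE");
                columns.add("float_to_decimal " + floatType);
                columns.add("double_to_decimal DOUBLE");
            }
            columns.add("decimal_to_float DECIMAL(10,5)");
            columns.add("decimal_to_double DECIMAL(10,5)");
            return columns;
        }
    }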
From 64d4090632a77b8607adb5bfc225bd1ada762cd4 Mon Sep 17 00:00:00 2001
From: Piotr Findeisen
Date: Thu, 5 Sep 2019 10:35:34 +0200
Subject: [PATCH 17/17] Disable tests broken by Hive breaking changes

---
 .../io/prestosql/plugin/hive/TestHive.java    | 29 +++++++++++++++++++
 .../TestAllDatatypesFromHiveConnector.java    | 13 ++++---
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java b/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java
index 17463aab11cb..493673871bab 100644
--- a/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java
+++ b/presto-hive-hadoop2/src/test/java/io/prestosql/plugin/hive/TestHive.java
@@ -25,6 +25,7 @@
 import static com.google.common.base.Preconditions.checkState;
 import static java.lang.Integer.parseInt;
 import static java.util.Objects.requireNonNull;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;

 public class TestHive
         extends AbstractTestHive
@@ -72,4 +73,32 @@ public void testGetPartitionSplitsTableOfflinePartition()
         super.testGetPartitionSplitsTableOfflinePartition();
     }
+
+    @Override
+    public void testTypesRcBinary()
+            throws Exception
+    {
+        if (getHiveVersionMajor() >= 3) {
+            // TODO (https://github.com/prestosql/presto/issues/1218) requires https://issues.apache.org/jira/browse/HIVE-22167
+            assertThatThrownBy(super::testTypesRcBinary)
+                    .isInstanceOf(AssertionError.class)
+                    .hasMessage("expected [2011-05-06 01:23:09.123] but found [2011-05-06 07:08:09.123]");
+            return;
+        }
+        super.testTypesRcBinary();
+    }
+
+    @Override
+    public void testTypesParquet()
+            throws Exception
+    {
+        if (getHiveVersionMajor() >= 3) {
+            // TODO (https://github.com/prestosql/presto/issues/1218) requires https://issues.apache.org/jira/browse/HIVE-21002
+            assertThatThrownBy(super::testTypesParquet)
+                    .isInstanceOf(AssertionError.class)
+                    .hasMessage("expected [2011-05-06 01:23:09.123] but found [2011-05-06 07:08:09.123]");
+            return;
+        }
+        super.testTypesParquet();
+    }
 }
diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAllDatatypesFromHiveConnector.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAllDatatypesFromHiveConnector.java
index e9678b6bb53f..2d118c3ab1f1 100644
--- a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAllDatatypesFromHiveConnector.java
+++ b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestAllDatatypesFromHiveConnector.java
@@ -13,7 +13,6 @@
  */
 package io.prestosql.tests.hive;

-import io.prestosql.tempto.ProductTest;
 import io.prestosql.tempto.Requirement;
 import io.prestosql.tempto.Requirements;
 import io.prestosql.tempto.RequirementsProvider;
@@ -65,7 +64,7 @@
 import static java.sql.JDBCType.VARCHAR;

 public class TestAllDatatypesFromHiveConnector
-        extends ProductTest
+        extends HiveProductTest
 {
     public static final class TextRequirements
             implements RequirementsProvider
@@ -276,7 +275,10 @@ public void testSelectAllDatatypesAvro()
                 234.567,
                 new BigDecimal("346"),
                 new BigDecimal("345.67800"),
-                Timestamp.valueOf(LocalDateTime.of(2015, 5, 10, 12, 15, 35, 123_000_000)),
+                getHiveVersionMajor() < 3
+                        ? Timestamp.valueOf(LocalDateTime.of(2015, 5, 10, 12, 15, 35, 123_000_000))
+                        // TODO (https://github.com/prestosql/presto/issues/1218) requires https://issues.apache.org/jira/browse/HIVE-21002
+                        : Timestamp.valueOf(LocalDateTime.of(2015, 5, 10, 18, 0, 35, 123_000_000)),
                 Date.valueOf("2015-05-10"),
                 "ala ma kota",
                 "ala ma kot",
@@ -395,7 +397,10 @@ public void testSelectAllDatatypesParquetFile()
                 234.567,
                 new BigDecimal("346"),
                 new BigDecimal("345.67800"),
-                Timestamp.valueOf(LocalDateTime.of(2015, 5, 10, 12, 15, 35, 123_000_000)),
+                getHiveVersionMajor() < 3
+                        ? Timestamp.valueOf(LocalDateTime.of(2015, 5, 10, 12, 15, 35, 123_000_000))
+                        // TODO (https://github.com/prestosql/presto/issues/1218) requires https://issues.apache.org/jira/browse/HIVE-21002
+                        : Timestamp.valueOf(LocalDateTime.of(2015, 5, 10, 18, 0, 35, 123_000_000)),
                 "ala ma kota",
                 "ala ma kot",
                 "ala ma ",