From a00158675c5086ce62b1d0cc83b4ecca675a8bc5 Mon Sep 17 00:00:00 2001 From: Dmitriy Fingerman Date: Thu, 18 Apr 2024 13:00:22 -0400 Subject: [PATCH] HIVE-28224: Upgrade Orc version in Hive to 1.9.4 --- ...ceberg_major_compaction_schema_evolution.q | 2 ++ ...rg_major_compaction_schema_evolution.q.out | 2 +- pom.xml | 2 +- .../orc/encoded/EncodedTreeReaderFactory.java | 1 + .../materialized_view_create_rewrite_10.q | 2 +- .../clientpositive/orc_llap_nonvector.q | 1 + .../test/queries/clientpositive/orc_merge12.q | 1 + .../materialized_view_create_rewrite_10.q.out | 8 +++---- .../llap/orc_llap_nonvector.q.out | 24 +++++++++---------- .../tez/acid_vectorization_original_tez.q.out | 6 ++--- .../clientpositive/tez/orc_merge12.q.out | 2 +- standalone-metastore/pom.xml | 2 +- 12 files changed, 29 insertions(+), 24 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_schema_evolution.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_schema_evolution.q index 73dbe19a94b5f..8501e694de025 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_schema_evolution.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_schema_evolution.q @@ -14,6 +14,8 @@ --! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ -- Mask compaction id as they will be allocated in parallel threads --! qt:replace:/^[0-9]/#Masked#/ +-- Mask removed file size +--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ set hive.llap.io.enabled=true; set hive.vectorized.execution.enabled=true; diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out index cfe8f3d3d46f2..1ca9d2b6eef13 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out @@ -227,7 +227,7 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":2,\"fields\":[{\"id\":1,\"name\":\"fname\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"address\",\"required\":false,\"type\":\"string\"}]} current-snapshot-id #Masked# - current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"3167\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"} + current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"} current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 diff --git a/pom.xml b/pom.xml index d69626e0f2891..778a8e6392038 100644 --- a/pom.xml +++ b/pom.xml @@ -182,7 +182,7 @@ 42.7.3 21.3.0.0 5.9 - 1.8.5 + 1.9.4 3.4.4 4.11.0 2.0.0-M5 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java index 46e5a3c3be897..869528fc889e9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java @@ -1078,6 +1078,7 @@ public void nextVector(ColumnVector previousVector, boolean[] isNull, final int FilterContext filterContext, ReadPhase readPhase) throws IOException { if (vectors == null) { super.nextVector(previousVector, isNull, batchSize, filterContext, readPhase); + previousVector.isRepeating = false; return; } vectors.get(vectorIndex++).shallowCopyTo(previousVector); diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q index 417872c272722..ff99561a8cd15 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q +++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_10.q @@ -1,6 +1,6 @@ -- Try to run incremental on a non-transactional MV in presence of delete operations -- Compiler should fall back to full rebuild. - +--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; diff --git a/ql/src/test/queries/clientpositive/orc_llap_nonvector.q b/ql/src/test/queries/clientpositive/orc_llap_nonvector.q index 95a0384ccaf7d..31a3d63c58acf 100644 --- a/ql/src/test/queries/clientpositive/orc_llap_nonvector.q +++ b/ql/src/test/queries/clientpositive/orc_llap_nonvector.q @@ -1,4 +1,5 @@ --! qt:dataset:alltypesorc +--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; diff --git a/ql/src/test/queries/clientpositive/orc_merge12.q b/ql/src/test/queries/clientpositive/orc_merge12.q index 348c3a8f5e086..d1487d2209853 100644 --- a/ql/src/test/queries/clientpositive/orc_merge12.q +++ b/ql/src/test/queries/clientpositive/orc_merge12.q @@ -1,4 +1,5 @@ --! qt:replace:/(File Version:)(.+)/$1#Masked#/ +--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/ set hive.vectorized.execution.enabled=false; CREATE TABLE `alltypesorc3xcols`( diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out index 86cce82f092cf..5ff07b9fc54d9 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out @@ -79,17 +79,17 @@ STAGE PLANS: TableScan alias: t1 filterExpr: (b = 1) (type: boolean) - Statistics: Num rows: 69 Data size: 13710 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (b = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), 1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out index 669df679d82c1..8ef0f0dde97ae 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out @@ -94,17 +94,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector - Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,17 +249,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector - Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1025 - Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1354,17 +1354,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector_2 - Statistics: Num rows: 12288 Data size: 4468250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out index 3c6ddb257a4a3..894566a3bf18d 100644 --- a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out +++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out @@ -375,7 +375,7 @@ Found 4 items -rw-rw-rw- 3 ### USER ### ### GROUP ### 8753 ### HDFS DATE ### hdfs://### HDFS PATH ### -rw-rw-rw- 3 ### USER ### ### GROUP ### 7531 ### HDFS DATE ### hdfs://### HDFS PATH ### -rw-rw-rw- 3 ### USER ### ### GROUP ### 7174 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### 7066 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 7065 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: insert into over10k_orc_bucketed_n0 select * from over10k_n9 PREHOOK: type: QUERY PREHOOK: Input: default@over10k_n9 @@ -402,8 +402,8 @@ Found 8 items -rw-rw-rw- 3 ### USER ### ### GROUP ### 7531 ### HDFS DATE ### hdfs://### HDFS PATH ### -rw-rw-rw- 3 ### USER ### ### GROUP ### 7174 ### HDFS DATE ### hdfs://### HDFS PATH ### -rw-rw-rw- 3 ### USER ### ### GROUP ### 7174 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### 7066 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### 7066 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 7065 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 7065 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select distinct 7 as seven, INPUT__FILE__NAME from over10k_orc_bucketed_n0 PREHOOK: type: QUERY PREHOOK: Input: default@over10k_orc_bucketed_n0 diff --git a/ql/src/test/results/clientpositive/tez/orc_merge12.q.out b/ql/src/test/results/clientpositive/tez/orc_merge12.q.out index 73526e4e93e23..3bda372791063 100644 --- a/ql/src/test/results/clientpositive/tez/orc_merge12.q.out +++ b/ql/src/test/results/clientpositive/tez/orc_merge12.q.out @@ -814,7 +814,7 @@ Stripes: Entry 0: count: 6889 hasNull: true true: 3402 positions: 0,0,0,0,0,0,0,0 Entry 1: count: 2284 hasNull: true true: 581 positions: 0,168,8,0,0,520,97,1 -File length: 3004637 bytes +File length: #Masked# bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml index 3964dfba17b33..2ada7d9869b2f 100644 --- a/standalone-metastore/pom.xml +++ b/standalone-metastore/pom.xml @@ -91,7 +91,7 @@ 0.16.0 2.18.0 3.4.4 - 1.8.5 + 1.9.4 3.24.4 1.51.0 1.9.0