Skip to content

Commit

Permalink
HIVE-28224: Upgrade Orc version in Hive to 1.9.4
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitriy Fingerman committed Jul 29, 2024
1 parent f5ca9e9 commit a001586
Show file tree
Hide file tree
Showing 12 changed files with 29 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask compaction ids, as they are allocated in parallel threads
--! qt:replace:/^[0-9]/#Masked#/
-- Mask the removed-files-size value in the snapshot summary
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/

set hive.llap.io.enabled=true;
set hive.vectorized.execution.enabled=true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ Table Parameters:
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":2,\"fields\":[{\"id\":1,\"name\":\"fname\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"address\",\"required\":false,\"type\":\"string\"}]}
current-snapshot-id #Masked#
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"3167\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-timestamp-ms #Masked#
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]}
format-version 2
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
<postgres.version>42.7.3</postgres.version>
<oracle.version>21.3.0.0</oracle.version>
<opencsv.version>5.9</opencsv.version>
<orc.version>1.8.5</orc.version>
<orc.version>1.9.4</orc.version>
<mockito-core.version>3.4.4</mockito-core.version>
<mockito-inline.version>4.11.0</mockito-inline.version>
<mina.version>2.0.0-M5</mina.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,7 @@ public void nextVector(ColumnVector previousVector, boolean[] isNull, final int
FilterContext filterContext, ReadPhase readPhase) throws IOException {
if (vectors == null) {
super.nextVector(previousVector, isNull, batchSize, filterContext, readPhase);
previousVector.isRepeating = false;
return;
}
vectors.get(vectorIndex++).shallowCopyTo(previousVector);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-- Try to run an incremental rebuild on a non-transactional MV in the presence of delete operations.
-- The compiler should fall back to a full rebuild.

--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

Expand Down
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_llap_nonvector.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--! qt:dataset:alltypesorc
--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/

set hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
Expand Down
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_merge12.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--! qt:replace:/(File Version:)(.+)/$1#Masked#/
--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
set hive.vectorized.execution.enabled=false;

CREATE TABLE `alltypesorc3xcols`(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,17 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: (b = 1) (type: boolean)
Statistics: Num rows: 69 Data size: 13710 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (b = 1) (type: boolean)
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Expand Down
24 changes: 12 additions & 12 deletions ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 100
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down Expand Up @@ -249,17 +249,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1025
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cstring1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down Expand Up @@ -1354,17 +1354,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector_2
Statistics: Num rows: 12288 Data size: 4468250 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ Found 4 items
-rw-rw-rw- 3 ### USER ### ### GROUP ### 8753 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7531 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7174 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7066 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7065 ### HDFS DATE ### hdfs://### HDFS PATH ###
PREHOOK: query: insert into over10k_orc_bucketed_n0 select * from over10k_n9
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n9
Expand All @@ -402,8 +402,8 @@ Found 8 items
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7531 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7174 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7174 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7066 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7066 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7065 ### HDFS DATE ### hdfs://### HDFS PATH ###
-rw-rw-rw- 3 ### USER ### ### GROUP ### 7065 ### HDFS DATE ### hdfs://### HDFS PATH ###
PREHOOK: query: select distinct 7 as seven, INPUT__FILE__NAME from over10k_orc_bucketed_n0
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_orc_bucketed_n0
Expand Down
2 changes: 1 addition & 1 deletion ql/src/test/results/clientpositive/tez/orc_merge12.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ Stripes:
Entry 0: count: 6889 hasNull: true true: 3402 positions: 0,0,0,0,0,0,0,0
Entry 1: count: 2284 hasNull: true true: 581 positions: 0,168,8,0,0,520,97,1

File length: 3004637 bytes
File length: #Masked# bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion standalone-metastore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
<libthrift.version>0.16.0</libthrift.version>
<log4j2.version>2.18.0</log4j2.version>
<mockito-core.version>3.4.4</mockito-core.version>
<orc.version>1.8.5</orc.version>
<orc.version>1.9.4</orc.version>
<protobuf.version>3.24.4</protobuf.version>
<io.grpc.version>1.51.0</io.grpc.version>
<sqlline.version>1.9.0</sqlline.version>
Expand Down

0 comments on commit a001586

Please sign in to comment.