HIVE-28224: Upgrade Orc version in Hive to 1.9.4
Dmitriy Fingerman committed Jul 31, 2024
1 parent 09553fc commit d4f1b57
Showing 14 changed files with 50 additions and 44 deletions.
@@ -14,6 +14,8 @@
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask compaction id as they will be allocated in parallel threads
--! qt:replace:/^[0-9]/#Masked#/
-- Mask removed file size
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/

set hive.llap.io.enabled=true;
set hive.vectorized.execution.enabled=true;
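The two qt:replace directives added above mask the removed-files-size value in the expected Iceberg snapshot summary, presumably because the serialized file size shifts with the new ORC version. As a rough, hypothetical illustration of how such a mask rewrites a captured output line (the sample line, simplified pattern, and class name below are illustrative, not part of the commit):

import java.util.regex.Pattern;

public class MaskExample {
  public static void main(String[] args) {
    // A line resembling the snapshot summary in the .q.out file; \" is a literal backslash-quote.
    String line = "current-snapshot-summary {\\\"removed-files-size\\\":\\\"3167\\\",\\\"total-records\\\":\\\"10\\\"}";
    // Simplified form of the pattern added in this commit: keep the key and quotes, mask only the digits.
    Pattern p = Pattern.compile("(\\\\\"removed-files-size\\\\\":\\\\\")(\\d+)(\\\\\")");
    System.out.println(p.matcher(line).replaceAll("$1#Masked#$3"));
    // Prints: current-snapshot-summary {\"removed-files-size\":\"#Masked#\",\"total-records\":\"10\"}
  }
}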
@@ -227,7 +227,7 @@ Table Parameters:
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":2,\"fields\":[{\"id\":1,\"name\":\"fname\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"address\",\"required\":false,\"type\":\"string\"}]}
current-snapshot-id #Masked#
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"3167\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-timestamp-ms #Masked#
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]}
format-version 2
@@ -178,22 +178,22 @@ private void testRebalanceCompactionWithParallelDeleteAsSecond(boolean optimisti
"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":4}\t13\t13",
},
{
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4",
},
{
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t6\t4",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t5\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3",
},
{
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t6\t3",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t5\t2",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t6\t2",
},
};
verifyRebalance(testDataProvider, tableName, null, expectedBuckets,
@@ -234,22 +234,22 @@ public void testRebalanceCompactionOfNotPartitionedImplicitlyBucketedTableWithOr
},
{
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":5}\t12\t12",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t2\t4",
"{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t5\t4",
},
{
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t6\t4",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t5\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t2\t3",
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3",
},
{
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t6\t3",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t5\t2",
"{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t6\t2",
},
};
verifyRebalance(testDataProvider, tableName, null, expectedBuckets,
@@ -525,6 +525,8 @@ private TestDataProvider prepareRebalanceTestData(String tableName) throws Excep
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2",
"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t5\t3",
"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12",
"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13",
"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14",
@@ -533,9 +535,7 @@
"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17",
},
{
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t5\t2",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t5\t3",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t2\t4",
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t4",
},
{
"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t3",
2 changes: 1 addition & 1 deletion pom.xml
@@ -182,7 +182,7 @@
<postgres.version>42.7.3</postgres.version>
<oracle.version>21.3.0.0</oracle.version>
<opencsv.version>5.9</opencsv.version>
<orc.version>1.8.5</orc.version>
<orc.version>1.9.4</orc.version>
<mockito-core.version>3.4.4</mockito-core.version>
<mockito-inline.version>4.11.0</mockito-inline.version>
<mina.version>2.0.0-M5</mina.version>
@@ -1078,6 +1078,7 @@ public void nextVector(ColumnVector previousVector, boolean[] isNull, final int
FilterContext filterContext, ReadPhase readPhase) throws IOException {
if (vectors == null) {
super.nextVector(previousVector, isNull, batchSize, filterContext, readPhase);
previousVector.isRepeating = false;
return;
}
vectors.get(vectorIndex++).shallowCopyTo(previousVector);
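The added previousVector.isRepeating = false; clears the flag after the parent reader fills the batch. A plausible reading (an assumption, not stated in the commit) is that a ColumnVector reused across batches can still carry isRepeating = true from an earlier batch, so consumers would keep reading element 0 for every row even though fresh per-row values were written. A minimal sketch of that failure mode, assuming the hive-storage-api LongColumnVector is on the classpath:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class IsRepeatingSketch {
  public static void main(String[] args) {
    LongColumnVector col = new LongColumnVector(4);
    col.vector[0] = 7;
    col.isRepeating = true;   // state left behind by a previous, genuinely repeating batch
    // A later batch writes per-row values but does not clear the flag:
    col.vector[1] = 8;
    col.vector[2] = 9;
    long row2 = col.isRepeating ? col.vector[0] : col.vector[2];       // 7 -- stale repeat wins
    col.isRepeating = false;  // what the added line restores after super.nextVector(...)
    long fixedRow2 = col.isRepeating ? col.vector[0] : col.vector[2];  // 9 -- per-row value read correctly
    System.out.println(row2 + " vs " + fixedRow2);
  }
}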
@@ -1,4 +1,5 @@
--! qt:replace:/(File Version:)(.+)/$1#Masked#/
--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
-- SORT_QUERY_RESULTS
SET hive.vectorized.execution.enabled=FALSE;
SET hive.mapred.mode=nonstrict;
@@ -1,6 +1,6 @@
-- Try to run incremental on a non-transactional MV in presence of delete operations
-- Compiler should fall back to full rebuild.

--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_llap_nonvector.q
@@ -1,4 +1,5 @@
--! qt:dataset:alltypesorc
--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/

set hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_merge12.q
@@ -1,4 +1,5 @@
--! qt:replace:/(File Version:)(.+)/$1#Masked#/
--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
set hive.vectorized.execution.enabled=false;

CREATE TABLE `alltypesorc3xcols`(
@@ -182,7 +182,7 @@ Stripes:
Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25
Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25

File length: 1217 bytes
File length: #Masked# bytes
Padding length: 0 bytes
Padding ratio: 0%

@@ -299,7 +299,7 @@ Stripes:
Entry 0: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25
Stripe level merge: numHashFunctions: 6 bitCount: 81472 popCount: 6 loadFactor: 0.0001 expectedFpp: 1.5953551E-25

File length: 1211 bytes
File length: #Masked# bytes
Padding length: 0 bytes
Padding ratio: 0%

@@ -79,17 +79,17 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: (b = 1) (type: boolean)
Statistics: Num rows: 69 Data size: 13710 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (b = 1) (type: boolean)
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
24 changes: 12 additions & 12 deletions ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
@@ -94,17 +94,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 100
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -249,17 +249,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1025
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cstring1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1354,17 +1354,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector_2
Statistics: Num rows: 12288 Data size: 4468250 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
2 changes: 1 addition & 1 deletion ql/src/test/results/clientpositive/tez/orc_merge12.q.out
@@ -814,7 +814,7 @@ Stripes:
Entry 0: count: 6889 hasNull: true true: 3402 positions: 0,0,0,0,0,0,0,0
Entry 1: count: 2284 hasNull: true true: 581 positions: 0,168,8,0,0,520,97,1

File length: 3004637 bytes
File length: #Masked# bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
2 changes: 1 addition & 1 deletion standalone-metastore/pom.xml
@@ -91,7 +91,7 @@
<libthrift.version>0.16.0</libthrift.version>
<log4j2.version>2.18.0</log4j2.version>
<mockito-core.version>3.4.4</mockito-core.version>
<orc.version>1.8.5</orc.version>
<orc.version>1.9.4</orc.version>
<protobuf.version>3.24.4</protobuf.version>
<io.grpc.version>1.51.0</io.grpc.version>
<sqlline.version>1.9.0</sqlline.version>