Skip to content

Commit

Permalink
HIVE-28224: Upgrade Orc version in Hive to 1.9.3
Browse files (browse the repository at this point in the history)
  • Loading branch information
Dmitriy Fingerman committed May 1, 2024
1 parent c7bf32a commit d7ae5d1
Show file tree
Hide file tree
Showing 21 changed files with 73 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask compaction ids, as they are allocated in parallel threads
--! qt:replace:/^[0-9]/#Masked#/
-- Mask the removed-files-size value in the snapshot summary
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/

set hive.llap.io.enabled=true;
set hive.vectorized.execution.enabled=true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ Table Parameters:
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":2,\"fields\":[{\"id\":1,\"name\":\"fname\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"address\",\"required\":false,\"type\":\"string\"}]}
current-snapshot-id #Masked#
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"3167\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-timestamp-ms #Masked#
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]}
format-version 2
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@
<mysql.version>8.0.31</mysql.version>
<postgres.version>42.7.3</postgres.version>
<oracle.version>21.3.0.0</oracle.version>
<opencsv.version>2.3</opencsv.version>
<orc.version>1.8.5</orc.version>
<opencsv.version>5.9</opencsv.version>
<orc.version>1.9.3</orc.version>
<mockito-core.version>3.4.4</mockito-core.version>
<mockito-inline.version>4.11.0</mockito-inline.version>
<mina.version>2.0.0-M5</mina.version>
Expand Down
16 changes: 11 additions & 5 deletions ql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -536,11 +536,6 @@
<groupId>stax</groupId>
<artifactId>stax-api</artifactId>
</dependency>
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>${opencsv.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-standalone-metastore-server</artifactId>
Expand Down Expand Up @@ -759,6 +754,17 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>${opencsv.version}</version>
<exclusions>
<exclusion>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-dag</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
--! qt:dataset:src
--! qt:dataset:alltypesorc
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-- Try to run incremental on a non-transactional MV in presence of delete operations
-- Compiler should fall back to full rebuild.

--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
-- Test Incremental rebuild of materialized view without aggregate when source tables have
-- 1) insert operations only
-- 2) update/delete operations since last rebuild.
Expand Down
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_llap_nonvector.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--! qt:dataset:alltypesorc
--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/

set hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
Expand Down
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_merge12.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--! qt:replace:/(File Version:)(.+)/$1#Masked#/
--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
set hive.vectorized.execution.enabled=false;

CREATE TABLE `alltypesorc3xcols`(
Expand Down
2 changes: 2 additions & 0 deletions ql/src/test/queries/clientpositive/stats_part.q
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-- Mask the totalSize value as it can have slight variability, causing test flakiness
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
set hive.stats.dbclass=fs;
set hive.stats.fetch.column.stats=true;
set datanucleus.cache.collections=false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ Table Parameters:
numFiles 0
numRows 0
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -173,7 +173,7 @@ Table Parameters:
numFiles 1
numRows 12288
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -344,7 +344,7 @@ Table Parameters:
numFiles 1
numRows 2
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -440,7 +440,7 @@ Table Parameters:
numFiles 2
numRows 4
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -532,7 +532,7 @@ Table Parameters:
numFiles 3
numRows 12292
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -603,7 +603,7 @@ Table Parameters:
numFiles 0
numRows 0
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -648,7 +648,7 @@ Table Parameters:
numFiles 1
numRows 500
rawDataSize 5312
totalSize #Masked#
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -735,7 +735,7 @@ Partition Parameters:
numFiles 1
numRows 0
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -860,7 +860,7 @@ Partition Parameters:
numFiles 2
numRows 0
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -910,7 +910,7 @@ Partition Parameters:
numFiles 2
numRows 502
rawDataSize 5318
totalSize #Masked#
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,17 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: (b = 1) (type: boolean)
Statistics: Num rows: 69 Data size: 13710 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (b = 1) (type: boolean)
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ Table Parameters:
numFiles 2
numRows 10
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down
24 changes: 12 additions & 12 deletions ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 100
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down Expand Up @@ -249,17 +249,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1025
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cstring1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down Expand Up @@ -1354,17 +1354,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector_2
Statistics: Num rows: 12288 Data size: 4468250 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down
10 changes: 5 additions & 5 deletions ql/src/test/results/clientpositive/llap/stats_part.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ Table Parameters:
numPartitions 0
numRows 0
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -231,7 +231,7 @@ Table Parameters:
numPartitions 3
numRows 6
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -292,7 +292,7 @@ Table Parameters:
numPartitions 3
numRows 8
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -449,7 +449,7 @@ Table Parameters:
numPartitions 3
numRows 8
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -534,7 +534,7 @@ Table Parameters:
numPartitions 3
numRows 8
rawDataSize 0
totalSize #Masked#
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ POSTHOOK: Input: default@decimal_vgby
626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.091453237337 5744.897264034264 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794881 6880.679250101608
6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653
762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.74432689170000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.078394999846250000 3491310.1327026924 4937458.140118757
NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135323 5696.410307714464 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565151 6822.606289190915
NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135325 5696.4103077144655 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.4957485651385 6822.606289190904
PREHOOK: query: CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS
SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(11,5)) AS cdecimal1,
CAST (((cdouble*9.3)/13) AS DECIMAL(16,0)) AS cdecimal2,
Expand Down Expand Up @@ -828,7 +828,7 @@ POSTHOOK: Input: default@decimal_vgby_small
626923679 1024 9723.40270 -9778.95135 10541.05247 10.293996553 5742.091453325365 5744.897264122335 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.4830839098695 5696.410309489299 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909676 5696.410309489105 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
PREHOOK: query: SELECT SUM(HASH(*))
FROM (SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
Expand All @@ -847,4 +847,4 @@ FROM (SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby_small
#### A masked pattern was here ####
95165244160
95767761728
Loading

0 comments on commit d7ae5d1

Please sign in to comment.