Skip to content

Commit

Permalink
HIVE-28224: Upgrade Orc version in Hive to 1.9.3
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitriy Fingerman committed Apr 29, 2024
1 parent e980c7a commit 7b803ca
Show file tree
Hide file tree
Showing 23 changed files with 76 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask compaction ids, as they are allocated in parallel threads
--! qt:replace:/^[0-9]/#Masked#/
-- Mask removed file size
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/

set hive.llap.io.enabled=true;
set hive.vectorized.execution.enabled=true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ Table Parameters:
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":2,\"fields\":[{\"id\":1,\"name\":\"fname\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"address\",\"required\":false,\"type\":\"string\"}]}
current-snapshot-id #Masked#
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"3167\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-summary {\"deleted-data-files\":\"6\",\"deleted-records\":\"6\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"2\",\"total-records\":\"10\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"10\",\"total-delete-files\":\"8\",\"total-position-deletes\":\"8\",\"total-equality-deletes\":\"0\"}
current-snapshot-timestamp-ms #Masked#
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]}
format-version 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ public void testStatsAfterQueryCompactionOnTez() throws Exception {
parameters = Hive.get().getTable(tblName).getParameters();
Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "736", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "735", parameters.get("totalSize"));
}

@Test
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@
<mysql.version>8.0.31</mysql.version>
<postgres.version>42.7.3</postgres.version>
<oracle.version>21.3.0.0</oracle.version>
<opencsv.version>2.3</opencsv.version>
<orc.version>1.8.5</orc.version>
<opencsv.version>5.9</opencsv.version>
<orc.version>1.9.3</orc.version>
<mockito-core.version>3.4.4</mockito-core.version>
<mockito-inline.version>4.11.0</mockito-inline.version>
<mina.version>2.0.0-M5</mina.version>
Expand Down
16 changes: 11 additions & 5 deletions ql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -536,11 +536,6 @@
<groupId>stax</groupId>
<artifactId>stax-api</artifactId>
</dependency>
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>${opencsv.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-standalone-metastore-server</artifactId>
Expand Down Expand Up @@ -759,6 +754,17 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>${opencsv.version}</version>
<exclusions>
<exclusion>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-dag</artifactId>
Expand Down
4 changes: 2 additions & 2 deletions ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ public void testCompactStatsGather() throws Exception {
.getParameters();
Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "692", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "693", parameters.get("totalSize"));

int[][] targetVals2 = {{5, 1, 1}, {5, 2, 2}, {5, 3, 1}, {5, 4, 2}};
runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals2));
Expand Down Expand Up @@ -844,7 +844,7 @@ public void testCompactStatsGather() throws Exception {
.getParameters();
Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
Assert.assertEquals("The number of rows is differing from the expected", "4", parameters.get("numRows"));
Assert.assertEquals("The total table size is differing from the expected", "705", parameters.get("totalSize"));
Assert.assertEquals("The total table size is differing from the expected", "704", parameters.get("totalSize"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
--! qt:dataset:src
--! qt:dataset:alltypesorc
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-- Try to run an incremental rebuild on a non-transactional MV in the presence of delete operations.
-- The compiler should fall back to a full rebuild.

--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
-- Test Incremental rebuild of materialized view without aggregate when source tables have
-- 1) insert operations only
-- 2) update/delete operations since last rebuild.
Expand Down
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_llap_nonvector.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--! qt:dataset:alltypesorc
--! qt:replace:/(\S Data size\:\s+)\S+(\s+Basic stats\: \S+ Column stats\: \S+)/$1#Masked#$2/

set hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
Expand Down
1 change: 1 addition & 0 deletions ql/src/test/queries/clientpositive/orc_merge12.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--! qt:replace:/(File Version:)(.+)/$1#Masked#/
--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
set hive.vectorized.execution.enabled=false;

CREATE TABLE `alltypesorc3xcols`(
Expand Down
2 changes: 2 additions & 0 deletions ql/src/test/queries/clientpositive/stats_part.q
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-- Mask the totalSize value as it can have slight variability, causing test flakiness
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
set hive.stats.dbclass=fs;
set hive.stats.fetch.column.stats=true;
set datanucleus.cache.collections=false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ Table Parameters:
numFiles 0
numRows 0
rawDataSize 0
totalSize 0
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -173,7 +173,7 @@ Table Parameters:
numFiles 1
numRows 12288
rawDataSize 0
totalSize 309572
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -344,7 +344,7 @@ Table Parameters:
numFiles 1
numRows 2
rawDataSize 0
totalSize 1673
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -440,7 +440,7 @@ Table Parameters:
numFiles 2
numRows 4
rawDataSize 0
totalSize 3346
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -532,7 +532,7 @@ Table Parameters:
numFiles 3
numRows 12292
rawDataSize 0
totalSize 312919
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -603,7 +603,7 @@ Table Parameters:
numFiles 0
numRows 0
rawDataSize 0
totalSize 0
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -648,7 +648,7 @@ Table Parameters:
numFiles 1
numRows 500
rawDataSize 5312
totalSize 5812
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -735,7 +735,7 @@ Partition Parameters:
numFiles 1
numRows 0
rawDataSize 0
totalSize 5812
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -860,7 +860,7 @@ Partition Parameters:
numFiles 2
numRows 0
rawDataSize 0
totalSize 5820
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down Expand Up @@ -910,7 +910,7 @@ Partition Parameters:
numFiles 2
numRows 502
rawDataSize 5318
totalSize 5820
totalSize #Masked#
#### A masked pattern was here ####

# Storage Information
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,17 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: (b = 1) (type: boolean)
Statistics: Num rows: 69 Data size: 13710 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (b = 1) (type: boolean)
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: int), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ Table Parameters:
numFiles 2
numRows 10
rawDataSize 0
totalSize 1522
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down
24 changes: 12 additions & 12 deletions ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 100
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 100 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down Expand Up @@ -249,17 +249,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector
Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1025
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cstring1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1025 Data size: 75068 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 1025 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down Expand Up @@ -1354,17 +1354,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: orc_llap_nonvector_2
Statistics: Num rows: 12288 Data size: 4468250 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 12288 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE
Statistics: Num rows: 10 Data size: #Masked# Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Expand Down
10 changes: 5 additions & 5 deletions ql/src/test/results/clientpositive/llap/stats_part.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ Table Parameters:
numPartitions 0
numRows 0
rawDataSize 0
totalSize 0
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -231,7 +231,7 @@ Table Parameters:
numPartitions 3
numRows 6
rawDataSize 0
totalSize 2292
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -292,7 +292,7 @@ Table Parameters:
numPartitions 3
numRows 8
rawDataSize 0
totalSize 3066
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -449,7 +449,7 @@ Table Parameters:
numPartitions 3
numRows 8
rawDataSize 0
totalSize 3066
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down Expand Up @@ -534,7 +534,7 @@ Table Parameters:
numPartitions 3
numRows 8
rawDataSize 0
totalSize 3066
totalSize #Masked#
transactional true
transactional_properties default
#### A masked pattern was here ####
Expand Down
Loading

0 comments on commit 7b803ca

Please sign in to comment.