From 5314aaeeda1c02bb67e2eea6fe22b3dbb4a40186 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 25 Nov 2024 13:05:19 +0100 Subject: [PATCH 01/30] HIVE-28029: Make unit tests based on TxnCommandsBaseForTests/DbTxnManagerEndToEndTestBase run on Tez --- .../hive/ql/TxnCommandsBaseForTests.java | 29 +++++++++++++++++-- .../lockmgr/DbTxnManagerEndToEndTestBase.java | 19 ++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index a0ae8f860371..c3cad0888dbb 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; +import org.apache.hadoop.hive.ql.lockmgr.DbTxnManagerEndToEndTestBase; import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientWithLocalCache; import org.apache.hadoop.hive.ql.processors.CommandProcessorException; import org.apache.hadoop.hive.ql.session.SessionState; @@ -62,7 +63,12 @@ public abstract class TxnCommandsBaseForTests { private static final Logger LOG = LoggerFactory.getLogger(TxnCommandsBaseForTests.class); - + + private static final String TEST_DATA_DIR = new File( + System.getProperty("java.io.tmpdir") + File.separator + + DbTxnManagerEndToEndTestBase.class.getCanonicalName() + "-" + System.currentTimeMillis()) + .getPath().replaceAll("\\\\", "/"); + //bucket count for test tables; set it to 1 for easier debugging final static int BUCKET_COUNT = 2; @Rule @@ -107,9 +113,26 @@ public void setUp() throws Exception { } void initHiveConf() { hiveConf = new HiveConf(this.getClass()); - //TODO: HIVE-28029: Make unit tests based on TxnCommandsBaseForTests run on Tez - 
hiveConf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr"); + setupTez(hiveConf); } + + private void setupTez(HiveConf conf) { + conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); + conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); + conf.set("tez.am.resource.memory.mb", "128"); + conf.set("tez.am.dag.scheduler.class", + "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); + conf.setBoolean("tez.local.mode", true); + conf.setBoolean("tez.local.mode.without.network", true); + conf.set("fs.defaultFS", "file:///"); + conf.setBoolean("tez.runtime.optimize.local.fetch", true); + conf.set("tez.staging-dir", TEST_DATA_DIR); + conf.setBoolean("tez.ignore.lib.uris", true); + conf.set("hive.tez.container.size", "128"); + conf.setBoolean("hive.merge.tezfiles", false); + conf.setBoolean("hive.in.tez.test", true); + } + void setUpInternal() throws Exception { initHiveConf(); Path workDir = new Path(System.getProperty("test.tmp.dir", diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java index 8f62c598932c..51bf7742dafc 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java @@ -74,6 +74,8 @@ public static void setUpDB() throws Exception{ @Before public void setUp() throws Exception { + setupTez(conf); + // set up metastore client cache if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.MSC_CACHE_ENABLED)) { HiveMetaStoreClientWithLocalCache.init(conf); @@ -104,6 +106,23 @@ public void setUp() throws Exception { throw new RuntimeException("Could not create " + getWarehouseDir()); } } + + private void setupTez(HiveConf conf) { + conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); + conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); + 
conf.set("tez.am.resource.memory.mb", "128"); + conf.set("tez.am.dag.scheduler.class", + "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); + conf.setBoolean("tez.local.mode", true); + conf.setBoolean("tez.local.mode.without.network", true); + conf.set("fs.defaultFS", "file:///"); + conf.setBoolean("tez.runtime.optimize.local.fetch", true); + conf.set("tez.staging-dir", TEST_DATA_DIR); + conf.setBoolean("tez.ignore.lib.uris", true); + conf.set("hive.tez.container.size", "128"); + conf.setBoolean("hive.merge.tezfiles", false); + conf.setBoolean("hive.in.tez.test", true); + } @After public void tearDown() throws Exception { From 71994b5079218b16f23738db1b7464bc4d84e00a Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 25 Nov 2024 14:31:16 +0100 Subject: [PATCH 02/30] clear session id stored in base hiveconf when executing statement in a new thread --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 2344b908cebc..95f46d33a737 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -304,6 +304,7 @@ private static final class QueryRunnable implements Runnable { this.cdlIn = cdlIn; this.cdlOut = cdlOut; this.hiveConf = new HiveConf(hiveConf); + this.hiveConf.unset(HiveConf.ConfVars.HIVE_SESSION_ID.varname); } @Override From f9b44fcf647799eeead316651e0d59690e4aa0b6 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 25 Nov 2024 14:42:27 +0100 Subject: [PATCH 03/30] extract common tez setup and TEST_DATA_DIR --- .../hadoop/hive/ql/TezBaseForTests.java | 46 +++++++++++++++++++ .../hive/ql/TxnCommandsBaseForTests.java | 24 +--------- .../lockmgr/DbTxnManagerEndToEndTestBase.java | 27 +---------- 3 files changed, 49 insertions(+), 48 deletions(-) create mode 100644 
ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java new file mode 100644 index 000000000000..40e4c7552c65 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql; + +import org.apache.hadoop.hive.conf.HiveConf; + +import java.io.File; + +public abstract class TezBaseForTests { + protected static final String TEST_DATA_DIR = new File( + System.getProperty("java.io.tmpdir") + File.separator + + TezBaseForTests.class.getCanonicalName() + "-" + System.currentTimeMillis()) + .getPath().replaceAll("\\\\", "/"); + + protected void setupTez(HiveConf conf) { + conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); + conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); + conf.set("tez.am.resource.memory.mb", "128"); + conf.set("tez.am.dag.scheduler.class", + "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); + conf.setBoolean("tez.local.mode", true); + conf.setBoolean("tez.local.mode.without.network", true); + conf.set("fs.defaultFS", "file:///"); + conf.setBoolean("tez.runtime.optimize.local.fetch", true); + conf.set("tez.staging-dir", TEST_DATA_DIR); + conf.setBoolean("tez.ignore.lib.uris", true); + conf.set("hive.tez.container.size", "128"); + conf.setBoolean("hive.merge.tezfiles", false); + conf.setBoolean("hive.in.tez.test", true); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index c3cad0888dbb..a76710975d14 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -61,14 +61,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class TxnCommandsBaseForTests { +public abstract class TxnCommandsBaseForTests extends TezBaseForTests { private static final Logger LOG = LoggerFactory.getLogger(TxnCommandsBaseForTests.class); - private static final String TEST_DATA_DIR = new File( - System.getProperty("java.io.tmpdir") + File.separator + - DbTxnManagerEndToEndTestBase.class.getCanonicalName() + "-" + 
System.currentTimeMillis()) - .getPath().replaceAll("\\\\", "/"); - //bucket count for test tables; set it to 1 for easier debugging final static int BUCKET_COUNT = 2; @Rule @@ -116,23 +111,6 @@ void initHiveConf() { setupTez(hiveConf); } - private void setupTez(HiveConf conf) { - conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); - conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); - conf.set("tez.am.resource.memory.mb", "128"); - conf.set("tez.am.dag.scheduler.class", - "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); - conf.setBoolean("tez.local.mode", true); - conf.setBoolean("tez.local.mode.without.network", true); - conf.set("fs.defaultFS", "file:///"); - conf.setBoolean("tez.runtime.optimize.local.fetch", true); - conf.set("tez.staging-dir", TEST_DATA_DIR); - conf.setBoolean("tez.ignore.lib.uris", true); - conf.set("hive.tez.container.size", "128"); - conf.setBoolean("hive.merge.tezfiles", false); - conf.setBoolean("hive.in.tez.test", true); - } - void setUpInternal() throws Exception { initHiveConf(); Path workDir = new Path(System.getProperty("test.tmp.dir", diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java index 51bf7742dafc..5a97bfe896ba 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.TezBaseForTests; import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientWithLocalCache; import org.apache.hadoop.hive.ql.session.SessionState; import org.junit.After; @@ -40,12 +41,7 @@ /** * Base class for "end-to-end" tests for DbTxnManager and simulate concurrent queries. 
*/ -public abstract class DbTxnManagerEndToEndTestBase { - - private static final String TEST_DATA_DIR = new File( - System.getProperty("java.io.tmpdir") + File.separator + - DbTxnManagerEndToEndTestBase.class.getCanonicalName() + "-" + System.currentTimeMillis()) - .getPath().replaceAll("\\\\", "/"); +public abstract class DbTxnManagerEndToEndTestBase extends TezBaseForTests { protected static HiveConf conf = new HiveConf(Driver.class); protected HiveTxnManager txnMgr; @@ -54,8 +50,6 @@ public abstract class DbTxnManagerEndToEndTestBase { protected TxnStore txnHandler; public DbTxnManagerEndToEndTestBase() { - //TODO: HIVE-28029: Make unit tests based on DbTxnManagerEndToEndTestBase run on Tez - conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr"); HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); @@ -107,23 +101,6 @@ public void setUp() throws Exception { } } - private void setupTez(HiveConf conf) { - conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); - conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); - conf.set("tez.am.resource.memory.mb", "128"); - conf.set("tez.am.dag.scheduler.class", - "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); - conf.setBoolean("tez.local.mode", true); - conf.setBoolean("tez.local.mode.without.network", true); - conf.set("fs.defaultFS", "file:///"); - conf.setBoolean("tez.runtime.optimize.local.fetch", true); - conf.set("tez.staging-dir", TEST_DATA_DIR); - conf.setBoolean("tez.ignore.lib.uris", true); - conf.set("hive.tez.container.size", "128"); - conf.setBoolean("hive.merge.tezfiles", false); - conf.setBoolean("hive.in.tez.test", true); - } - @After public void tearDown() throws Exception { driver.close(); From 395ebb71cc4fb00f08287ccdf35f14198334ecfd Mon Sep 17 00:00:00 2001 From: 
kasakrisz Date: Tue, 26 Nov 2024 11:48:11 +0100 Subject: [PATCH 04/30] address review comments: use HiveConfForTest --- .../ql/txn/compactor/CompactorOnTezTest.java | 30 +++++------- .../hadoop/hive/ql/TezBaseForTests.java | 46 ------------------- .../hive/ql/TxnCommandsBaseForTests.java | 6 +-- .../lockmgr/DbTxnManagerEndToEndTestBase.java | 12 ++--- 4 files changed, 19 insertions(+), 75 deletions(-) delete mode 100644 ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java index 5118bd0bbc27..f453aa3bd527 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConfForTest; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -64,12 +65,6 @@ * Superclass for Test[Crud|Mm]CompactorOnTez, for setup and helper classes. 
*/ public abstract class CompactorOnTezTest { - private static final AtomicInteger RANDOM_INT = new AtomicInteger(new Random().nextInt()); - private static final String TEST_DATA_DIR = new File( - System.getProperty("java.io.tmpdir") + File.separator + TestCrudCompactorOnTez.class - .getCanonicalName() + "-" + System.currentTimeMillis() + "_" + RANDOM_INT - .getAndIncrement()).getPath().replaceAll("\\\\", "/"); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; static final String CUSTOM_COMPACTION_QUEUE = "my_compaction_test_queue"; protected HiveConf conf; @@ -85,7 +80,7 @@ public abstract class CompactorOnTezTest { @Before // Note: we create a new conf and driver object before every test public void setup() throws Exception { - HiveConf hiveConf = new HiveConf(this.getClass()); + HiveConfForTest hiveConf = new HiveConfForTest(this.getClass()); setupWithConf(hiveConf); } @@ -94,17 +89,18 @@ public static void setupClass() throws Exception { tmpFolder = folder.newFolder().getAbsolutePath(); } - protected void setupWithConf(HiveConf hiveConf) throws Exception { - File f = new File(TEST_WAREHOUSE_DIR); + protected void setupWithConf(HiveConfForTest hiveConf) throws Exception { + String testWarehouseDir = hiveConf.getTestDataDir() + "/warehouse"; + File f = new File(testWarehouseDir); if (f.exists()) { FileUtil.fullyDelete(f); } - if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { - throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); + if (!(new File(testWarehouseDir).mkdirs())) { + throw new RuntimeException("Could not create " + testWarehouseDir); } hiveConf.setVar(HiveConf.ConfVars.PRE_EXEC_HOOKS, ""); hiveConf.setVar(HiveConf.ConfVars.POST_EXEC_HOOKS, ""); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_WAREHOUSE, TEST_WAREHOUSE_DIR); + hiveConf.setVar(HiveConf.ConfVars.METASTORE_WAREHOUSE, testWarehouseDir); hiveConf.setVar(HiveConf.ConfVars.HIVE_INPUT_FORMAT, HiveInputFormat.class.getName()); 
hiveConf.setVar(HiveConf.ConfVars.HIVE_FETCH_TASK_CONVERSION, "none"); MetastoreConf.setTimeVar(hiveConf, MetastoreConf.ConfVars.TXN_OPENTXN_TIMEOUT, 2, TimeUnit.SECONDS); @@ -116,27 +112,23 @@ protected void setupWithConf(HiveConf hiveConf) throws Exception { TestTxnDbUtil.prepDb(hiveConf); conf = hiveConf; // Use tez as execution engine for this test class - setupTez(conf); + setupTez(hiveConf); msClient = new HiveMetaStoreClient(conf); driver = DriverFactory.newDriver(conf); SessionState.start(new CliSessionState(conf)); } - private void setupTez(HiveConf conf) { + private void setupTez(HiveConfForTest conf) { conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); - conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); conf.set("tez.am.resource.memory.mb", "128"); conf.set("tez.am.dag.scheduler.class", "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); - conf.setBoolean("tez.local.mode", true); - conf.setBoolean("tez.local.mode.without.network", true); conf.set("fs.defaultFS", "file:///"); conf.setBoolean("tez.runtime.optimize.local.fetch", true); - conf.set("tez.staging-dir", TEST_DATA_DIR); + conf.set("tez.staging-dir", conf.getTestDataDir()); conf.setBoolean("tez.ignore.lib.uris", true); conf.set("hive.tez.container.size", "128"); conf.setBoolean("hive.merge.tezfiles", false); - conf.setBoolean("hive.in.tez.test", true); if (!mmCompaction) { // We need these settings to create a table which is not bucketed, but contains multiple files. // If these parameters are set when inserting 100 rows into the table, the rows will diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java deleted file mode 100644 index 40e4c7552c65..000000000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/TezBaseForTests.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql; - -import org.apache.hadoop.hive.conf.HiveConf; - -import java.io.File; - -public abstract class TezBaseForTests { - protected static final String TEST_DATA_DIR = new File( - System.getProperty("java.io.tmpdir") + File.separator + - TezBaseForTests.class.getCanonicalName() + "-" + System.currentTimeMillis()) - .getPath().replaceAll("\\\\", "/"); - - protected void setupTez(HiveConf conf) { - conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); - conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); - conf.set("tez.am.resource.memory.mb", "128"); - conf.set("tez.am.dag.scheduler.class", - "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); - conf.setBoolean("tez.local.mode", true); - conf.setBoolean("tez.local.mode.without.network", true); - conf.set("fs.defaultFS", "file:///"); - conf.setBoolean("tez.runtime.optimize.local.fetch", true); - conf.set("tez.staging-dir", TEST_DATA_DIR); - conf.setBoolean("tez.ignore.lib.uris", true); - conf.set("hive.tez.container.size", "128"); - conf.setBoolean("hive.merge.tezfiles", false); - conf.setBoolean("hive.in.tez.test", true); - } -} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java 
b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index a76710975d14..bd8366e82675 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConfForTest; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnStore; @@ -61,7 +62,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class TxnCommandsBaseForTests extends TezBaseForTests { +public abstract class TxnCommandsBaseForTests { private static final Logger LOG = LoggerFactory.getLogger(TxnCommandsBaseForTests.class); //bucket count for test tables; set it to 1 for easier debugging @@ -107,8 +108,7 @@ public void setUp() throws Exception { } } void initHiveConf() { - hiveConf = new HiveConf(this.getClass()); - setupTez(hiveConf); + hiveConf = new HiveConfForTest(this.getClass()); } void setUpInternal() throws Exception { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java index 5a97bfe896ba..5787951a3bf0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/DbTxnManagerEndToEndTestBase.java @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConfForTest; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnStore; 
@@ -28,7 +29,6 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.TezBaseForTests; import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientWithLocalCache; import org.apache.hadoop.hive.ql.session.SessionState; import org.junit.After; @@ -41,9 +41,9 @@ /** * Base class for "end-to-end" tests for DbTxnManager and simulate concurrent queries. */ -public abstract class DbTxnManagerEndToEndTestBase extends TezBaseForTests { +public abstract class DbTxnManagerEndToEndTestBase { - protected static HiveConf conf = new HiveConf(Driver.class); + protected static HiveConfForTest conf = new HiveConfForTest(DbTxnManagerEndToEndTestBase.class); protected HiveTxnManager txnMgr; protected Context ctx; protected Driver driver, driver2; @@ -68,8 +68,6 @@ public static void setUpDB() throws Exception{ @Before public void setUp() throws Exception { - setupTez(conf); - // set up metastore client cache if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.MSC_CACHE_ENABLED)) { HiveMetaStoreClientWithLocalCache.init(conf); @@ -110,10 +108,10 @@ public void tearDown() throws Exception { if (txnMgr != null) { txnMgr.closeTxnManager(); } - FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); + FileUtils.deleteDirectory(new File(conf.getTestDataDir())); } protected String getWarehouseDir() { - return TEST_DATA_DIR + "/warehouse"; + return conf.getTestDataDir() + "/warehouse"; } } From c0af530546b24e155956d2c97abf2698c5a2927b Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 28 Nov 2024 08:54:02 +0100 Subject: [PATCH 05/30] update tests --- .../hadoop/hive/conf/HiveConfForTest.java | 0 .../hadoop/hive/ql/TestTxnNoBuckets.java | 75 ++++++++++--------- .../hive/ql/TxnCommandsBaseForTests.java | 5 ++ 3 files changed, 44 insertions(+), 36 deletions(-) rename common/src/{test => java}/org/apache/hadoop/hive/conf/HiveConfForTest.java (100%) diff --git 
a/common/src/test/org/apache/hadoop/hive/conf/HiveConfForTest.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConfForTest.java similarity index 100% rename from common/src/test/org/apache/hadoop/hive/conf/HiveConfForTest.java rename to common/src/java/org/apache/hadoop/hive/conf/HiveConfForTest.java diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index 40a0a169744b..e603a717c36c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -62,6 +62,7 @@ public void setUp() throws Exception { setUpInternal(); //see TestTxnNoBucketsVectorized for vectorized version hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); + useSmallGrouping(); } private boolean shouldVectorize() { @@ -270,10 +271,10 @@ public void testCTAS() throws Exception { " union all select a, b from " + Table.ACIDTBL); rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas3 order by ROW__ID"); String expected3[][] = { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001_0"}, - {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00002_0"}, - {"{\"writeid\":1,\"bucketid\":537067520,\"rowid\":0}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00003_0"}, + {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0001/bucket_00000_0"}, + {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":0}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0002/bucket_00000_0"}, + {"{\"writeid\":1,\"bucketid\":536936449,\"rowid\":0}\t1\t2", 
"warehouse/myctas3/delta_0000001_0000001_0001/bucket_00001_0"}, + {"{\"writeid\":1,\"bucketid\":536936450,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0002/bucket_00001_0"}, }; checkExpected(rs, expected3, "Unexpected row count after ctas from union all query"); @@ -384,14 +385,14 @@ public void testToAcidConversionMultiBucket() throws Exception { List rs = runStatementOnDriver("select a, b, INPUT__FILE__NAME from T order by a, b, INPUT__FILE__NAME"); //previous insert+union creates 3 data files (0-3) - //insert (12,12) creates 000000_0_copy_1 + //insert (12,12) creates 000000_0 String expected[][] = { - {"1\t2", "warehouse/t/000002_0"}, - {"2\t4", "warehouse/t/000002_0"}, - {"5\t6", "warehouse/t/000000_0"}, - {"6\t8", "warehouse/t/000001_0"}, - {"9\t10", "warehouse/t/000000_0"}, - {"12\t12", "warehouse/t/000000_0_copy_1"} + {"1\t2", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"2\t4", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"5\t6", "warehouse/t/HIVE_UNION_SUBDIR_2/000000_0"}, + {"6\t8", "warehouse/t/HIVE_UNION_SUBDIR_2/000001_0"}, + {"9\t10", "warehouse/t/HIVE_UNION_SUBDIR_3/000000_0"}, + {"12\t12", "warehouse/t/000000_0"} }; checkExpected(rs, expected,"before converting to acid"); @@ -405,13 +406,13 @@ public void testToAcidConversionMultiBucket() throws Exception { " where a between 5 and 7"); //now we have a table with data files at multiple different levels. 
String expected1[][] = { - {"1\t2", "warehouse/t/000002_0"}, - {"2\t4", "warehouse/t/000002_0"}, - {"5\t6", "warehouse/t/000000_0"}, - {"6\t8", "warehouse/t/000001_0"}, - {"9\t10", "warehouse/t/000000_0"}, + {"1\t2", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"2\t4", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"5\t6", "warehouse/t/HIVE_UNION_SUBDIR_2/000000_0"}, + {"6\t8", "warehouse/t/HIVE_UNION_SUBDIR_2/000001_0"}, + {"9\t10", "warehouse/t/HIVE_UNION_SUBDIR_3/000000_0"}, {"10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, - {"12\t12", "warehouse/t/000000_0_copy_1"}, + {"12\t12", "warehouse/t/000000_0"}, {"20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, {"50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"}, {"60\t80", "warehouse/t/HIVE_UNION_SUBDIR_16/000001_0"} @@ -429,16 +430,16 @@ now that T is Acid, data for each writerId is treated like a logical bucket (tho logical bucket (tranche) */ String expected2[][] = { - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/000002_0"}, - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/000002_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "warehouse/t/000000_0"}, - {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t6\t8", "warehouse/t/000001_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t9\t10", "warehouse/t/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t2\t4", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t5\t6", "warehouse/t/HIVE_UNION_SUBDIR_2/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t6\t8", "warehouse/t/HIVE_UNION_SUBDIR_2/000001_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":7}\t9\t10", "warehouse/t/HIVE_UNION_SUBDIR_3/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", 
"warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t12\t12", "warehouse/t/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"}, - {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t60\t80", "warehouse/t/HIVE_UNION_SUBDIR_16/000001_0"}, + {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t60\t80", "warehouse/t/HIVE_UNION_SUBDIR_16/000001_0"}, }; checkExpected(rs, expected2,"after converting to acid (no compaction)"); Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912)); @@ -450,12 +451,12 @@ logical bucket (tranche) assertVectorized(shouldVectorize(), "delete from T where b = 8"); runStatementOnDriver("delete from T where b = 8"); String expected3[][] = { - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/000002_0"}, - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/000002_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "warehouse/t/000000_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t9\t10", "warehouse/t/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t2\t4", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t5\t6", "warehouse/t/HIVE_UNION_SUBDIR_2/000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":7}\t9\t10", "warehouse/t/HIVE_UNION_SUBDIR_3/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", 
"warehouse/t/000000_0_copy_1"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t12\t12", "warehouse/t/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"}, // update for "{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t60\t80" @@ -471,17 +472,17 @@ logical bucket (tranche) /*Compaction preserves location of rows wrt buckets/tranches (for now)*/ String expected4[][] = { - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", - "warehouse/t/base_10000002_v0000015/bucket_00002"}, - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", - "warehouse/t/base_10000002_v0000015/bucket_00002"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t5\t6", + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/t/base_10000002_v0000015/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t9\t10", + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t2\t4", + "warehouse/t/base_10000002_v0000015/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t5\t6", + "warehouse/t/base_10000002_v0000015/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":7}\t9\t10", "warehouse/t/base_10000002_v0000015/bucket_00000"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/base_10000002_v0000015/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t12\t12", "warehouse/t/base_10000002_v0000015/bucket_00000"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_10000002_v0000015/bucket_00000"}, @@ -705,6 +706,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { assertVectorized(shouldVectorize(), query); //doesn't vectorize (uses neither of the Vectorzied Acid readers) + 
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); query = "select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a"; rs = runStatementOnDriver(query); Assert.assertEquals("", 2, rs.size()); @@ -717,6 +719,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { //vectorized because there is INPUT__FILE__NAME assertVectorized(false, query); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); runStatementOnDriver("update T set b = 17 where a = 1"); //this should use VectorizedOrcAcidRowReader query = "select ROW__ID, b from T where b > 0 order by a"; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index bd8366e82675..9c71e5af6551 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -155,6 +155,11 @@ void setUpInternal() throws Exception { setUpSchema(); } + protected void useSmallGrouping() { + hiveConf.set("tez.grouping.max-size", "10"); + hiveConf.set("tez.grouping.min-size", "1"); + } + protected void setUpSchema() throws Exception { runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); From fb41ae8c64e3aa716eb054360cf10f91f5aa4600 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 28 Nov 2024 14:06:25 +0100 Subject: [PATCH 06/30] fix assert vectorization --- .../hadoop/hive/ql/TestTxnNoBuckets.java | 12 ++-- .../hive/ql/TxnCommandsBaseForTests.java | 65 +++++++++++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index e603a717c36c..f29b3674b86a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -679,7 +679,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { checkExpected(rs, expected, "After conversion"); Assert.assertEquals(Integer.toString(6), rs.get(0)); Assert.assertEquals(Integer.toString(9), rs.get(1)); - assertVectorized(shouldVectorize(), query); + assertMappersAreVectorized(query); //why isn't PPD working.... - it is working but storage layer doesn't do row level filtering; only row group level //this uses VectorizedOrcAcidRowBatchReader @@ -690,7 +690,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9"} }; checkExpected(rs, expected1, "After conversion with VC1"); - assertVectorized(shouldVectorize(), query); + assertMappersAreVectorized(query); //this uses VectorizedOrcAcidRowBatchReader query = "select ROW__ID, a from T where b > 0 order by a"; @@ -703,7 +703,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9"} }; checkExpected(rs, expected2, "After conversion with VC2"); - assertVectorized(shouldVectorize(), query); + assertMappersAreVectorized(query); //doesn't vectorize (uses neither of the Vectorzied Acid readers) hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); @@ -717,7 +717,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { checkExpected(rs, expected3, "After non-vectorized read"); Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912)); //vectorized because there is INPUT__FILE__NAME - assertVectorized(false, query); + assertMappersAreNotVectorized(query); 
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); runStatementOnDriver("update T set b = 17 where a = 1"); @@ -732,7 +732,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}","10"} }; checkExpected(rs, expected4, "After conversion with VC4"); - assertVectorized(shouldVectorize(), query); + assertMappersAreVectorized(query); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); @@ -759,7 +759,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { }; checkExpected(rs, expected5, "After major compaction"); //vectorized because there is INPUT__FILE__NAME - assertVectorized(false, query); + assertMappersAreNotVectorized(query); } private void checkExpected(List rs, String[][] expected, String msg) { super.checkExpected(rs, expected, msg, LOG, true); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 9c71e5af6551..1be9baa5d0fb 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -23,12 +23,17 @@ import java.util.Comparator; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.esotericsoftware.kryo.util.ObjectMap; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -62,6 +67,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.commons.lang3.StringUtils.isBlank; +import static 
org.apache.commons.lang3.StringUtils.isNotBlank; + public abstract class TxnCommandsBaseForTests { private static final Logger LOG = LoggerFactory.getLogger(TxnCommandsBaseForTests.class); @@ -324,6 +332,63 @@ void assertVectorized(boolean vectorized, String query) throws Exception { } Assert.assertTrue("Din't find expected 'vectorized' in plan", !vectorized); } + /** + * Runs Vectorized Explain on the query and checks if the plan is vectorized as expected + */ + protected void assertVectorized(String query) throws Exception { + List rs = runStatementOnDriver("EXPLAIN VECTORIZATION DETAIL " + query); + for (String line : rs) { + if (line != null && line.contains("vectorized: false")) { + Assert.fail("Execution is not vectorized"); + return; + } + } + Assert.fail("Didn't find expected 'vectorized' in plan"); + } + + protected void assertMappersAreVectorized(String query) throws Exception { + assertMapperExecutionMode(query, "Mapper was not vectorized: ", + executionMode -> isBlank(executionMode) || !executionMode.contains("vectorized")); + } + + protected void assertMappersAreNotVectorized(String query) throws Exception { + assertMapperExecutionMode(query, "Mapper was vectorized but was not expected: ", + executionMode -> isNotBlank(executionMode) && executionMode.contains("vectorized")); + } + + protected void assertMapperExecutionMode(String query, String message, Predicate predicate) + throws Exception { + List rs = runStatementOnDriver("EXPLAIN FORMATTED VECTORIZATION DETAIL " + query); + ObjectMapper objectMapper = new ObjectMapper(); + Map plan = objectMapper.readValue(rs.get(0), Map.class); + Map stages = (Map) plan.get("STAGE PLANS"); + Map tezStage = null; + if (stages == null) { + Assert.fail("Execution plan of query does not have have stages: " + rs.get(0)); + } + for (Map.Entry stageEntry : stages.entrySet()) { + Map stage = (Map) stageEntry.getValue(); + tezStage = (Map) stage.get("Tez"); + if (tezStage != null) { + break; + } + } + if (tezStage == 
null) { + Assert.fail("Execution plan of query does not contain a Tez stage: " + rs.get(0)); + } + Map vertices = (Map) tezStage.get("Vertices:"); + if (vertices == null) { + Assert.fail("Execution plan of query does not contain Tez vertices: " + rs.get(0)); + } + for (Map.Entry vertexEntry : vertices.entrySet()) { + if (vertexEntry.getKey() == null || !vertexEntry.getKey().startsWith("Map")) { + continue; + } + Map mapVertex = (Map) vertexEntry.getValue(); + String executionMode = (String) mapVertex.get("Execution mode"); + Assert.assertFalse(message + rs.get(0), predicate.test(executionMode)); + } + } /** * Will assert that actual files match expected. * @param expectedFiles - suffixes of expected Paths. Must be the same length From 7ca9475a95c2e721b2c079485b11b8743fe5d526 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 2 Dec 2024 09:45:39 +0100 Subject: [PATCH 07/30] fix grouping size --- .../hadoop/hive/ql/TestTxnNoBuckets.java | 1 - .../hive/ql/TestTxnNoBucketsVectorized.java | 8 ++++++ .../hive/ql/TxnCommandsBaseForTests.java | 26 ++++++------------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index f29b3674b86a..8d22f1d5bd53 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -62,7 +62,6 @@ public void setUp() throws Exception { setUpInternal(); //see TestTxnNoBucketsVectorized for vectorized version hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); - useSmallGrouping(); } private boolean shouldVectorize() { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java index 8aa967f501a4..cd2d2306123c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java @@ -18,10 +18,18 @@ package org.apache.hadoop.hive.ql; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.BucketCodec; +import org.junit.Assert; import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; public class TestTxnNoBucketsVectorized extends TestTxnNoBuckets { @Before + @Override public void setUp() throws Exception { setUpInternal(); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 1be9baa5d0fb..83591748ac95 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -117,6 +117,11 @@ public void setUp() throws Exception { } void initHiveConf() { hiveConf = new HiveConfForTest(this.getClass()); + // Multiple tests requires more than one buckets per write. Use a very small value for grouping size to create + // multiple mapper instances with FileSinkOperators. The number of buckets are depends on the size of the data + // written and the grouping size. Most test cases expects 2 buckets. 
+ hiveConf.set("tez.grouping.max-size", "10"); + hiveConf.set("tez.grouping.min-size", "1"); } void setUpInternal() throws Exception { @@ -163,11 +168,6 @@ void setUpInternal() throws Exception { setUpSchema(); } - protected void useSmallGrouping() { - hiveConf.set("tez.grouping.max-size", "10"); - hiveConf.set("tez.grouping.min-size", "1"); - } - protected void setUpSchema() throws Exception { runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); @@ -332,21 +332,11 @@ void assertVectorized(boolean vectorized, String query) throws Exception { } Assert.assertTrue("Din't find expected 'vectorized' in plan", !vectorized); } - /** - * Runs Vectorized Explain on the query and checks if the plan is vectorized as expected - */ - protected void assertVectorized(String query) throws Exception { - List rs = runStatementOnDriver("EXPLAIN VECTORIZATION DETAIL " + query); - for (String line : rs) { - if (line != null && line.contains("vectorized: false")) { - Assert.fail("Execution is not vectorized"); - return; - } - } - Assert.fail("Didn't find expected 'vectorized' in plan"); - } protected void assertMappersAreVectorized(String query) throws Exception { + if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { + return; + } assertMapperExecutionMode(query, "Mapper was not vectorized: ", executionMode -> isBlank(executionMode) || !executionMode.contains("vectorized")); } From 115327a2fd0e2718fc566fc0e21e0dfa10810d0b Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 2 Dec 2024 10:31:55 +0100 Subject: [PATCH 08/30] inline asserVectorize to subclasses --- .../hadoop/hive/ql/TestTxnNoBuckets.java | 9 ++++++-- 
.../hive/ql/TestTxnNoBucketsVectorized.java | 5 +++++ .../hive/ql/TxnCommandsBaseForTests.java | 21 +++++-------------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index 8d22f1d5bd53..05c053c6f510 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -445,9 +445,9 @@ logical bucket (tranche) Assert.assertEquals(2, BucketCodec.determineVersion(537001984).decodeWriterId(537001984)); Assert.assertEquals(1, BucketCodec.determineVersion(536936448).decodeWriterId(536936448)); - assertVectorized(shouldVectorize(), "update T set b = 88 where b = 80"); + assertVectorized("update T set b = 88 where b = 80"); runStatementOnDriver("update T set b = 88 where b = 80"); - assertVectorized(shouldVectorize(), "delete from T where b = 8"); + assertVectorized("delete from T where b = 8"); runStatementOnDriver("delete from T where b = 8"); String expected3[][] = { {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, @@ -492,6 +492,11 @@ logical bucket (tranche) }; checkExpected(rs, expected4,"after major compact"); } + + protected void assertVectorized(String query) throws Exception { + assertMappersAreNotVectorized(query); + } + @Test public void testInsertFromUnion() throws Exception { int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}}; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java index cd2d2306123c..69b39e5d76a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java @@ -34,4 +34,9 @@ public void setUp() throws Exception { setUpInternal(); 
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); } + + @Override + protected void assertVectorized(String query) throws Exception { + assertMappersAreVectorized(query); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 83591748ac95..c3e769e1adc4 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -318,21 +318,6 @@ protected CommandProcessorException runStatementOnDriverNegative(String stmt) { throw new RuntimeException("Didn't get expected failure!"); } - /** - * Runs Vectorized Explain on the query and checks if the plan is vectorized as expected - * @param vectorized {@code true} - assert that it's vectorized - */ - void assertVectorized(boolean vectorized, String query) throws Exception { - List rs = runStatementOnDriver("EXPLAIN VECTORIZATION DETAIL " + query); - for(String line : rs) { - if(line != null && line.contains("Execution mode: vectorized")) { - Assert.assertTrue("Was vectorized when it wasn't expected", vectorized); - return; - } - } - Assert.assertTrue("Din't find expected 'vectorized' in plan", !vectorized); - } - protected void assertMappersAreVectorized(String query) throws Exception { if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { return; @@ -431,7 +416,11 @@ void logResult(Logger LOG, List rs) { protected void checkResult(String[][] expectedResult, String query, boolean isVectorized, String msg, Logger LOG) throws Exception{ List rs = runStatementOnDriver(query); checkExpected(rs, expectedResult, msg + (isVectorized ? " vect" : ""), LOG, !isVectorized); - assertVectorized(isVectorized, query); + if (isVectorized) { + assertMappersAreVectorized(query); + } else { + assertMappersAreNotVectorized(query); + } } void dropTables(String... 
tables) throws Exception { HiveConf queryConf = d.getQueryState().getConf(); From ed2bed3704e2a21af20ef920b987d3f30e81789f Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 2 Dec 2024 13:31:08 +0100 Subject: [PATCH 09/30] fix testNonAcidToAcidConversion --- .../hadoop/hive/ql/TestTxnCommands2.java | 118 ++++++++---------- 1 file changed, 51 insertions(+), 67 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 3386404cbf0c..ba439b10aa9d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -365,13 +365,13 @@ public void testNonAcidToAcidConversion02() throws Exception { * Note: order of rows in a file ends up being the reverse of order in values clause (why?!) */ String[][] expected = { - {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t13", "bucket_00001"}, + {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":3}\t0\t13", "bucket_00001"}, {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t0\t15", "bucket_00001"}, {"{\"writeid\":10000003,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"}, {"{\"writeid\":10000002,\"bucketid\":536936449,\"rowid\":0}\t0\t120", "bucket_00001"}, - {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "bucket_00001"}, - {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":5}\t1\t4", "bucket_00001"}, - {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t1\t5", "bucket_00001"}, + {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2", "bucket_00001"}, + {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t1\t4", "bucket_00001"}, + {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":5}\t1\t5", "bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":6}\t1\t6", "bucket_00001"}, {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t1\t16", "bucket_00001"} }; @@ -426,11 +426,9 @@ public void 
testNonAcidToAcidConversion1() throws Exception { runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + // There should be 1 original bucket file in the location (000001_0) + Assert.assertEquals(1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); int [][] resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -443,10 +441,8 @@ public void testNonAcidToAcidConversion1() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + Assert.assertEquals(1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -458,9 +454,9 @@ public void testNonAcidToAcidConversion1() throws Exception { runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)"); status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta 
directory. + // There should be 1 original bucket file (000001_0), plus a new delta directory. // The delta directory should also have only 1 bucket file (bucket_00001) - Assert.assertEquals(3, status.length); + Assert.assertEquals(2, status.length); boolean sawNewDelta = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("delta_.*")) { @@ -484,10 +480,10 @@ public void testNonAcidToAcidConversion1() throws Exception { runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); runWorker(hiveConf); // There should be 1 new directory: base_xxxxxxx. - // Original bucket files and delta directory should stay until Cleaner kicks in. + // Original bucket file and delta directory should stay until Cleaner kicks in. status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(4, status.length); + Assert.assertEquals(3, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { @@ -517,8 +513,8 @@ public void testNonAcidToAcidConversion1() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); // Before Cleaner, there should be 5 items: - // 2 original files, 1 original directory, 1 base directory and 1 delta directory - Assert.assertEquals(5, status.length); + // 1 original file, 1 original directory, 1 base directory and 1 delta directory + Assert.assertEquals(4, status.length); runCleaner(hiveConf); // There should be only 1 directory left: base_xxxxxxx. // Original bucket files and delta directory should have been cleaned up. 
@@ -556,11 +552,9 @@ public void testNonAcidToAcidConversion2() throws Exception { runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + // There should be 1 original bucket file in the location (000001_0) + Assert.assertEquals(1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); int [][] resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -573,10 +567,8 @@ public void testNonAcidToAcidConversion2() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + Assert.assertEquals(1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -588,12 +580,12 @@ public void testNonAcidToAcidConversion2() throws Exception { runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1"); status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - // There should be 2 original bucket files (000000_0 
and 000001_0), plus one delta directory + // There should be 1 original bucket file (000001_0), plus one delta directory // and one delete_delta directory. When split-update is enabled, an update event is split into // a combination of delete and insert, that generates the delete_delta directory. // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001) // and so should the delete_delta directory. - Assert.assertEquals(4, status.length); + Assert.assertEquals(3, status.length); boolean sawNewDelta = false; boolean sawNewDeleteDelta = false; for (int i = 0; i < status.length; i++) { @@ -624,10 +616,10 @@ public void testNonAcidToAcidConversion2() throws Exception { runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); runWorker(hiveConf); // There should be 1 new directory: base_0000001. - // Original bucket files and delta directory should stay until Cleaner kicks in. + // Original bucket file and delta directory should stay until Cleaner kicks in. status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(5, status.length); + Assert.assertEquals(4, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { @@ -649,8 +641,8 @@ public void testNonAcidToAcidConversion2() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); // Before Cleaner, there should be 5 items: - // 2 original files, 1 delta directory, 1 delete_delta directory and 1 base directory - Assert.assertEquals(5, status.length); + // 1 original file, 1 delta directory, 1 delete_delta directory and 1 base directory + Assert.assertEquals(4, status.length); runCleaner(hiveConf); // There should be only 1 directory left: base_0000001. 
// Original bucket files, delta directory and delete_delta should have been cleaned up. @@ -688,11 +680,9 @@ public void testNonAcidToAcidConversion3() throws Exception { runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + // There should be 1 original bucket file in the location (000001_0) + Assert.assertEquals(1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); int [][] resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -705,10 +695,8 @@ public void testNonAcidToAcidConversion3() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + Assert.assertEquals(1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -720,10 +708,10 @@ public void testNonAcidToAcidConversion3() throws Exception { runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); runWorker(hiveConf); // There should be 1 new directory: base_-9223372036854775808 - // Original bucket files 
should stay until Cleaner kicks in. + // Original bucket file should stay until Cleaner kicks in. status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(3, status.length); + Assert.assertEquals(2, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { @@ -750,10 +738,10 @@ public void testNonAcidToAcidConversion3() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(status); // make sure delta_0000001_0000001_0000 appears before delta_0000002_0000002_0000 - // There should be 2 original bucket files (000000_0 and 000001_0), a base directory, + // There should be 1 original bucket file (000001_0), a base directory, // plus two new delta directories and one delete_delta directory that would be created due to // the update statement (remember split-update U=D+I)! - Assert.assertEquals(6, status.length); + Assert.assertEquals(5, status.length); int numDelta = 0; int numDeleteDelta = 0; sawNewBase = false; @@ -805,12 +793,12 @@ public void testNonAcidToAcidConversion3() throws Exception { runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); runWorker(hiveConf); // There should be 1 new base directory: base_00000016 - // Original bucket files, delta directories, delete_delta directories and the + // Original bucket file, delta directories, delete_delta directories and the // previous base directory should stay until Cleaner kicks in. 
status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(status); - Assert.assertEquals(7, status.length); + Assert.assertEquals(6, status.length); int numBase = 0; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { @@ -841,8 +829,8 @@ public void testNonAcidToAcidConversion3() throws Exception { status = fs.listStatus(new Path(getWarehouseDir() + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); // Before Cleaner, there should be 6 items: - // 2 original files, 2 delta directories, 1 delete_delta directory and 2 base directories - Assert.assertEquals(7, status.length); + // 1 original file, 2 delta directories, 1 delete_delta directory and 2 base directories + Assert.assertEquals(6, status.length); runCleaner(hiveConf); runCleaner(hiveConf); // There should be only 1 directory left: base_00000016 @@ -883,11 +871,9 @@ public void testNonAcidToAcidConversion4() throws Exception { runStatementOnDriver("insert into " + Table.NONACIDNESTEDPART + " partition(p='p1',q='q1') " + makeValuesClause(targetVals)); status = listFilesByTable(fs, Table.NONACIDNESTEDPART); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + // There should be 1 original bucket file in the location (000001_0) + Assert.assertEquals(BUCKET_COUNT - 1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); List rs = runStatementOnDriver("select a,b from " + Table.NONACIDNESTEDPART); Assert.assertEquals(stringifyValues(targetVals), rs); rs = runStatementOnDriver("select count(*) from " + Table.NONACIDNESTEDPART); @@ -898,10 +884,8 @@ public void testNonAcidToAcidConversion4() 
throws Exception { runStatementOnDriver("alter table " + Table.NONACIDNESTEDPART + " SET TBLPROPERTIES ('transactional'='true')"); status = listFilesByTable(fs, Table.NONACIDNESTEDPART); // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } + Assert.assertEquals(BUCKET_COUNT - 1, status.length); + Assert.assertTrue(status[0].getPath().getName().matches("000001_0")); rs = runStatementOnDriver("select a,b from " + Table.NONACIDNESTEDPART); Assert.assertEquals(stringifyValues(targetVals), rs); rs = runStatementOnDriver("select count(*) from " + Table.NONACIDNESTEDPART); @@ -914,7 +898,7 @@ public void testNonAcidToAcidConversion4() throws Exception { // There should be 1 new directory: base_-9223372036854775808 // Original bucket files should stay until Cleaner kicks in. status = listFilesByTable(fs, Table.NONACIDNESTEDPART); - Assert.assertEquals(3, status.length); + Assert.assertEquals(2, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { Path parent = status[i].getPath().getParent(); @@ -938,10 +922,10 @@ public void testNonAcidToAcidConversion4() throws Exception { runStatementOnDriver("insert into " + Table.NONACIDNESTEDPART + "(a,b,p,q) values(3,4,'p1','q1')"); status = listFilesByTable(fs, Table.NONACIDNESTEDPART); Arrays.sort(status); // make sure delta_0000001_0000001_0000 appears before delta_0000002_0000002_0000 - // There should be 2 original bucket files (000000_0 and 000001_0), a base directory, + // There should be 1 original bucket file (000001_0), a base directory, // plus two new delta directories and one delete_delta directory that would be created due to // the update statement (remember split-update U=D+I)! 
- Assert.assertEquals(6, status.length); + Assert.assertEquals(5, status.length); int numDelta = 0; int numDeleteDelta = 0; sawNewBase = false; @@ -992,11 +976,11 @@ public void testNonAcidToAcidConversion4() throws Exception { runStatementOnDriver("alter table "+ Table.NONACIDNESTEDPART + " partition(p='p1',q='q1') compact 'MAJOR'"); runWorker(hiveConf); // There should be 1 new base directory: base_00000016 - // Original bucket files, delta directories, delete_delta directories and the + // Original bucket file, delta directories, delete_delta directories and the // previous base directory should stay until Cleaner kicks in. status = listFilesByTable(fs, Table.NONACIDNESTEDPART); Arrays.sort(status); - Assert.assertEquals(8, status.length); + Assert.assertEquals(7, status.length); int numBase = 0; Set bases = new HashSet<>(); for (int i = 0; i < status.length; i++) { @@ -1029,9 +1013,9 @@ public void testNonAcidToAcidConversion4() throws Exception { // 6. Let Cleaner delete obsolete files/dirs status = listFilesByTable(fs, Table.NONACIDNESTEDPART); // Before Cleaner, there should be 8 items: - // 2 original files, 2 delta directories (1 files each), 1 delete_delta directory (1 file) and 2 base directories (with one and two files respectively) + // 1 original file, 2 delta directories (1 files each), 1 delete_delta directory (1 file) and 2 base directories (with one and two files respectively) - Assert.assertEquals(8, status.length); + Assert.assertEquals(7, status.length); runCleaner(hiveConf); runCleaner(hiveConf); // There should be only 1 directory left: base_00000016 From 51b23c7921723595fcea234ff88a801bade3938b Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 2 Dec 2024 15:20:49 +0100 Subject: [PATCH 10/30] testFailHeartbeater --- .../apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java index 0a174a6651a2..c10f134c95b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java @@ -280,6 +280,7 @@ public int monitorExecution() { // best effort } console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e)); + diagnostics.append(e.getMessage()); rc = 1; done = true; } else { From c01da5f1bdadae5750645776672d643bf1bf57ad Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Tue, 3 Dec 2024 14:03:54 +0100 Subject: [PATCH 11/30] testCleanerForTxnToWriteId --- .../hadoop/hive/ql/TestTxnCommands2.java | 3 +++ .../hive/ql/TxnCommandsBaseForTests.java | 23 ++++++++++++++++--- .../MinOpenTxnIdWaterMarkFunction.java | 2 +- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index ba439b10aa9d..322721b6c7b8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -2434,6 +2434,9 @@ public void testCleanerForTxnToWriteId() throws Exception { // Keep an open txn which refers to the aborted txn. 
Context ctx = new Context(hiveConf); HiveTxnManager txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(hiveConf); + // Txn is not considered committed or aborted until TXN_OPENTXN_TIMEOUT expires + // See MinOpenTxnIdWaterMarkFunction, OpenTxnTimeoutLowBoundaryTxnIdHandler + waitUntilAllTxnFinished(); txnMgr.openTxn(ctx, "u1"); txnMgr.getValidTxns(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index c3e769e1adc4..5f9cbecde02a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -28,11 +28,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Predicate; -import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.esotericsoftware.kryo.util.ObjectMap; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileSystem; @@ -43,12 +41,13 @@ import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConfForTest; +import org.apache.hadoop.hive.metastore.DatabaseProduct; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.txn.entities.TxnStatus; import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; -import org.apache.hadoop.hive.ql.lockmgr.DbTxnManagerEndToEndTestBase; import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientWithLocalCache; import org.apache.hadoop.hive.ql.processors.CommandProcessorException; import org.apache.hadoop.hive.ql.session.SessionState; @@ -69,6 +68,8 @@ import 
static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; +import static org.apache.hadoop.hive.metastore.DatabaseProduct.determineDatabaseProduct; +import static org.apache.hadoop.hive.metastore.txn.TxnUtils.getEpochFn; public abstract class TxnCommandsBaseForTests { private static final Logger LOG = LoggerFactory.getLogger(TxnCommandsBaseForTests.class); @@ -435,4 +436,20 @@ Driver swapDrivers(Driver otherDriver) { d = otherDriver; return tmp; } + + protected void waitUntilAllTxnFinished() throws Exception { + long openTxnTimeOutMillis = MetastoreConf.getTimeVar( + hiveConf, MetastoreConf.ConfVars.TXN_OPENTXN_TIMEOUT, TimeUnit.MILLISECONDS); + while (getOpenTxnCount(openTxnTimeOutMillis) > 0) { + Thread.sleep(openTxnTimeOutMillis); + } + } + + protected int getOpenTxnCount(long openTxnTimeOutMillis) throws Exception { + int counted = TestTxnDbUtil.countQueryAgent(hiveConf, + "select count(*) from TXNS where TXN_STATE = '" + TxnStatus.OPEN.getSqlConst() + "' " + + "or TXN_STARTED >= (" + getEpochFn(determineDatabaseProduct(DatabaseProduct.DERBY_NAME, hiveConf)) + + " - " + openTxnTimeOutMillis + ")"); + return counted; + } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java index 0407729af01b..dee11031fb67 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java @@ -61,7 +61,7 @@ public Long execute(MultiDataSourceJdbcResource jdbcResource) throws MetaExcepti return id; })); long lowWaterMark = jdbcResource.execute(new 
OpenTxnTimeoutLowBoundaryTxnIdHandler(openTxnTimeOutMillis)); - LOG.debug("MinOpenTxnIdWaterMark calculated with minOpenTxn {}, lowWaterMark {}", minOpenTxn, lowWaterMark); + LOG.info("MinOpenTxnIdWaterMark calculated with minOpenTxn {}, lowWaterMark {}", minOpenTxn, lowWaterMark); return Long.min(minOpenTxn, lowWaterMark + 1); } } From f1e30e589c1c15d888a27f8f4150cd9a6b8e9d8e Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Tue, 3 Dec 2024 14:50:09 +0100 Subject: [PATCH 12/30] testDynPartUpdateWithAborts --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 322721b6c7b8..0c8cd5c9057c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -2792,7 +2792,7 @@ public void testDynPartUpdateWithAborts() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_MODE_FAIL_LOAD_DYNAMIC_PARTITION, true); runStatementOnDriverWithAbort("update " + Table.ACIDTBLPART + " set b=a+2 where a<5"); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_MODE_FAIL_LOAD_DYNAMIC_PARTITION, false); - verifyDeltaDirAndResult(2, Table.ACIDTBLPART.toString(), "p=p1", resultData1); + verifyDeltaDirAndResult(3, Table.ACIDTBLPART.toString(), "p=p1", resultData1); verifyDeleteDeltaDir(1, Table.ACIDTBLPART.toString(), "p=p1"); int count = TestTxnDbUtil From 2c90b6fbaf2e1e97b06a293e9fd54ea6c7588d57 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Tue, 3 Dec 2024 15:08:10 +0100 Subject: [PATCH 13/30] testOrcPPD --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 0c8cd5c9057c..2d2d58c06398 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -205,7 +205,8 @@ public void testOrcNoPPD() throws Exception { * @throws Exception */ private void testOrcPPD(boolean enablePPD) throws Exception { - boolean originalPpd = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_OPT_INDEX_FILTER); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_EXPLAIN_USER, false); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPT_PPD, enablePPD);//enables PPD hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPT_INDEX_FILTER, enablePPD);//enables ORC PPD //create delta_0001_0001_0000 (should push predicate here) runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(new int[][]{{1, 2}, {3, 4}})); @@ -264,7 +265,6 @@ private void testOrcPPD(boolean enablePPD) throws Exception { List rs1 = runStatementOnDriver(query); int [][] resultData = new int[][] {{3, 5}, {5, 6}, {9, 10}}; Assert.assertEquals("Update failed", stringifyValues(resultData), rs1); - hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPT_INDEX_FILTER, originalPpd); } static void assertExplainHasString(String string, List queryPlan, String errMsg) { From dd9fcaa5008f847512346cef3c2f1e5bef3d3be7 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 09:39:17 +0100 Subject: [PATCH 14/30] testFileSystemUnCaching --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 2d2d58c06398..9387c9b22bc1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -1319,6 +1319,7 @@ public void testInitiatorWithMinorCompactionForInsertOnlyTable() throws Exceptio * Make sure there's no FileSystem$Cache$Key leak due to UGI use * @throws Exception */ + @Ignore("FileSystem.closeAllForUGI is 
never called from Compaction related threads") @Test public void testFileSystemUnCaching() throws Exception { int cacheSizeBefore; From 1ddce00bb90cb46f68325ca7767e6056751f3759 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 10:56:22 +0100 Subject: [PATCH 15/30] testCleaner2 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java | 3 +-- .../org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index 563b86a471fd..d2754fa30a79 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -338,8 +338,7 @@ public void testCleaner2() throws Exception { "t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t4", "t/delta_0000002_0000002_0000/bucket_00000_0"}}; - checkResult(expected, testQuery, false, "check data", LOG); - + checkResult(expected, testQuery, "check data", LOG); txnMgr2 = swapTxnManager(txnMgr1); driver2 = swapDrivers(driver1); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 5f9cbecde02a..872010270e0e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -423,6 +423,10 @@ protected void checkResult(String[][] expectedResult, String query, boolean isVe assertMappersAreNotVectorized(query); } } + protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) throws Exception{ + List rs = runStatementOnDriver(query); + checkExpected(rs, expectedResult, msg, LOG, true); + } void dropTables(String... 
tables) throws Exception { HiveConf queryConf = d.getQueryState().getConf(); queryConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); From 7cc829c8e693d0950f4414ce5985f9c75812d756 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 10:57:25 +0100 Subject: [PATCH 16/30] testRenameTable --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index d2754fa30a79..6bdda301fce6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -91,7 +91,7 @@ public void testRenameTable() throws Exception { "s/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t4\t6", "s/delta_0000002_0000002_0001/bucket_00000_0"}}; - checkResult(expected, testQuery, false, "check data", LOG); + checkResult(expected, testQuery, "check data", LOG); Assert.assertEquals(0, TestTxnDbUtil.countQueryAgent(hiveConf, From e5cc0e269ffbc45464519afb95f69e5ec5678e7a Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 11:06:32 +0100 Subject: [PATCH 17/30] addPartitionRename --- .../test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java index d07a2281a142..6809a6d99cc8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java @@ -249,7 +249,7 @@ public void addPartitionRename() throws Exception { runStatementOnDriver("create table Tstage (a int, b int) clustered by (a) into 2 " + "buckets stored as orc tblproperties('transactional'='false')"); - 
runStatementOnDriver("insert into Tstage values(0,2),(1,4)"); + runStatementOnDriver("insert into Tstage values(0,2),(2,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); FileSystem fs = FileSystem.get(hiveConf); fs.rename(new Path(getWarehouseDir() + "/1/data/000000_0"), new Path(getWarehouseDir() + "/1/data/part-m000")); @@ -261,9 +261,9 @@ public void addPartitionRename() throws Exception { List rs = runStatementOnDriver( "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); String[][] expected = new String[][]{ + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2\t4", + "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t0\t0\t2", - "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t1\t4", "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}}; checkExpected(rs, expected, "add partition (p=0)"); } From 93a2d8ddf253a1c44090d0d3acf492c02e679e6c Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 11:18:20 +0100 Subject: [PATCH 18/30] testConcatenateMM --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java index 89483bae5658..dd9391dc4844 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java @@ -133,6 +133,10 @@ public void testConcatenatePart() throws Exception { @Test public void testConcatenateMM() throws Exception { + // Only one bucket is expected in this test + hiveConf.set("tez.grouping.max-size", "1024"); + hiveConf.set("tez.grouping.min-size", "1"); + HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY, true); dropTables("T"); runStatementOnDriver("create table 
T(a int, b int)"); From d25dd5f7c26eabf3d5f21b0e3b9c91deb73dfd99 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 13:39:05 +0100 Subject: [PATCH 19/30] INPUT__FILE__NAME can be vectorized --- .../hadoop/hive/ql/TestTxnAddPartition.java | 8 ++--- .../hadoop/hive/ql/TestTxnCommands3.java | 36 +++++++++---------- .../apache/hadoop/hive/ql/TestTxnExIm.java | 3 +- .../hadoop/hive/ql/TestTxnLoadData.java | 14 +++----- .../hadoop/hive/ql/TestTxnNoBuckets.java | 2 +- .../hive/ql/TxnCommandsBaseForTests.java | 10 +++--- 6 files changed, 31 insertions(+), 42 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java index 6809a6d99cc8..9edddd0c34ef 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java @@ -97,8 +97,7 @@ private void addPartition(boolean isVectorized) throws Exception { " PARTITION (p=1) location '" + getWarehouseDir() + "/2/data'" + " PARTITION (p=2)"); - String testQuery = isVectorized ? "select ROW__ID, p, a, b from T order by p, ROW__ID" : - "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"; + String testQuery = "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"; String[][] expected = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, @@ -175,8 +174,7 @@ private void addPartitionMM(boolean isVectorized) throws Exception { " PARTITION (p=1) location '" + getWarehouseDir() + "/2/data'" + " PARTITION (p=2)"); - String testQuery = isVectorized ? 
"select p, a, b from T order by p, a, b" : - "select p, a, b, INPUT__FILE__NAME from T order by p, a, b"; + String testQuery = "select p, a, b, INPUT__FILE__NAME from T order by p, a, b"; String[][] expected = new String[][]{ {"0\t0\t2", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, {"0\t0\t4", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, @@ -232,7 +230,7 @@ public void addPartitionBucketed() throws Exception { } private void checkExpected(List rs, String[][] expected, String msg) { - super.checkExpected(rs, expected, msg, LOG, true); + super.checkExpected(rs, expected, msg, LOG); } /** diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index 6bdda301fce6..363b4d6d3b18 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -174,14 +174,12 @@ private void testDeleteEventPruning() throws Exception { boolean isVectorized = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); - String testQuery = isVectorized ? 
- "select ROW__ID, a, b from T order by a, b" : - "select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b"; + String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b"; String[][] expected = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t4\t5", - "warehouse/t/delta_0000001_0000001_0000/bucket_00000"}, + "warehouse/t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", - "warehouse/t/delta_0000002_0000002_0000/bucket_00000"}}; + "warehouse/t/delta_0000002_0000002_0000/bucket_00000_0"}}; checkResult(expected, testQuery, isVectorized, "after delete", LOG); runStatementOnDriver("alter table T compact 'MAJOR'"); @@ -197,9 +195,9 @@ private void testDeleteEventPruning() throws Exception { String[][] expected2 = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t4\t5", - "warehouse/t/base_0000001/bucket_00000"}, + "warehouse/t/base_0000003_v0000012/bucket_00000"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", - "warehouse/t/base_0000002/bucket_00000"}}; + "warehouse/t/base_0000003_v0000012/bucket_00000"}}; checkResult(expected2, testQuery, isVectorized, "after compaction", LOG); } /** @@ -256,17 +254,17 @@ public void testAcidMetaColumsDecode() throws Exception { */ @Test public void testSdpoBucketed() throws Exception { - testSdpoBucketed(true, true, 1); - testSdpoBucketed(true, false, 1); - testSdpoBucketed(false, true, 1); - testSdpoBucketed(false, false,1); - - testSdpoBucketed(true, true, 2); - testSdpoBucketed(true, false, 2); - testSdpoBucketed(false, true, 2); - testSdpoBucketed(false, false,2); + testSdpoBucketed(true, 1); + testSdpoBucketed(true, 1); + testSdpoBucketed(false, 1); + testSdpoBucketed(false, 1); + + testSdpoBucketed(true, 2); + testSdpoBucketed(true, 2); + testSdpoBucketed(false, 2); + testSdpoBucketed(false, 2); } - private void testSdpoBucketed(boolean isVectorized, boolean isSdpo, int bucketing_version) + 
private void testSdpoBucketed(boolean isVectorized, int bucketing_version) throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorized); runStatementOnDriver("drop table if exists acid_uap"); @@ -279,9 +277,7 @@ private void testSdpoBucketed(boolean isVectorized, boolean isSdpo, int bucketin "values (1, 'bah'),(2, 'yah')"); runStatementOnDriver("select a,b, ds from acid_uap order by a,b, ds"); - String testQuery = isVectorized ? - "select ROW__ID, a, b, ds from acid_uap order by ds, a, b" : - "select ROW__ID, a, b, ds, INPUT__FILE__NAME from acid_uap order by ds, a, b"; + String testQuery = "select ROW__ID, a, b, ds, INPUT__FILE__NAME from acid_uap order by ds, a, b"; String[][] expected = new String[][]{ {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttoday", "warehouse/acid_uap/ds=today/delta_0000002_0000002_0000/bucket_00001_0"}, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java index 6636ff252bd2..7b24f3760129 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java @@ -320,8 +320,7 @@ private void testImport(boolean isVectorized, boolean existingTarget) throws Exc //load into existing empty table T runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'"); - String testQuery = isVectorized ? 
"select ROW__ID, a, b from T order by ROW__ID" : - "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; + String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; String[][] expected = new String[][] { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"}, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index d5d6a330f486..fb2f7149a465 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -108,8 +108,7 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { // 'data' is created by export command/ runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); - String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" : - "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; + String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; String[][] expected = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}}; @@ -193,8 +192,7 @@ private void loadData(boolean isVectorized) throws Exception { // 'data' is created by export command/ runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); - String testQuery = isVectorized ? 
"select ROW__ID, a, b from T order by ROW__ID" : - "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; + String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; String[][] expected = new String[][] { //normal insert {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000001_0000/bucket_00000_0"}, @@ -271,8 +269,7 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti // (with 000000_0, 000000_0_copy_1, 000000_0_copy_2) runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); - String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" : - "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; + String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; /* {"writeid":0,"bucketid":536870912,"rowid":0} 0 2/000000_0 {"writeid":0,"bucketid":536870912,"rowid":1} 0 4/000000_0 @@ -404,7 +401,7 @@ public void testValidations() throws Exception { } private void checkExpected(List rs, String[][] expected, String msg) { - super.checkExpected(rs, expected, msg, LOG, true); + super.checkExpected(rs, expected, msg, LOG); } @Test @@ -438,8 +435,7 @@ private void testMultiStatement(boolean isVectorized) throws Exception { runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("COMMIT"); - String testQuery = isVectorized ? 
"select ROW__ID, a, b from T order by ROW__ID" : - "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; + String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; String[][] expected = new String[][] { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000_0"}, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index 05c053c6f510..c7b9c5622703 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -766,7 +766,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { assertMappersAreNotVectorized(query); } private void checkExpected(List rs, String[][] expected, String msg) { - super.checkExpected(rs, expected, msg, LOG, true); + super.checkExpected(rs, expected, msg, LOG); } /** * HIVE-17900 diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 872010270e0e..faf2bb0c35d6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -387,7 +387,7 @@ void assertExpectedFileSet(Set expectedFiles, String rootPath, String ta } Assert.assertEquals("Unexpected file list", expectedFiles, actualFiles); } - void checkExpected(List rs, String[][] expected, String msg, Logger LOG, boolean checkFileName) { + void checkExpected(List rs, String[][] expected, String msg, Logger LOG) { LOG.warn(testName.getMethodName() + ": read data(" + msg + "): "); logResult(LOG, rs); Assert.assertEquals(testName.getMethodName() + ": " + msg + "; " + rs, @@ -395,9 +395,9 @@ void checkExpected(List rs, String[][] expected, String 
msg, Logger LOG, //verify data and layout for(int i = 0; i < expected.length; i++) { Assert.assertTrue("Actual line (data) " + i + " data: " + rs.get(i) + "; expected " + expected[i][0], rs.get(i).startsWith(expected[i][0])); - if(checkFileName) { + if (expected.length == 2) { Assert.assertTrue("Actual line(file) " + i + " file: " + rs.get(i), - rs.get(i).endsWith(expected[i][1]) || rs.get(i).matches(expected[i][1])); + rs.get(i).endsWith(expected[i][1]) || rs.get(i).matches(expected[i][1])); } } } @@ -416,7 +416,7 @@ void logResult(Logger LOG, List rs) { */ protected void checkResult(String[][] expectedResult, String query, boolean isVectorized, String msg, Logger LOG) throws Exception{ List rs = runStatementOnDriver(query); - checkExpected(rs, expectedResult, msg + (isVectorized ? " vect" : ""), LOG, !isVectorized); + checkExpected(rs, expectedResult, msg + (isVectorized ? " vect" : ""), LOG); if (isVectorized) { assertMappersAreVectorized(query); } else { @@ -425,7 +425,7 @@ protected void checkResult(String[][] expectedResult, String query, boolean isVe } protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) throws Exception{ List rs = runStatementOnDriver(query); - checkExpected(rs, expectedResult, msg, LOG, true); + checkExpected(rs, expectedResult, msg, LOG); } void dropTables(String... 
tables) throws Exception { HiveConf queryConf = d.getQueryState().getConf(); From 46069313dd6c640e738537ef4a23df3f8c5893d5 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 13:49:22 +0100 Subject: [PATCH 20/30] extract databaseProduct --- .../org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index faf2bb0c35d6..3f9d83855e17 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -82,6 +82,7 @@ public abstract class TxnCommandsBaseForTests { protected HiveConf hiveConf; protected Driver d; protected TxnStore txnHandler; + private DatabaseProduct databaseProduct; public enum Table { ACIDTBL("acidTbl"), @@ -123,6 +124,7 @@ void initHiveConf() { // written and the grouping size. Most test cases expects 2 buckets. 
hiveConf.set("tez.grouping.max-size", "10"); hiveConf.set("tez.grouping.min-size", "1"); + databaseProduct = determineDatabaseProduct(DatabaseProduct.DERBY_NAME, hiveConf); } void setUpInternal() throws Exception { @@ -450,10 +452,9 @@ protected void waitUntilAllTxnFinished() throws Exception { } protected int getOpenTxnCount(long openTxnTimeOutMillis) throws Exception { - int counted = TestTxnDbUtil.countQueryAgent(hiveConf, + return TestTxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXNS where TXN_STATE = '" + TxnStatus.OPEN.getSqlConst() + "' " + - "or TXN_STARTED >= (" + getEpochFn(determineDatabaseProduct(DatabaseProduct.DERBY_NAME, hiveConf)) + + "or TXN_STARTED >= (" + getEpochFn(databaseProduct) + " - " + openTxnTimeOutMillis + ")"); - return counted; } } From 6cb6bbdc032b84b6ae841fdc9f3fed1dc840e5f8 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 15:04:00 +0100 Subject: [PATCH 21/30] remove isVectorized flag --- .../hadoop/hive/ql/TestTxnAddPartition.java | 30 ++++---- .../hadoop/hive/ql/TestTxnCommands.java | 10 +-- .../hadoop/hive/ql/TestTxnCommands3.java | 17 ++-- .../hadoop/hive/ql/TestTxnConcatenate.java | 12 +-- .../apache/hadoop/hive/ql/TestTxnExIm.java | 21 +++-- .../hadoop/hive/ql/TestTxnLoadData.java | 77 +++++++++---------- .../hadoop/hive/ql/TestTxnNoBuckets.java | 5 -- .../hive/ql/TxnCommandsBaseForTests.java | 18 +++-- .../hadoop/hive/ql/util/TestUpgradeTool.java | 10 +-- 9 files changed, 95 insertions(+), 105 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java index 9edddd0c34ef..6f7d5f53bca1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java @@ -63,15 +63,15 @@ protected String getTestDataDir() { @Test - public void addPartition() throws Exception { - - addPartition(false); + public void testAddPartition() throws Exception 
{ + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); + addPartition(); } @Test - public void addPartitionVectorized() throws Exception { + public void testAddPartitionVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - addPartition(true); + addPartition(); } /** @@ -80,7 +80,7 @@ public void addPartitionVectorized() throws Exception { * adding partition when it already exists * adding partition when it already exists with "if not exists" */ - private void addPartition(boolean isVectorized) throws Exception { + private void addPartition() throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) partitioned by (p int) stored as orc" + @@ -107,7 +107,7 @@ private void addPartition(boolean isVectorized) throws Exception { "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, isVectorized, "add 2 parts w/data and 1 empty", LOG); + checkResult(expected, testQuery, "add 2 parts w/data and 1 empty", LOG); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/3'"); //should be an error since p=3 exists @@ -135,18 +135,18 @@ private void addPartition(boolean isVectorized) throws Exception { "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t3\t0\t4", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}}; - checkResult(expected2, testQuery, isVectorized, "add 2 existing parts and 1 empty", LOG); + checkResult(expected2, testQuery, "add 2 existing parts and 1 empty", LOG); } @Test - public void addPartitionMM() throws Exception { - addPartitionMM(false); + public void testAddPartitionMM() throws Exception { + addPartitionMM(); } @Test - public void 
addPartitionMMVectorized() throws Exception { + public void testAddPartitionMMVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - addPartitionMM(true); + addPartitionMM(); } /** @@ -156,7 +156,7 @@ public void addPartitionMMVectorized() throws Exception { * adding partition when it already exists * adding partition when it already exists with "if not exists" */ - private void addPartitionMM(boolean isVectorized) throws Exception { + private void addPartitionMM() throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); @@ -180,7 +180,7 @@ private void addPartitionMM(boolean isVectorized) throws Exception { {"0\t0\t4", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, {"1\t0\t2", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, {"1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, isVectorized, "add 2 parts w/data and 1 empty", LOG); + checkResult(expected, testQuery, "add 2 parts w/data and 1 empty", LOG); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/3'"); //should be an error since p=3 exists @@ -201,7 +201,7 @@ private void addPartitionMM(boolean isVectorized) throws Exception { {"1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, {"3\t0\t2", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}, {"3\t0\t4", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}}; - checkResult(expected2, testQuery, isVectorized, "add 2 existing parts and 1 empty", LOG); + checkResult(expected2, testQuery, "add 2 existing parts and 1 empty", LOG); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 95f46d33a737..bddcd7cc85eb 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ 
-1425,15 +1425,14 @@ public void testNonAcidToAcidConversion01() throws Exception { //create a delta directory runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,17)"); - boolean isVectorized = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); - String query = "select ROW__ID, a, b" + (isVectorized ? " from " : ", INPUT__FILE__NAME from ") + Table.NONACIDORCTBL + " order by ROW__ID"; + String query = "select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.NONACIDORCTBL + " order by ROW__ID"; String[][] expected = new String[][] { {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2", "nonacidorctbl/000001_0"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t0\t12", "nonacidorctbl/000001_0_copy_1"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "nonacidorctbl/000001_0_copy_1"}, {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17", "nonacidorctbl/delta_10000001_10000001_0000/bucket_00001_0"} }; - checkResult(expected, query, isVectorized, "before compact", LOG); + checkResult(expected, query, "before compact", LOG); Assert.assertEquals(536870912, BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(0))); @@ -1444,15 +1443,14 @@ public void testNonAcidToAcidConversion01() throws Exception { runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " compact 'major'"); runWorker(hiveConf); - query = "select ROW__ID, a, b" + (isVectorized ? 
"" : ", INPUT__FILE__NAME") + " from " - + Table.NONACIDORCTBL + " order by ROW__ID"; + query = "select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.NONACIDORCTBL + " order by ROW__ID"; String[][] expected2 = new String[][] { {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2", "nonacidorctbl/base_10000001_v0000009/bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t0\t12", "nonacidorctbl/base_10000001_v0000009/bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "nonacidorctbl/base_10000001_v0000009/bucket_00001"}, {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17", "nonacidorctbl/base_10000001_v0000009/bucket_00001"} }; - checkResult(expected2, query, isVectorized, "after major compact", LOG); + checkResult(expected2, query, "after major compact", LOG); //make sure they are the same before and after compaction } //@Ignore("see bucket_num_reducers_acid.q") diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index 363b4d6d3b18..41e2ec5ebfe5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -91,8 +91,8 @@ public void testRenameTable() throws Exception { "s/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t4\t6", "s/delta_0000002_0000002_0001/bucket_00000_0"}}; - checkResult(expected, testQuery, "check data", LOG); - + List rs = runStatementOnDriver(testQuery); + checkExpected(rs, expected, "check data", LOG); Assert.assertEquals(0, TestTxnDbUtil.countQueryAgent(hiveConf, "select count(*) from COMPLETED_TXN_COMPONENTS where CTC_TABLE='t'")); @@ -172,15 +172,13 @@ private void testDeleteEventPruning() throws Exception { List rs = runStatementOnDriver( "select ROW__ID, a, b from T order by a, b"); - boolean isVectorized = - 
hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b"; String[][] expected = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t4\t5", "warehouse/t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", "warehouse/t/delta_0000002_0000002_0000/bucket_00000_0"}}; - checkResult(expected, testQuery, isVectorized, "after delete", LOG); + checkResult(expected, testQuery, "after delete", LOG); runStatementOnDriver("alter table T compact 'MAJOR'"); runWorker(hiveConf); @@ -198,7 +196,7 @@ private void testDeleteEventPruning() throws Exception { "warehouse/t/base_0000003_v0000012/bucket_00000"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", "warehouse/t/base_0000003_v0000012/bucket_00000"}}; - checkResult(expected2, testQuery, isVectorized, "after compaction", LOG); + checkResult(expected2, testQuery, "after compaction", LOG); } /** * HIVE-19985 @@ -288,7 +286,7 @@ private void testSdpoBucketed(boolean isVectorized, int bucketing_version) "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00001_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00000_0"}}; - checkResult(expected, testQuery, isVectorized, "after insert", LOG); + checkResult(expected, testQuery, "after insert", LOG); runStatementOnDriver("update acid_uap set b = 'fred'"); @@ -302,7 +300,7 @@ private void testSdpoBucketed(boolean isVectorized, int bucketing_version) "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0001/bucket_00001_0"}, {"{\"writeid\":3,\"bucketid\":536870913,\"rowid\":0}\t2\tfred\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0001/bucket_00000_0"}}; - checkResult(expected2, testQuery, isVectorized, "after update", LOG); + checkResult(expected2, testQuery, "after update", LOG); } @Test 
public void testCleaner2() throws Exception { @@ -334,7 +332,8 @@ public void testCleaner2() throws Exception { "t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t4", "t/delta_0000002_0000002_0000/bucket_00000_0"}}; - checkResult(expected, testQuery, "check data", LOG); + List rs = runStatementOnDriver(testQuery); + checkExpected(rs, expected, "check data", LOG); txnMgr2 = swapTxnManager(txnMgr1); driver2 = swapDrivers(driver1); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java index dd9391dc4844..a601b48c1ee0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java @@ -62,7 +62,7 @@ public void testConcatenate() throws Exception { "acidtbl/delta_0000003_0000003_0000/bucket_00001_0"}, {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t8\t8", "acidtbl/delta_0000003_0000003_0000/bucket_00001_0"}}; - checkResult(expected, testQuery, false, "check data", LOG); + checkResult(expected, testQuery, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() but in normal usage 'concatenate' is blocking, */ @@ -86,7 +86,7 @@ public void testConcatenate() throws Exception { "acidtbl/base_0000003_v0000011/bucket_00001"}, {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t8\t8", "acidtbl/base_0000003_v0000011/bucket_00001"}}; - checkResult(expected2, testQuery, false, "check data after concatenate", LOG); + checkResult(expected2, testQuery, "check data after concatenate", LOG); } @Test public void testConcatenatePart() throws Exception { @@ -103,7 +103,7 @@ public void testConcatenatePart() throws Exception { "acidtblpart/p=p1/delta_0000003_0000003_0000/bucket_00001_0"}, {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t8\t8", 
"acidtblpart/p=p2/delta_0000003_0000003_0000/bucket_00001_0"}}; - checkResult(expected, testQuery, false, "check data", LOG); + checkResult(expected, testQuery, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() but in normal usage 'concatenate' is blocking, */ @@ -128,7 +128,7 @@ public void testConcatenatePart() throws Exception { {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "acidtblpart/p=p2/delta_0000003_0000003_0000/bucket_00001_0"}}; - checkResult(expected2, testQuery, false, "check data after concatenate", LOG); + checkResult(expected2, testQuery, "check data after concatenate", LOG); } @Test @@ -148,7 +148,7 @@ public void testConcatenateMM() throws Exception { {"4\t5", "t/delta_0000001_0000001_0000/000000_0"}, {"5\t6", "t/delta_0000002_0000002_0000/000000_0"}, {"8\t8", "t/delta_0000002_0000002_0000/000000_0"}}; - checkResult(expected, testQuery, false, "check data", LOG); + checkResult(expected, testQuery, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() but in normal usage 'concatenate' is blocking, */ @@ -168,6 +168,6 @@ public void testConcatenateMM() throws Exception { {"4\t5", "t/base_0000003_v0000011/000000_0"}, {"5\t6", "t/base_0000003_v0000011/000000_0"}, {"8\t8", "t/base_0000003_v0000011/000000_0"}}; - checkResult(expected2, testQuery, false, "check data after concatenate", LOG); + checkResult(expected2, testQuery, "check data after concatenate", LOG); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java index 7b24f3760129..fac721b79c1b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java @@ -280,7 +280,7 @@ public void testCTLT() throws Exception { */ @Test public void testImport() throws Exception { - testImport(false, true); + testImport(true); } 
/** * tests import where target table already exists. @@ -288,14 +288,15 @@ public void testImport() throws Exception { @Test public void testImportVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - testImport(true, true); + testImport(true); } /** * tests import where target table does not exists. */ @Test public void testImportNoTarget() throws Exception { - testImport(false, false); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); + testImport(false); } /** * MM tables already work - mm_exim.q @@ -304,7 +305,7 @@ public void testImportNoTarget() throws Exception { * If importing into existing table (un-partitioned) it must be empty. * If Import is creating a table it will be exactly like exported one except for the name. */ - private void testImport(boolean isVectorized, boolean existingTarget) throws Exception { + private void testImport(boolean existingTarget) throws Exception { dropTables("T", "Tstage"); if(existingTarget) { runStatementOnDriver("create table T (a int, b int) stored as orc"); @@ -328,7 +329,7 @@ private void testImport(boolean isVectorized, boolean existingTarget) throws Exc "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "t/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, isVectorized, "import existing table"); + checkResult(expected, testQuery, "import existing table", LOG); runStatementOnDriver("update T set a = 0 where b = 6"); String[][] expected2 = new String[][] { @@ -338,7 +339,7 @@ private void testImport(boolean isVectorized, boolean existingTarget) throws Exc "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t0\t6", "t/delta_0000002_0000002_0001/bucket_00000_0"}}; - checkResult(expected2, testQuery, isVectorized, "update imported table"); + checkResult(expected2, testQuery, "update imported table", LOG); 
runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); @@ -349,7 +350,7 @@ private void testImport(boolean isVectorized, boolean existingTarget) throws Exc ".*t/delta_0000001_0000002_v000001[4-5]/bucket_00000"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t0\t6", ".*t/delta_0000001_0000002_v000001[4-5]/bucket_00000"}}; - checkResult(expected3, testQuery, isVectorized, "minor compact imported table"); + checkResult(expected3, testQuery, "minor compact imported table", LOG); } @@ -382,7 +383,7 @@ public void testImportPartitioned() throws Exception { "t/p=11/delta_0000002_0000002_0000/000000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/p=12/delta_0000003_0000003_0000/000000_0"}}; - checkResult(expected, testQuery, isVectorized, "import existing table"); + checkResult(expected, testQuery, "import existing table", LOG); } @Test @@ -562,10 +563,6 @@ private void testMM(boolean existingTable, boolean isSourceMM) throws Exception Assert.assertTrue(s, s.endsWith("/000000_0")); } } - private void checkResult(String[][] expectedResult, String query, boolean isVectorized, - String msg) throws Exception{ - checkResult(expectedResult, query, isVectorized, msg, LOG); - } /** * This test will fail - MM export doesn't filter out aborted transaction data. 
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index fb2f7149a465..c9c65ed5ac86 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -58,42 +58,42 @@ protected String getTestDataDir() { } @Test - public void loadData() throws Exception { - loadData(false); + public void loadDataNotVectorized() throws Exception { + loadData(); } @Test public void loadDataVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - loadData(true); + loadData(); } @Test - public void loadDataUpdate() throws Exception { - loadDataUpdate(false); + public void loadDataUpdateNotVectorized() throws Exception { + loadDataUpdate(); } @Test public void loadDataUpdateVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - loadDataUpdate(true); + loadDataUpdate(); } @Test - public void loadDataNonAcid2AcidConversion() throws Exception { - loadDataNonAcid2AcidConversion(false); + public void loadDataNonAcid2AcidConversionNotVectorized() throws Exception { + loadDataNonAcid2AcidConversion(); } @Test public void loadDataNonAcid2AcidConversionVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - loadDataNonAcid2AcidConversion(true); + loadDataNonAcid2AcidConversion(); } @Test - public void testMultiStatement() throws Exception { - testMultiStatement(false); + public void testMultiStatementNotVectorized() throws Exception { + testMultiStatement(); } @Test public void testMultiStatementVectorized() throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); - testMultiStatement(true); + testMultiStatement(); } - private void loadDataUpdate(boolean isVectorized) throws Exception { + private void loadDataUpdate() throws Exception { dropTables("T", 
"Tstage"); runStatementOnDriver( "create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); @@ -112,13 +112,13 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { String[][] expected = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, isVectorized, "load data inpath"); + checkResult(expected, testQuery, "load data inpath"); runStatementOnDriver("update T set b = 17 where a = 1"); String[][] expected2 = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0001/bucket_00000_0"} }; - checkResult(expected2, testQuery, isVectorized, "update"); + checkResult(expected2, testQuery, "update"); runStatementOnDriver("insert into T values(2,2)"); String[][] expectedInter2 = new String[][] { @@ -126,13 +126,13 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0001/bucket_00000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000003_0000003_0000/bucket_00000_0"} }; - checkResult(expectedInter2, testQuery, isVectorized, "insert"); + checkResult(expectedInter2, testQuery, "insert"); runStatementOnDriver("delete from T where a = 3"); String[][] expectedInter3 = new String[][] { {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0001/bucket_00000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000003_0000003_0000/bucket_00000_0"} }; - checkResult(expectedInter3, testQuery, isVectorized, "delete"); + checkResult(expectedInter3, testQuery, "delete"); //test minor compaction 
runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); @@ -140,13 +140,13 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t1\t17", "t/delta_0000001_0000004_v0000018/bucket_00000"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000001_0000004_v0000018/bucket_00000"} }; - checkResult(expected3, testQuery, isVectorized, "delete compact minor"); + checkResult(expected3, testQuery, "delete compact minor"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' overwrite into table T"); String[][] expected4 = new String[][]{ {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000005/000000_0"}, {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000005/000000_0"}}; - checkResult(expected4, testQuery, isVectorized, "load data inpath overwrite"); + checkResult(expected4, testQuery, "load data inpath overwrite"); //load same data again (additive) runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); @@ -156,7 +156,7 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { {"{\"writeid\":6,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000006_0000006_0000/000000_0"}, {"{\"writeid\":6,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000006_0000006_0000/000000_0"} }; - checkResult(expectedInt1, testQuery, isVectorized, "load data local inpath"); + checkResult(expectedInt1, testQuery, "load data local inpath"); runStatementOnDriver("update T set b = 17 where a = 1");//matches 2 rows runStatementOnDriver("delete from T where a = 3");//matches 2 rows runStatementOnDriver("insert into T values(2,2)"); @@ -165,7 +165,7 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { {"{\"writeid\":7,\"bucketid\":536936449,\"rowid\":0}\t1\t17", "t/delta_0000007_0000007_0001/bucket_00001_0"}, 
{"{\"writeid\":9,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000009_0000009_0000/bucket_00000_0"} }; - checkResult(expected5, testQuery, isVectorized, "load data inpath overwrite update"); + checkResult(expected5, testQuery, "load data inpath overwrite update"); //test major compaction runStatementOnDriver("alter table T compact 'major'"); @@ -175,9 +175,9 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { {"{\"writeid\":7,\"bucketid\":536936449,\"rowid\":0}\t1\t17", "t/base_0000009_v0000033/bucket_00001"}, {"{\"writeid\":9,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000009_v0000033/bucket_00000"} }; - checkResult(expected6, testQuery, isVectorized, "load data inpath compact major"); + checkResult(expected6, testQuery, "load data inpath compact major"); } - private void loadData(boolean isVectorized) throws Exception { + private void loadData() throws Exception { dropTables("T", "Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("insert into T values(0,2),(0,4)"); @@ -200,7 +200,7 @@ private void loadData(boolean isVectorized) throws Exception { //Load Data {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000002_0000002_0000/000000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000002_0000002_0000/000000_0"}}; - checkResult(expected, testQuery, isVectorized, "load data inpath"); + checkResult(expected, testQuery, "load data inpath"); //test minor compaction runStatementOnDriver("alter table T compact 'minor'"); @@ -211,7 +211,7 @@ private void loadData(boolean isVectorized) throws Exception { {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000002_v0000010/bucket_00000"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000002_v0000010/bucket_00000"} }; - checkResult(expected1, testQuery, isVectorized, "load data inpath (minor)"); 
+ checkResult(expected1, testQuery, "load data inpath (minor)"); //test major compaction runStatementOnDriver("insert into T values(2,2)"); @@ -224,7 +224,7 @@ private void loadData(boolean isVectorized) throws Exception { {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000003_v0000015/bucket_00000"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000003_v0000015/bucket_00000"} }; - checkResult(expected2, testQuery, isVectorized, "load data inpath (major)"); + checkResult(expected2, testQuery, "load data inpath (major)"); //create more staging data and test Load Data Overwrite runStatementOnDriver("insert into Tstage values(5,6),(7,8)"); @@ -233,7 +233,7 @@ private void loadData(boolean isVectorized) throws Exception { String[][] expected3 = new String[][] { {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000004/000000_0"}, {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000004/000000_0"}}; - checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite"); + checkResult(expected3, testQuery, "load data inpath overwrite"); //one more major compaction runStatementOnDriver("insert into T values(6,6)"); @@ -243,12 +243,12 @@ private void loadData(boolean isVectorized) throws Exception { {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000005_v0000023/bucket_00000"}, {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000005_v0000023/bucket_00000"}, {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t6\t6", "t/base_0000005_v0000023/bucket_00000"}}; - checkResult(expected4, testQuery, isVectorized, "load data inpath overwrite (major)"); + checkResult(expected4, testQuery, "load data inpath overwrite (major)"); } /** * Load Data [overwrite] in to an (un-)partitioned acid converted table */ - private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Exception { + private void loadDataNonAcid2AcidConversion() throws 
Exception { dropTables("T", "Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='false')"); //per acid write to test nonAcid2acid conversion mixed with load data @@ -294,7 +294,7 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":1}\t5\t5", "t/delta_10000001_10000001_0000/000002_0"}, }; - checkResult(expected, testQuery, isVectorized, "load data inpath"); + checkResult(expected, testQuery, "load data inpath"); //create more staging data with copy_N files and do LD+Overwrite runStatementOnDriver("insert into Tstage values(5,6),(7,8)"); @@ -310,7 +310,7 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti "t/base_10000002/000001_0"} }; - checkResult(expected2, testQuery, isVectorized, "load data inpath overwrite"); + checkResult(expected2, testQuery, "load data inpath overwrite"); //create 1 more delta_x_x so that compactor has > dir file to compact runStatementOnDriver("insert into T values(9,9)"); @@ -327,7 +327,7 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t9\t9", "t/base_10000003_v0000013/bucket_00000"} }; - checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite (major)"); + checkResult(expected3, testQuery, "load data inpath overwrite (major)"); } /** * Load Data [overwrite] in to a partitioned transactional table @@ -418,7 +418,7 @@ public void testMMOrcTable() throws Exception { * Make sure Load Data assigns ROW_IDs correctly when there is statementId suffix on delta dir * For example, delta_x_x_0001. 
*/ - private void testMultiStatement(boolean isVectorized) throws Exception { + private void testMultiStatement() throws Exception { dropTables("T", "Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); //Tstage is just a simple way to generate test data @@ -442,7 +442,7 @@ private void testMultiStatement(boolean isVectorized) throws Exception { {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t5\t5", "t/delta_0000001_0000001_0001/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/delta_0000001_0000001_0001/000000_0"} }; - checkResult(expected, testQuery, isVectorized, "load data inpath"); + checkResult(expected, testQuery, "load data inpath"); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); @@ -452,7 +452,7 @@ private void testMultiStatement(boolean isVectorized) throws Exception { {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t5\t5", "t/base_0000001_v0000009/bucket_00000"}, {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/base_0000001_v0000009/bucket_00000"} }; - checkResult(expected2, testQuery, isVectorized, "load data inpath (major)"); + checkResult(expected2, testQuery, "load data inpath (major)"); //at lest for now, Load Data w/Overwrite is not allowed in a txn: HIVE-18154 } @@ -480,11 +480,10 @@ public void testAbort() throws Exception { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000_0"} }; - checkResult(expected, testQuery, isVectorized, "load data inpath"); + checkResult(expected, testQuery, "load data inpath"); } - void checkResult(String[][] expectedResult, String query, boolean isVectorized, - String msg) throws Exception{ - checkResult(expectedResult, query, isVectorized, msg, LOG); + void checkResult(String[][] expectedResult, 
String query, String msg) throws Exception { + checkResult(expectedResult, query, msg, LOG); } @Test public void testLoadAcidFile() throws Exception { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index c7b9c5622703..e0452ca45e72 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -63,10 +63,6 @@ public void setUp() throws Exception { //see TestTxnNoBucketsVectorized for vectorized version hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); } - - private boolean shouldVectorize() { - return hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); - } /** * Tests that Acid can work with un-bucketed tables. */ @@ -172,7 +168,6 @@ public void testNoBuckets() throws Exception { checkResult(expected, "select ROW__ID, c1, c2, c3" + (shouldVectorize() ? "" : ", INPUT__FILE__NAME") + " from " + NO_BUCKETS_TBL_NAME + " order by c1, c2, c3", - shouldVectorize(), "After Major Compaction", LOG); expectedFiles.clear(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 3f9d83855e17..7aa003b8fa10 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -364,7 +364,9 @@ protected void assertMapperExecutionMode(String query, String message, Predicate } Map mapVertex = (Map) vertexEntry.getValue(); String executionMode = (String) mapVertex.get("Execution mode"); - Assert.assertTrue(message + rs.get(0), predicate.test(executionMode)); + boolean vectorized = isNotBlank(executionMode) && executionMode.contains("vectorized"); + Assert.assertTrue(message + rs.get(0), + hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) ^ vectorized); } } /** @@ -416,19 +418,15 @@ 
void logResult(Logger LOG, List rs) { * which will currently make the query non-vectorizable. This means we can't check the file name * for vectorized version of the test. */ - protected void checkResult(String[][] expectedResult, String query, boolean isVectorized, String msg, Logger LOG) throws Exception{ + protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) throws Exception{ List rs = runStatementOnDriver(query); - checkExpected(rs, expectedResult, msg + (isVectorized ? " vect" : ""), LOG); - if (isVectorized) { + checkExpected(rs, expectedResult, msg + (shouldVectorize() ? " vect" : ""), LOG); + if (shouldVectorize()) { assertMappersAreVectorized(query); } else { assertMappersAreNotVectorized(query); } } - protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) throws Exception{ - List rs = runStatementOnDriver(query); - checkExpected(rs, expectedResult, msg, LOG); - } void dropTables(String... tables) throws Exception { HiveConf queryConf = d.getQueryState().getConf(); queryConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); @@ -457,4 +455,8 @@ protected int getOpenTxnCount(long openTxnTimeOutMillis) throws Exception { "or TXN_STARTED >= (" + getEpochFn(databaseProduct) + " - " + openTxnTimeOutMillis + ")"); } + + protected boolean shouldVectorize() { + return hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java b/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java index 4f24454056b7..111e8a3d4a18 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java @@ -121,7 +121,7 @@ public void testPostUpgrade() throws Exception { {"4\t5",""}, {"5\t6",""}, }; - checkResult(expected0, testQuery0, true, "TFlat pre-check", LOG); + checkResult(expected0, testQuery0, "TFlat pre-check", 
LOG); //should be converted to MM @@ -184,7 +184,7 @@ public void testPostUpgrade() throws Exception { {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "tacid/delta_0000002_0000002/000000_0"} }; - checkResult(expected, testQuery, false, "TAcid post-check", LOG); + checkResult(expected, testQuery, "TAcid post-check", LOG); testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from TAcidPart order by a, b, p, ROW__ID"; @@ -202,7 +202,7 @@ public void testPostUpgrade() throws Exception { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "tacidpart/p=12/delta_0000001_0000001/000000_0"} }; - checkResult(expected2, testQuery, false, "TAcidPart post-check", LOG); + checkResult(expected2, testQuery, "TAcidPart post-check", LOG); /* Verify that we re-arranged/renamed so that files names follow hive naming convention and are spread among deltas/buckets @@ -220,7 +220,7 @@ public void testPostUpgrade() throws Exception { {"4\t5"}, {"5\t6"} }; - checkResult(expectedData, testQuery, true, "TFlat post-check data", LOG); + checkResult(expectedData, testQuery, "TFlat post-check data", LOG); testQuery = "select ROW__ID, INPUT__FILE__NAME from TFlat order by INPUT__FILE__NAME"; String[][] expectedMetaData = new String[][] { @@ -235,7 +235,7 @@ public void testPostUpgrade() throws Exception { {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}", "tflat/delta_0000005_0000005/00000_0"} }; - checkResult(expectedMetaData, testQuery, false, "TFlat post-check files", LOG); + checkResult(expectedMetaData, testQuery, "TFlat post-check files", LOG); } @Test public void testGuessNumBuckets() { From f141ba27b9b806d81e83bb2282def6e75b8c0987 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Thu, 5 Dec 2024 15:32:24 +0100 Subject: [PATCH 22/30] move back HiveConfForTest --- .../org/apache/hadoop/hive/conf/HiveConfForTest.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename common/src/{java => test}/org/apache/hadoop/hive/conf/HiveConfForTest.java (100%) diff --git 
a/common/src/java/org/apache/hadoop/hive/conf/HiveConfForTest.java b/common/src/test/org/apache/hadoop/hive/conf/HiveConfForTest.java similarity index 100% rename from common/src/java/org/apache/hadoop/hive/conf/HiveConfForTest.java rename to common/src/test/org/apache/hadoop/hive/conf/HiveConfForTest.java From 094aad849c1ab01fc2f71e3fc901a9074eac616f Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Fri, 6 Dec 2024 10:18:34 +0100 Subject: [PATCH 23/30] remove isVectorized flag #2 --- .../hadoop/hive/ql/TestTxnAddPartition.java | 1 + .../hadoop/hive/ql/TestTxnNoBuckets.java | 14 ++++----- .../hive/ql/TestTxnNoBucketsVectorized.java | 5 ---- .../hive/ql/TxnCommandsBaseForTests.java | 30 ++++--------------- 4 files changed, 12 insertions(+), 38 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java index 6f7d5f53bca1..0ebe5110f923 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java @@ -140,6 +140,7 @@ private void addPartition() throws Exception { @Test public void testAddPartitionMM() throws Exception { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); addPartitionMM(); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index e0452ca45e72..c4f6f9e63075 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -166,7 +166,7 @@ public void testNoBuckets() throws Exception { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000011/bucket_00000"} }; checkResult(expected, - "select ROW__ID, c1, c2, c3" + (shouldVectorize() ? 
"" : ", INPUT__FILE__NAME") + "select ROW__ID, c1, c2, c3, INPUT__FILE__NAME" + " from " + NO_BUCKETS_TBL_NAME + " order by c1, c2, c3", "After Major Compaction", LOG); @@ -440,9 +440,9 @@ logical bucket (tranche) Assert.assertEquals(2, BucketCodec.determineVersion(537001984).decodeWriterId(537001984)); Assert.assertEquals(1, BucketCodec.determineVersion(536936448).decodeWriterId(536936448)); - assertVectorized("update T set b = 88 where b = 80"); + assertMappersAreVectorized("update T set b = 88 where b = 80"); runStatementOnDriver("update T set b = 88 where b = 80"); - assertVectorized("delete from T where b = 8"); + assertMappersAreVectorized("delete from T where b = 8"); runStatementOnDriver("delete from T where b = 8"); String expected3[][] = { {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/t/HIVE_UNION_SUBDIR_1/000000_0"}, @@ -488,10 +488,6 @@ logical bucket (tranche) checkExpected(rs, expected4,"after major compact"); } - protected void assertVectorized(String query) throws Exception { - assertMappersAreNotVectorized(query); - } - @Test public void testInsertFromUnion() throws Exception { int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}}; @@ -716,7 +712,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { checkExpected(rs, expected3, "After non-vectorized read"); Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912)); //vectorized because there is INPUT__FILE__NAME - assertMappersAreNotVectorized(query); + assertMappersAreVectorized(query); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); runStatementOnDriver("update T set b = 17 where a = 1"); @@ -758,7 +754,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { }; checkExpected(rs, expected5, "After major compaction"); //vectorized because there is INPUT__FILE__NAME - assertMappersAreNotVectorized(query); + assertMappersAreVectorized(query); } private void checkExpected(List rs, String[][] expected, 
String msg) { super.checkExpected(rs, expected, msg, LOG); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java index 69b39e5d76a0..cd2d2306123c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java @@ -34,9 +34,4 @@ public void setUp() throws Exception { setUpInternal(); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); } - - @Override - protected void assertVectorized(String query) throws Exception { - assertMappersAreVectorized(query); - } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 7aa003b8fa10..c5e2e444bbf4 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -27,7 +27,6 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -66,7 +65,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.hadoop.hive.metastore.DatabaseProduct.determineDatabaseProduct; import static org.apache.hadoop.hive.metastore.txn.TxnUtils.getEpochFn; @@ -321,20 +319,7 @@ protected CommandProcessorException runStatementOnDriverNegative(String stmt) { throw new RuntimeException("Didn't get expected failure!"); } - protected void assertMappersAreVectorized(String query) throws Exception { - if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { - return; - } - assertMapperExecutionMode(query, "Mapper was not vectorized: ", 
- executionMode -> isBlank(executionMode) || !executionMode.contains("vectorized")); - } - - protected void assertMappersAreNotVectorized(String query) throws Exception { - assertMapperExecutionMode(query, "Mapper was vectorized but was not expected: ", - executionMode -> isNotBlank(executionMode) && executionMode.contains("vectorized")); - } - - protected void assertMapperExecutionMode(String query, String message, Predicate predicate) + protected void assertMappersAreVectorized(String query) throws Exception { List rs = runStatementOnDriver("EXPLAIN FORMATTED VECTORIZATION DETAIL " + query); ObjectMapper objectMapper = new ObjectMapper(); @@ -365,8 +350,9 @@ protected void assertMapperExecutionMode(String query, String message, Predicate Map mapVertex = (Map) vertexEntry.getValue(); String executionMode = (String) mapVertex.get("Execution mode"); boolean vectorized = isNotBlank(executionMode) && executionMode.contains("vectorized"); + String message = "Mapper was " + (shouldVectorized() ? "not vectorized: " : "vectorized but was not expected: "); Assert.assertTrue(message + rs.get(0), - hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) ^ vectorized); + shouldVectorized() ^ vectorized); } } /** @@ -420,12 +406,8 @@ void logResult(Logger LOG, List rs) { */ protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) throws Exception{ List rs = runStatementOnDriver(query); - checkExpected(rs, expectedResult, msg + (shouldVectorize() ? " vect" : ""), LOG); - if (shouldVectorize()) { - assertMappersAreVectorized(query); - } else { - assertMappersAreNotVectorized(query); - } + checkExpected(rs, expectedResult, msg + (shouldVectorized() ? " vect" : ""), LOG); + assertMappersAreVectorized(query); } void dropTables(String... 
tables) throws Exception { HiveConf queryConf = d.getQueryState().getConf(); @@ -456,7 +438,7 @@ protected int getOpenTxnCount(long openTxnTimeOutMillis) throws Exception { " - " + openTxnTimeOutMillis + ")"); } - protected boolean shouldVectorize() { + protected boolean shouldVectorized() { return hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); } } From c94cb72f6f7ac04474d2650afa4323abf3033f82 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Fri, 6 Dec 2024 11:08:29 +0100 Subject: [PATCH 24/30] rollback to debug logging --- .../txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java index dee11031fb67..0407729af01b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/jdbc/functions/MinOpenTxnIdWaterMarkFunction.java @@ -61,7 +61,7 @@ public Long execute(MultiDataSourceJdbcResource jdbcResource) throws MetaExcepti return id; })); long lowWaterMark = jdbcResource.execute(new OpenTxnTimeoutLowBoundaryTxnIdHandler(openTxnTimeOutMillis)); - LOG.info("MinOpenTxnIdWaterMark calculated with minOpenTxn {}, lowWaterMark {}", minOpenTxn, lowWaterMark); + LOG.debug("MinOpenTxnIdWaterMark calculated with minOpenTxn {}, lowWaterMark {}", minOpenTxn, lowWaterMark); return Long.min(minOpenTxn, lowWaterMark + 1); } } From fb96f80b841da32148735ea684c42de32ccebac3 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Fri, 6 Dec 2024 15:16:25 +0100 Subject: [PATCH 25/30] remove testFileSystemUnCaching --- .../hadoop/hive/ql/TestTxnCommands2.java 
| 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 9387c9b22bc1..483d8521440e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -1315,50 +1315,6 @@ public void testInitiatorWithMinorCompactionForInsertOnlyTable() throws Exceptio verifyBaseDir(1, tblName, ""); } - /** - * Make sure there's no FileSystem$Cache$Key leak due to UGI use - * @throws Exception - */ - @Ignore("FileSystem.closeAllForUGI is never called from Compaction related threads") - @Test - public void testFileSystemUnCaching() throws Exception { - int cacheSizeBefore; - int cacheSizeAfter; - - // get the size of cache BEFORE - cacheSizeBefore = getFileSystemCacheSize(); - - // Insert a row to ACID table - runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(1,2)"); - - // Perform a major compaction - runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'major'"); - runWorker(hiveConf); - runCleaner(hiveConf); - - // get the size of cache AFTER - cacheSizeAfter = getFileSystemCacheSize(); - - Assert.assertEquals(cacheSizeBefore, cacheSizeAfter); - } - - private int getFileSystemCacheSize() throws Exception { - try { - Field cache = FileSystem.class.getDeclaredField("CACHE"); - cache.setAccessible(true); - Object o = cache.get(null); // FileSystem.CACHE - - Field mapField = o.getClass().getDeclaredField("map"); - mapField.setAccessible(true); - Map map = (HashMap)mapField.get(o); // FileSystem.CACHE.map - - return map.size(); - } catch (NoSuchFieldException e) { - System.out.println(e); - } - return 0; - } - private static class CompactionsByState { private int didNotInitiate; private int failed; From 64b541973a399d409783405398865a79d90ddbe5 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Sat, 7 Dec 2024 09:51:10 +0100 Subject: [PATCH 26/30] 
add stacktrace to diagnostics --- .../hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java index c10f134c95b4..c145eb7dbdef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java @@ -279,8 +279,9 @@ public int monitorExecution() { } catch (IOException | TezException tezException) { // best effort } - console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e)); - diagnostics.append(e.getMessage()); + String reportedException = "Execution has failed, stack trace: " + ExceptionUtils.getStackTrace(e); + console.printError(reportedException); + diagnostics.append(reportedException); rc = 1; done = true; } else { From 7234adee99355d5aec80e975e664328c1479543b Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Sat, 7 Dec 2024 11:20:29 +0100 Subject: [PATCH 27/30] rename checkResult to checkResultAndVectorization and add new checkResult without vectorization check --- .../apache/hadoop/hive/ql/TestTxnAddPartition.java | 8 ++++---- .../org/apache/hadoop/hive/ql/TestTxnCommands.java | 4 ++-- .../apache/hadoop/hive/ql/TestTxnCommands3.java | 14 ++++++-------- .../apache/hadoop/hive/ql/TestTxnConcatenate.java | 12 ++++++------ .../org/apache/hadoop/hive/ql/TestTxnExIm.java | 8 ++++---- .../org/apache/hadoop/hive/ql/TestTxnLoadData.java | 2 +- .../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 2 +- .../hadoop/hive/ql/TxnCommandsBaseForTests.java | 9 +++++++-- .../hadoop/hive/ql/util/TestUpgradeTool.java | 10 +++++----- 9 files changed, 36 insertions(+), 33 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java index 
0ebe5110f923..59766990be60 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java @@ -107,7 +107,7 @@ private void addPartition() throws Exception { "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, "add 2 parts w/data and 1 empty", LOG); + checkResultAndVectorization(expected, testQuery, "add 2 parts w/data and 1 empty", LOG); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/3'"); //should be an error since p=3 exists @@ -135,7 +135,7 @@ private void addPartition() throws Exception { "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t3\t0\t4", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}}; - checkResult(expected2, testQuery, "add 2 existing parts and 1 empty", LOG); + checkResultAndVectorization(expected2, testQuery, "add 2 existing parts and 1 empty", LOG); } @Test @@ -181,7 +181,7 @@ private void addPartitionMM() throws Exception { {"0\t0\t4", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, {"1\t0\t2", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, {"1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, "add 2 parts w/data and 1 empty", LOG); + checkResultAndVectorization(expected, testQuery, "add 2 parts w/data and 1 empty", LOG); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/3'"); //should be an error since p=3 exists @@ -202,7 +202,7 @@ private void addPartitionMM() throws Exception { {"1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, {"3\t0\t2", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}, {"3\t0\t4", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}}; - checkResult(expected2, testQuery, "add 2 
existing parts and 1 empty", LOG); + checkResultAndVectorization(expected2, testQuery, "add 2 existing parts and 1 empty", LOG); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index bddcd7cc85eb..9e21a19dbe4b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -1432,7 +1432,7 @@ public void testNonAcidToAcidConversion01() throws Exception { {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "nonacidorctbl/000001_0_copy_1"}, {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17", "nonacidorctbl/delta_10000001_10000001_0000/bucket_00001_0"} }; - checkResult(expected, query, "before compact", LOG); + checkResultAndVectorization(expected, query, "before compact", LOG); Assert.assertEquals(536870912, BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(0))); @@ -1450,7 +1450,7 @@ public void testNonAcidToAcidConversion01() throws Exception { {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "nonacidorctbl/base_10000001_v0000009/bucket_00001"}, {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17", "nonacidorctbl/base_10000001_v0000009/bucket_00001"} }; - checkResult(expected2, query, "after major compact", LOG); + checkResultAndVectorization(expected2, query, "after major compact", LOG); //make sure they are the same before and after compaction } //@Ignore("see bucket_num_reducers_acid.q") diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index 41e2ec5ebfe5..454124591b97 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -91,8 +91,7 @@ public void testRenameTable() throws Exception { "s/delta_0000001_0000001_0000/bucket_00000_0"}, 
{"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t4\t6", "s/delta_0000002_0000002_0001/bucket_00000_0"}}; - List rs = runStatementOnDriver(testQuery); - checkExpected(rs, expected, "check data", LOG); + checkResult(expected, testQuery, "check data", LOG); Assert.assertEquals(0, TestTxnDbUtil.countQueryAgent(hiveConf, "select count(*) from COMPLETED_TXN_COMPONENTS where CTC_TABLE='t'")); @@ -178,7 +177,7 @@ private void testDeleteEventPruning() throws Exception { "warehouse/t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", "warehouse/t/delta_0000002_0000002_0000/bucket_00000_0"}}; - checkResult(expected, testQuery, "after delete", LOG); + checkResultAndVectorization(expected, testQuery, "after delete", LOG); runStatementOnDriver("alter table T compact 'MAJOR'"); runWorker(hiveConf); @@ -196,7 +195,7 @@ private void testDeleteEventPruning() throws Exception { "warehouse/t/base_0000003_v0000012/bucket_00000"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", "warehouse/t/base_0000003_v0000012/bucket_00000"}}; - checkResult(expected2, testQuery, "after compaction", LOG); + checkResultAndVectorization(expected2, testQuery, "after compaction", LOG); } /** * HIVE-19985 @@ -286,7 +285,7 @@ private void testSdpoBucketed(boolean isVectorized, int bucketing_version) "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00001_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00000_0"}}; - checkResult(expected, testQuery, "after insert", LOG); + checkResultAndVectorization(expected, testQuery, "after insert", LOG); runStatementOnDriver("update acid_uap set b = 'fred'"); @@ -300,7 +299,7 @@ private void testSdpoBucketed(boolean isVectorized, int bucketing_version) "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0001/bucket_00001_0"}, 
{"{\"writeid\":3,\"bucketid\":536870913,\"rowid\":0}\t2\tfred\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0001/bucket_00000_0"}}; - checkResult(expected2, testQuery, "after update", LOG); + checkResultAndVectorization(expected2, testQuery, "after update", LOG); } @Test public void testCleaner2() throws Exception { @@ -332,8 +331,7 @@ public void testCleaner2() throws Exception { "t/delta_0000001_0000001_0000/bucket_00000_0"}, {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t4", "t/delta_0000002_0000002_0000/bucket_00000_0"}}; - List rs = runStatementOnDriver(testQuery); - checkExpected(rs, expected, "check data", LOG); + checkResult(expected, testQuery, "check data", LOG); txnMgr2 = swapTxnManager(txnMgr1); driver2 = swapDrivers(driver1); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java index a601b48c1ee0..1fabfcb2e52c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java @@ -62,7 +62,7 @@ public void testConcatenate() throws Exception { "acidtbl/delta_0000003_0000003_0000/bucket_00001_0"}, {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t8\t8", "acidtbl/delta_0000003_0000003_0000/bucket_00001_0"}}; - checkResult(expected, testQuery, "check data", LOG); + checkResultAndVectorization(expected, testQuery, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() but in normal usage 'concatenate' is blocking, */ @@ -86,7 +86,7 @@ public void testConcatenate() throws Exception { "acidtbl/base_0000003_v0000011/bucket_00001"}, {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t8\t8", "acidtbl/base_0000003_v0000011/bucket_00001"}}; - checkResult(expected2, testQuery, "check data after concatenate", LOG); + checkResultAndVectorization(expected2, testQuery, "check data after concatenate", LOG); } @Test 
public void testConcatenatePart() throws Exception { @@ -103,7 +103,7 @@ public void testConcatenatePart() throws Exception { "acidtblpart/p=p1/delta_0000003_0000003_0000/bucket_00001_0"}, {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "acidtblpart/p=p2/delta_0000003_0000003_0000/bucket_00001_0"}}; - checkResult(expected, testQuery, "check data", LOG); + checkResultAndVectorization(expected, testQuery, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() but in normal usage 'concatenate' is blocking, */ @@ -128,7 +128,7 @@ public void testConcatenatePart() throws Exception { {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "acidtblpart/p=p2/delta_0000003_0000003_0000/bucket_00001_0"}}; - checkResult(expected2, testQuery, "check data after concatenate", LOG); + checkResultAndVectorization(expected2, testQuery, "check data after concatenate", LOG); } @Test @@ -148,7 +148,7 @@ public void testConcatenateMM() throws Exception { {"4\t5", "t/delta_0000001_0000001_0000/000000_0"}, {"5\t6", "t/delta_0000002_0000002_0000/000000_0"}, {"8\t8", "t/delta_0000002_0000002_0000/000000_0"}}; - checkResult(expected, testQuery, "check data", LOG); + checkResultAndVectorization(expected, testQuery, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() but in normal usage 'concatenate' is blocking, */ @@ -168,6 +168,6 @@ public void testConcatenateMM() throws Exception { {"4\t5", "t/base_0000003_v0000011/000000_0"}, {"5\t6", "t/base_0000003_v0000011/000000_0"}, {"8\t8", "t/base_0000003_v0000011/000000_0"}}; - checkResult(expected2, testQuery, "check data after concatenate", LOG); + checkResultAndVectorization(expected2, testQuery, "check data after concatenate", LOG); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java index fac721b79c1b..ac2966463011 100644 
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java @@ -329,7 +329,7 @@ private void testImport(boolean existingTarget) throws Exception { "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "t/delta_0000001_0000001_0000/000000_0"}}; - checkResult(expected, testQuery, "import existing table", LOG); + checkResultAndVectorization(expected, testQuery, "import existing table", LOG); runStatementOnDriver("update T set a = 0 where b = 6"); String[][] expected2 = new String[][] { @@ -339,7 +339,7 @@ private void testImport(boolean existingTarget) throws Exception { "t/delta_0000001_0000001_0000/000000_0"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t0\t6", "t/delta_0000002_0000002_0001/bucket_00000_0"}}; - checkResult(expected2, testQuery, "update imported table", LOG); + checkResultAndVectorization(expected2, testQuery, "update imported table", LOG); runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); @@ -350,7 +350,7 @@ private void testImport(boolean existingTarget) throws Exception { ".*t/delta_0000001_0000002_v000001[4-5]/bucket_00000"}, {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t0\t6", ".*t/delta_0000001_0000002_v000001[4-5]/bucket_00000"}}; - checkResult(expected3, testQuery, "minor compact imported table", LOG); + checkResultAndVectorization(expected3, testQuery, "minor compact imported table", LOG); } @@ -383,7 +383,7 @@ public void testImportPartitioned() throws Exception { "t/p=11/delta_0000002_0000002_0000/000000_0"}, {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/p=12/delta_0000003_0000003_0000/000000_0"}}; - checkResult(expected, testQuery, "import existing table", LOG); + checkResultAndVectorization(expected, testQuery, "import existing table", LOG); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java 
b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index c9c65ed5ac86..8bd40171e9ac 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -483,7 +483,7 @@ public void testAbort() throws Exception { checkResult(expected, testQuery, "load data inpath"); } void checkResult(String[][] expectedResult, String query, String msg) throws Exception { - checkResult(expectedResult, query, msg, LOG); + checkResultAndVectorization(expectedResult, query, msg, LOG); } @Test public void testLoadAcidFile() throws Exception { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index c4f6f9e63075..99daa9e78cd6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -165,7 +165,7 @@ public void testNoBuckets() throws Exception { {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000011/bucket_00001"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000011/bucket_00000"} }; - checkResult(expected, + checkResultAndVectorization(expected, "select ROW__ID, c1, c2, c3, INPUT__FILE__NAME" + " from " + NO_BUCKETS_TBL_NAME + " order by c1, c2, c3", "After Major Compaction", LOG); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index c5e2e444bbf4..21f7ddfadf3b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -404,10 +404,15 @@ void logResult(Logger LOG, List rs) { * which will currently make the query non-vectorizable. This means we can't check the file name * for vectorized version of the test. 
*/ - protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) throws Exception{ + protected void checkResultAndVectorization(String[][] expectedResult, String query, String msg, Logger LOG) + throws Exception { + checkResult(expectedResult, query, msg, LOG); + assertMappersAreVectorized(query); + } + protected void checkResult(String[][] expectedResult, String query, String msg, Logger LOG) + throws Exception { List rs = runStatementOnDriver(query); checkExpected(rs, expectedResult, msg + (shouldVectorized() ? " vect" : ""), LOG); - assertMappersAreVectorized(query); } void dropTables(String... tables) throws Exception { HiveConf queryConf = d.getQueryState().getConf(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java b/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java index 111e8a3d4a18..af70710618b1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/util/TestUpgradeTool.java @@ -121,7 +121,7 @@ public void testPostUpgrade() throws Exception { {"4\t5",""}, {"5\t6",""}, }; - checkResult(expected0, testQuery0, "TFlat pre-check", LOG); + checkResultAndVectorization(expected0, testQuery0, "TFlat pre-check", LOG); //should be converted to MM @@ -184,7 +184,7 @@ public void testPostUpgrade() throws Exception { {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "tacid/delta_0000002_0000002/000000_0"} }; - checkResult(expected, testQuery, "TAcid post-check", LOG); + checkResultAndVectorization(expected, testQuery, "TAcid post-check", LOG); testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from TAcidPart order by a, b, p, ROW__ID"; @@ -202,7 +202,7 @@ public void testPostUpgrade() throws Exception { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "tacidpart/p=12/delta_0000001_0000001/000000_0"} }; - checkResult(expected2, testQuery, "TAcidPart post-check", LOG); + checkResultAndVectorization(expected2, 
testQuery, "TAcidPart post-check", LOG); /* Verify that we re-arranged/renamed so that files names follow hive naming convention and are spread among deltas/buckets @@ -220,7 +220,7 @@ public void testPostUpgrade() throws Exception { {"4\t5"}, {"5\t6"} }; - checkResult(expectedData, testQuery, "TFlat post-check data", LOG); + checkResultAndVectorization(expectedData, testQuery, "TFlat post-check data", LOG); testQuery = "select ROW__ID, INPUT__FILE__NAME from TFlat order by INPUT__FILE__NAME"; String[][] expectedMetaData = new String[][] { @@ -235,7 +235,7 @@ public void testPostUpgrade() throws Exception { {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}", "tflat/delta_0000005_0000005/00000_0"} }; - checkResult(expectedMetaData, testQuery, "TFlat post-check files", LOG); + checkResultAndVectorization(expectedMetaData, testQuery, "TFlat post-check files", LOG); } @Test public void testGuessNumBuckets() { From eff1cb7acba5c4a0d1c3b1b544b9102a5be66aa9 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Sat, 7 Dec 2024 15:18:58 +0100 Subject: [PATCH 28/30] create more than one bucket when updating converted table --- .../org/apache/hadoop/hive/ql/TestTxnNoBuckets.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index 99daa9e78cd6..7e8c5e7e1b8f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -440,8 +440,8 @@ logical bucket (tranche) Assert.assertEquals(2, BucketCodec.determineVersion(537001984).decodeWriterId(537001984)); Assert.assertEquals(1, BucketCodec.determineVersion(536936448).decodeWriterId(536936448)); - assertMappersAreVectorized("update T set b = 88 where b = 80"); - runStatementOnDriver("update T set b = 88 where b = 80"); + assertMappersAreVectorized("update T set b = 88 where b = 80 or b = 60"); + 
runStatementOnDriver("update T set b = 88 where b = 80 or b = 60"); assertMappersAreVectorized("delete from T where b = 8"); runStatementOnDriver("delete from T where b = 8"); String expected3[][] = { @@ -452,8 +452,7 @@ logical bucket (tranche) {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t12\t12", "warehouse/t/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"}, - // update for "{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t60\t80" + {"{\"writeid\":10000001,\"bucketid\":537067521,\"rowid\":0}\t50\t88", "warehouse/t/delta_10000001_10000001_0001/bucket_00003_0"}, {"{\"writeid\":10000001,\"bucketid\":536870913,\"rowid\":0}\t60\t88", "warehouse/t/delta_10000001_10000001_0001/bucket_00000_0"}, }; rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b, INPUT__FILE__NAME"); @@ -480,8 +479,8 @@ logical bucket (tranche) "warehouse/t/base_10000002_v0000015/bucket_00000"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_10000002_v0000015/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", - "warehouse/t/base_10000002_v0000015/bucket_00000"}, + {"{\"writeid\":10000001,\"bucketid\":537067521,\"rowid\":0}\t50\t88", + "warehouse/t/base_10000002_v0000015/bucket_00003"}, {"{\"writeid\":10000001,\"bucketid\":536870913,\"rowid\":0}\t60\t88", "warehouse/t/base_10000002_v0000015/bucket_00000"}, }; From fe6edd3304dfd66f172a4ca9fd66c77ce904f9f6 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 9 Dec 2024 14:31:40 +0100 Subject: [PATCH 29/30] remove duplicates --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index 454124591b97..f1b16e5fe137 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -252,13 +252,9 @@ public void testAcidMetaColumsDecode() throws Exception { @Test public void testSdpoBucketed() throws Exception { testSdpoBucketed(true, 1); - testSdpoBucketed(true, 1); - testSdpoBucketed(false, 1); testSdpoBucketed(false, 1); testSdpoBucketed(true, 2); - testSdpoBucketed(true, 2); - testSdpoBucketed(false, 2); testSdpoBucketed(false, 2); } private void testSdpoBucketed(boolean isVectorized, int bucketing_version) From c6973f9f43b35801050f8968bf2fd0fa0767a8db Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Tue, 10 Dec 2024 10:18:35 +0100 Subject: [PATCH 30/30] sonar --- .../hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java | 4 +--- .../test/org/apache/hadoop/hive/ql/TestTxnCommands3.java | 4 ++-- .../apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java | 7 ------- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java index f453aa3bd527..fdbb454de413 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java @@ -51,9 +51,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -100,9 +98,9 @@ protected void setupWithConf(HiveConfForTest hiveConf) throws Exception { } 
hiveConf.setVar(HiveConf.ConfVars.PRE_EXEC_HOOKS, ""); hiveConf.setVar(HiveConf.ConfVars.POST_EXEC_HOOKS, ""); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_WAREHOUSE, testWarehouseDir); hiveConf.setVar(HiveConf.ConfVars.HIVE_INPUT_FORMAT, HiveInputFormat.class.getName()); hiveConf.setVar(HiveConf.ConfVars.HIVE_FETCH_TASK_CONVERSION, "none"); + MetastoreConf.setVar(hiveConf, MetastoreConf.ConfVars.WAREHOUSE, testWarehouseDir); MetastoreConf.setTimeVar(hiveConf, MetastoreConf.ConfVars.TXN_OPENTXN_TIMEOUT, 2, TimeUnit.SECONDS); MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.COMPACTOR_INITIATOR_ON, true); MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.COMPACTOR_CLEANER_ON, true); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index f1b16e5fe137..a2446d63ad15 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -257,13 +257,13 @@ public void testSdpoBucketed() throws Exception { testSdpoBucketed(true, 2); testSdpoBucketed(false, 2); } - private void testSdpoBucketed(boolean isVectorized, int bucketing_version) + private void testSdpoBucketed(boolean isVectorized, int bucketingVersion) throws Exception { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorized); runStatementOnDriver("drop table if exists acid_uap"); runStatementOnDriver("create transactional table acid_uap(a int, b varchar(128)) " + "partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES " + - "('bucketing_version'='" + bucketing_version + "')"); + "('bucketing_version'='" + bucketingVersion + "')"); runStatementOnDriver("insert into table acid_uap partition (ds='tomorrow') " + "values (1, 'bah'),(2, 'yah')"); runStatementOnDriver("insert into table acid_uap partition (ds='today') " + diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java index cd2d2306123c..033b22e2d762 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBucketsVectorized.java @@ -18,14 +18,7 @@ package org.apache.hadoop.hive.ql; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.io.BucketCodec; -import org.junit.Assert; import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; public class TestTxnNoBucketsVectorized extends TestTxnNoBuckets { @Before