From 972856510f55cf9b633bd06a8ae6428239e7d52d Mon Sep 17 00:00:00 2001 From: "David W. Streever" Date: Thu, 10 Aug 2023 10:29:19 -0400 Subject: [PATCH] `u3e` prototype for MySQL Metastore Installs. --- README.md | 49 ++-- dependency-reduced-pom.xml | 274 ++++++++++++++++++ pom.xml | 50 ++-- src/main/java/com/cloudera/utils/Sre.java | 5 +- .../utils/hive/HiveFrameworkCheck.java | 16 +- .../utils/hive/config/SreProcessesConfig.java | 1 - .../utils/hive/sre/ConnectionPools.java | 185 ++++++------ .../hive/sre/MetastoreActionProcess.java | 155 ++++++++++ .../utils/hive/sre/ProcessContainer.java | 27 +- .../utils/hive/sre/SreProcessBase.java | 10 +- .../cloudera/utils/sql/QueryDefinition.java | 9 + src/main/resources/log4j.properties | 1 + .../resources/u3/MYSQL/hive_u3_queries.yaml | 157 +++++++--- .../resources/u3/ORACLE/hive_u3_queries.yaml | 194 +++++++++---- .../u3/POSTGRES/hive_u3_queries.yaml | 153 +++++++--- src/main/resources/u3/proc/hive_u3_procs.yaml | 208 ++++++------- .../resources/u3e/MYSQL/hive_u3e_queries.yaml | 133 +++++++++ .../resources/u3e/proc/hive_u3e_procs.yaml | 51 ++++ .../utils/hive/config/SreSQLMySqlTest.java | 8 + u3.md | 12 +- u3e.md | 29 ++ 21 files changed, 1340 insertions(+), 387 deletions(-) create mode 100644 dependency-reduced-pom.xml create mode 100644 src/main/java/com/cloudera/utils/hive/sre/MetastoreActionProcess.java create mode 100644 src/main/resources/u3e/MYSQL/hive_u3e_queries.yaml create mode 100644 src/main/resources/u3e/proc/hive_u3e_procs.yaml create mode 100644 u3e.md diff --git a/README.md b/README.md index ebf3a0b..dd284aa 100644 --- a/README.md +++ b/README.md @@ -4,32 +4,41 @@ This application has 3 sub-programs: - [`sre`](./sre.md) is used to find potential 'Hive' performance issues caused by small files and excessive partitions. - [`u3`](./u3.md) is used to review 'Hive 1/2' environments for Hive3 upgrade planning. +- **NEW** [`u3e`](./u3e.md) is used to 'execute' the upgrade scripts for Hive3 directly against the metastore database. Run against the metastore DB **AFTER** the upgrade to Hive 3. - [`perf`](./perf.md) is used to check the throughput of a JDBC connection. ### [Trouble-Shooting](./troubleshooting.md) ## Supported Metastore DB's -| Sub-Program | Database | Version | Tested | Notes | -|:---|:---|:---|:---------------------------------|:---------------------------------------------------------------------------------------------------------------------------------| -| `u3` | MySql | 5.6 | Limited | Recommend upgrading 5.7. This is the lower MySql supported env for HDP | -| | | 5.7 | Yes | | -| | | 8.0 | No | Not supported by HDP | -| | MariaDb | 10.1 | No, but should work as 10.2 does | | -| | | 10.2 | Yes | | -| | Postgresql | 9.6 | No, but should work | | -| | | 10 | Yes | Field Tested, May still be a few rough edges | -| | | 11 | No, but should work at 10 does | | -| | Oracle | 12 | Yes | Field Tested, May still be a few rough edges | -| `sre` | MySql | 5.6 | Limited | Recommend upgrading 5.7. This is the lower MySql supported env for HDP | -| | | 5.7 | Partly | Some `sre` reports use CTE in the SQL, which isn't supported in this version. Those report will error, the other will run fine. | -| | | 8.0 | No | Not supported by HDP | -| | MariaDb | 10.1 | No, but should work as 10.2 does | | -| | | 10.2 | Yes | | -| | Postgresql | 9.6 | No, but should work | | -| | | 10 | Yes | Field Tested, May still be a few rough edges | -| | | 11 | No, but should work at 10 does | | -| | Oracle | 12 | Yes | Field Tested, May still be a few rough edges | + +| Sub-Program | Database | Version | Tested | Notes | +|:------------|:----------------|:--------|:---------------------------------|:---------------------------------------------------------------------------------------------------------------------------------| +| `u3` | MySql | 5.6 | Limited | Recommend upgrading 5.7. This is the lower MySql supported env for HDP | +| | | 5.7 | Yes | | +| | | 8.0 | No | Not supported by HDP | +| | MariaDb | 10.1 | No, but should work as 10.2 does | | +| | | 10.2 | Yes | | +| | Postgresql | 9.6 | No, but should work | | +| | | 10 | Yes | Field Tested, May still be a few rough edges | +| | | 11 | No, but should work at 10 does | | +| | Oracle | 12 | Yes | Field Tested, May still be a few rough edges | +| `u3e` | MySql | 5.6 | Limited | Recommend upgrading 5.7. This is the lower MySql supported env for HDP | +| | | 5.7 | Yes | | +| | | 8.0 | No | Not supported by HDP | +| | MariaDb | 10.1 | No, but should work as 10.2 does | | +| | | 10.2 | Yes | | +| | Postgresql | * | NOT YET IMPLEMENTED | | +| | Oracle | * | NOT YET IMPLEMENTED | | +| `sre` | MySql | 5.6 | Limited | Recommend upgrading 5.7. This is the lower MySql supported env for HDP | +| | | 5.7 | Partly | Some `sre` reports use CTE in the SQL, which isn't supported in this version. Those report will error, the other will run fine. | +| | | 8.0 | No | Not supported by HDP | +| | MariaDb | 10.1 | No, but should work as 10.2 does | | +| | | 10.2 | Yes | | +| | Postgresql | 9.6 | No, but should work | | +| | | 10 | Yes | Field Tested, May still be a few rough edges | +| | | 11 | No, but should work at 10 does | | +| | Oracle | 12 | Yes | Field Tested, May still be a few rough edges | Ensure you have the database appropriate driver in the `${HOME}/.hive-sre/aux_libs` directory. diff --git a/dependency-reduced-pom.xml b/dependency-reduced-pom.xml new file mode 100644 index 0000000..70b6e73 --- /dev/null +++ b/dependency-reduced-pom.xml @@ -0,0 +1,274 @@ + + + 4.0.0 + com.cloudera.utils.hive + hive-sre + hive-sre + 3.0.0.0 + https://github.com/cloudera-labs/hive-sre + + ${project.artifactId} + + + + maven-clean-plugin + ${maven.clean.version} + + + maven-resources-plugin + ${maven.resources.version} + + + maven-compiler-plugin + ${maven.compiler.version} + + + maven-surefire-plugin + ${maven.surefire.version} + + + maven-install-plugin + ${maven.install.version} + + + maven-deploy-plugin + ${maven.deploy.version} + + + maven-site-plugin + ${maven.site.version} + + + maven-project-info-reports-plugin + ${maven.reports.version} + + + maven-shade-plugin + ${maven.shade.plugin.version} + + + maven-assembly-plugin + ${maven.assembly.version} + + + maven-jar-plugin + ${maven.jar.version} + + + + + + maven-compiler-plugin + + ${maven.compiler.source} + ${maven.compiler.target} + + + + maven-jar-plugin + + + + true + true + + + ${hadoop.version} + ${maven.build.timestamp} + ${project.version} + + + + + + maven-shade-plugin + + + package + + shade + + + false + ${project.artifactId}-shaded + true + + + *:* + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.INF + META-INF/LICENSE + META-INF/**/pom.xml + META-INF/**/pom.properties + org/apache/logging/log4j/core/lookup/JndiLookup.class + + + + + + + + + + + shaded-no-hadoop + package + + shade + + + false + ${project.artifactId}-shaded-no-hadoop + true + + + com.jcabi:* + org.sonatype.jline:jline + commons-io:commons-io + com.cloudera.utils.hadoop:* + com.fasterxml.jackson.core:* + com.fasterxml.jackson.dataformat:* + com.zaxxer:HikariCP + org.apache.commons:commons-pool2 + org.apache.commons:commons-lang3 + org.commonmark:* + org.yaml:snakeyaml + commons-logging:commons-logging + javax.validation:validation-api + + + + + + META-INF/*.SF + META-INF/*.RSA + META-INF/*.INF + META-INF/LICENSE + META-INF/**/pom.xml + META-INF/**/pom.properties + org/apache/logging/log4j/core/lookup/JndiLookup.class + + + + + + + + com.streever.hive.Sre + + + + + + + + maven-assembly-plugin + + + make-assembly + package + + single + + + + + + src/main/assembly/assembly.xml + + + + + + + + pvc + + 1.8 + 1.8 + 7.1.8.8-3 + + + + cloud + + 1.8 + 1.8 + 7.2.16.0-287 + + + + + + cloudera + https://repository.cloudera.com/artifactory/cloudera-repos/ + + + + + org.apache.hadoop + hadoop-mapreduce-client-common + 3.1.1.7.1.8.8-3 + provided + + + org.postgresql + postgresql + 42.4.3 + provided + + + org.mariadb.jdbc + mariadb-java-client + 2.7.3 + provided + + + org.lz4 + lz4-java + 1.8.0 + compile + + + junit + junit + 4.13.2 + test + + + hamcrest-core + org.hamcrest + + + + + + 3.1.2 + 3.7.1 + 3.3.0 + 2.4.3.3 + 4.0.3 + 12.1.0.2 + 2.7.3 + 1.0.0 + UTF-8 + 3.1.1 + 1.8 + 4.13.2 + 42.4.3 + 3.1.1 + 3.6.0 + 3.3.1 + 3.3.1 + 1.8 + 3.5.0 + 3.11.0 + 2.13.5 + 3.0.0 + 2.0.1.Final + 3.1.1 + 1.8.0 + + diff --git a/pom.xml b/pom.xml index 21872e2..7c79e48 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ com.cloudera.utils.hive hive-sre - 2.5.1.5 + 3.0.0.0 hive-sre @@ -32,31 +32,31 @@ UTF-8 - 3.8.1 + 3.11.0 1.8 1.8 - 3.1.0 - 3.0.2 - 2.22.1 - 3.0.2 - 2.5.2 - 2.8.2 + 3.3.1 + 3.3.1 + 3.1.2 + 3.3.0 + 3.1.1 + 3.1.1 3.7.1 3.0.0 - 3.2.1 - 3.3.0 + 3.5.0 + 3.6.0 2.7.3 - 42.4.1 + 42.4.3 12.1.0.2 2.0.1.Final - 2.13.3 + 2.13.5 1.8.0 - 2.9.0 + 4.0.3 4.13.2 @@ -81,6 +81,20 @@ com.cloudera.utils.hadoop hadoop-cli ${hadoop-cli.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + + @@ -90,11 +104,11 @@ provided - + - org.apache.commons - commons-dbcp2 - ${commons-dbcp2.version} + com.zaxxer + HikariCP + ${hikari.version} @@ -304,7 +318,7 @@ com.cloudera.utils.hadoop:* com.fasterxml.jackson.core:* com.fasterxml.jackson.dataformat:* - org.apache.commons:commons-dbcp2 + com.zaxxer:HikariCP org.apache.commons:commons-pool2 org.apache.commons:commons-lang3 org.commonmark:* diff --git a/src/main/java/com/cloudera/utils/Sre.java b/src/main/java/com/cloudera/utils/Sre.java index 126f017..ffde509 100644 --- a/src/main/java/com/cloudera/utils/Sre.java +++ b/src/main/java/com/cloudera/utils/Sre.java @@ -49,7 +49,7 @@ public void init(String[] args) throws Throwable { // cust // tc - transaction cleanup String subApp = null; - if (args.length > 0 && args[0].matches("perf|sre|u3|cust|dba|stats")) { + if (args.length > 0 && args[0].matches("perf|sre|u3|u3e|cust|dba|stats")) { System.err.println("Launching: " + args[0]); subApp = args[0]; } else { @@ -72,6 +72,9 @@ public void init(String[] args) throws Throwable { case "u3": sreApp = new HiveFrameworkCheck("u3", "/u3/proc/hive_u3_procs.yaml"); break; + case "u3e": + sreApp = new HiveFrameworkCheck("u3e", "/u3e/proc/hive_u3e_procs.yaml"); + break; case "cust": sreApp = new HiveFrameworkCheck(); break; diff --git a/src/main/java/com/cloudera/utils/hive/HiveFrameworkCheck.java b/src/main/java/com/cloudera/utils/hive/HiveFrameworkCheck.java index 2799678..e2079b9 100644 --- a/src/main/java/com/cloudera/utils/hive/HiveFrameworkCheck.java +++ b/src/main/java/com/cloudera/utils/hive/HiveFrameworkCheck.java @@ -49,7 +49,7 @@ public class HiveFrameworkCheck implements SreSubApp { private static final Logger LOG = LogManager.getLogger(HiveFrameworkCheck.class); - private final String USAGE_CMD = "hive-sre u3|sre|perf -cdh|-hdp2|-hdp3|-all|-i -o [options]"; + private final String USAGE_CMD = "hive-sre u3|u3e|sre|perf -cdh|-hdp2|-hdp3|-all|-i -o [options]"; private String name; private String stackResource; @@ -212,6 +212,8 @@ public Boolean init(String[] args) { configURL.toString()); } + procContainer.setParent(this); + if (cmd.hasOption("i")) { String[] includeIds = cmd.getOptionValues("i"); List includes = Arrays.asList(includeIds); @@ -254,9 +256,9 @@ public Boolean init(String[] args) { if (includes.contains(proc.getId())) { proc.setActive(Boolean.TRUE); proc.setSkip(Boolean.FALSE); - if (proc.getId().equals("3")) { - ((DbSetProcess) proc).setCommandChecks(null); - } +// if (proc.getId().equals("3")) { +// ((DbSetProcess) proc).setCommandChecks(null); +// } } else { proc.setActive(Boolean.FALSE); proc.setSkip(Boolean.TRUE); @@ -271,9 +273,9 @@ public Boolean init(String[] args) { if (includes.contains(proc.getId())) { proc.setActive(Boolean.TRUE); proc.setSkip(Boolean.FALSE); - if (proc.getId().equals("3")) { - ((DbSetProcess) proc).setCommandChecks(null); - } +// if (proc.getId().equals("3")) { +// ((DbSetProcess) proc).setCommandChecks(null); +// } } else { proc.setActive(Boolean.FALSE); proc.setSkip(Boolean.TRUE); diff --git a/src/main/java/com/cloudera/utils/hive/config/SreProcessesConfig.java b/src/main/java/com/cloudera/utils/hive/config/SreProcessesConfig.java index 701ff04..4e077a8 100644 --- a/src/main/java/com/cloudera/utils/hive/config/SreProcessesConfig.java +++ b/src/main/java/com/cloudera/utils/hive/config/SreProcessesConfig.java @@ -118,7 +118,6 @@ public Messages getWarnings() { public Boolean validate() { Boolean rtn = Boolean.TRUE; - return rtn; } diff --git a/src/main/java/com/cloudera/utils/hive/sre/ConnectionPools.java b/src/main/java/com/cloudera/utils/hive/sre/ConnectionPools.java index e51845f..c0eae6e 100644 --- a/src/main/java/com/cloudera/utils/hive/sre/ConnectionPools.java +++ b/src/main/java/com/cloudera/utils/hive/sre/ConnectionPools.java @@ -19,10 +19,8 @@ import com.cloudera.utils.hive.config.DBStore; import com.cloudera.utils.hive.config.SreProcessesConfig; -import com.cloudera.utils.hive.dba.QueryStore; -import org.apache.commons.dbcp2.*; -import org.apache.commons.pool2.ObjectPool; -import org.apache.commons.pool2.impl.GenericObjectPool; +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; import javax.sql.DataSource; import java.sql.Connection; @@ -31,40 +29,39 @@ public class ConnectionPools { private SreProcessesConfig config; - private PoolingDataSource metastoreDirectDataSource = null; - private PoolingDataSource hs2DataSource = null; - private PoolingDataSource queryAnalysisDataSource = null; + private HikariDataSource metastoreDirectDataSource = null; +// private HikariDataSource hs2DataSource = null; + private HikariDataSource queryAnalysisDataSource = null; public ConnectionPools(SreProcessesConfig config) { this.config = config; } - public void init() { - initMetastoreDataSource(); - initQueryAnalysisDataSource(); - initHs2DataSource(); - } + public void close() { +// if (hs2DataSource != null) +// hs2DataSource.close(); - public DataSource getMetastoreDirectDataSource() { - if (metastoreDirectDataSource == null) { - initMetastoreDataSource(); - } - return metastoreDirectDataSource; - } + if (metastoreDirectDataSource != null) + metastoreDirectDataSource.close(); - public PoolingDataSource getQueryAnalysisDataSource() { - if (queryAnalysisDataSource == null) { - initQueryAnalysisDataSource(); - } - return queryAnalysisDataSource; + if (queryAnalysisDataSource != null) + queryAnalysisDataSource.close(); } - public DataSource getHs2DataSource() { - if (hs2DataSource == null) { - initHs2DataSource(); - } - return hs2DataSource; - } +// public Connection getHs2Connection() throws SQLException { +// Connection conn = null; +// if (getHs2DataSource() != null) { +// conn = getHs2DataSource().getConnection(); +// } +// return conn; +// } +// +// public DataSource getHs2DataSource() { +// if (hs2DataSource == null) { +// initHs2DataSource(); +// } +// return hs2DataSource; +// } public Connection getMetastoreDirectConnection() throws SQLException { Connection conn = null; @@ -77,6 +74,13 @@ public Connection getMetastoreDirectConnection() throws SQLException { return conn; } + public DataSource getMetastoreDirectDataSource() { + if (metastoreDirectDataSource == null) { + initMetastoreDataSource(); + } + return metastoreDirectDataSource; + } + public Connection getQueryAnalysisConnection() throws SQLException { Connection conn = null; if (getQueryAnalysisDataSource() != null) { @@ -88,51 +92,63 @@ public Connection getQueryAnalysisConnection() throws SQLException { return conn; } - public Connection getHs2Connection() throws SQLException { - Connection conn = null; - if (getHs2DataSource() != null) { - conn = getHs2DataSource().getConnection(); + public DataSource getQueryAnalysisDataSource() { + if (queryAnalysisDataSource == null) { + initQueryAnalysisDataSource(); } - return conn; + return queryAnalysisDataSource; } - protected void initMetastoreDataSource() { - // Metastore Direct - if (config.getMetastoreDirect() != null) { - DBStore msdb = config.getMetastoreDirect(); - ConnectionFactory msconnectionFactory = - new DriverManagerConnectionFactory(msdb.getUri(), msdb.getConnectionProperties()); - - PoolableConnectionFactory mspoolableConnectionFactory = - new PoolableConnectionFactory(msconnectionFactory, null); - - ObjectPool msconnectionPool = - new GenericObjectPool<>(mspoolableConnectionFactory); - - mspoolableConnectionFactory.setPool(msconnectionPool); - - this.metastoreDirectDataSource = - new PoolingDataSource<>(msconnectionPool); - } + public void init() { + initMetastoreDataSource(); + initQueryAnalysisDataSource(); +// initHs2DataSource(); } - protected void initHs2DataSource() { - // this is optional. - if (config.getHs2() != null) { - DBStore hs2db = config.getHs2(); - ConnectionFactory hs2connectionFactory = - new DriverManagerConnectionFactory(hs2db.getUri(), hs2db.getConnectionProperties()); +// protected void initHs2DataSource() { +// // this is optional. +// if (config.getHs2() != null) { +// DBStore hs2db = config.getHs2(); +// ConnectionFactory hs2connectionFactory = +// new DriverManagerConnectionFactory(hs2db.getUri(), hs2db.getConnectionProperties()); +// +// PoolableConnectionFactory hs2poolableConnectionFactory = +// new PoolableConnectionFactory(hs2connectionFactory, null); +// +// ObjectPool hs2connectionPool = +// new GenericObjectPool<>(hs2poolableConnectionFactory); +// +// hs2poolableConnectionFactory.setPool(hs2connectionPool); +// +// this.hs2DataSource = +// new PoolingDataSource<>(hs2connectionPool); +// } +// } - PoolableConnectionFactory hs2poolableConnectionFactory = - new PoolableConnectionFactory(hs2connectionFactory, null); - - ObjectPool hs2connectionPool = - new GenericObjectPool<>(hs2poolableConnectionFactory); - - hs2poolableConnectionFactory.setPool(hs2connectionPool); + protected void initMetastoreDataSource() { + // Metastore Direct + if (config.getMetastoreDirect() != null) { - this.hs2DataSource = - new PoolingDataSource<>(hs2connectionPool); + HikariConfig cpConfig = new HikariConfig(); + cpConfig.setJdbcUrl(config.getMetastoreDirect().getUri()); + cpConfig.setDataSourceProperties(config.getMetastoreDirect().getConnectionProperties()); + metastoreDirectDataSource = new HikariDataSource(cpConfig); + + // Test Connection. + Connection conn = null; + try { + conn = metastoreDirectDataSource.getConnection(); + } catch (Throwable t) { + if (conn != null) { + try { + conn.close(); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } else { + throw new RuntimeException(t); + } + } } } @@ -140,22 +156,27 @@ protected void initQueryAnalysisDataSource() { // Query Analysis if (config.getQueryAnalysis() != null) { - QueryStore queryAnalysis = config.getQueryAnalysis(); - ConnectionFactory qaconnectionFactory = - new DriverManagerConnectionFactory(queryAnalysis.getUri(), queryAnalysis.getConnectionProperties()); - - PoolableConnectionFactory qapoolableConnectionFactory = - new PoolableConnectionFactory(qaconnectionFactory, null); - - ObjectPool qaconnectionPool = - new GenericObjectPool<>(qapoolableConnectionFactory); - - qapoolableConnectionFactory.setPool(qaconnectionPool); - - this.queryAnalysisDataSource = - new PoolingDataSource<>(qaconnectionPool); + HikariConfig cpConfig = new HikariConfig(); + cpConfig.setJdbcUrl(config.getQueryAnalysis().getUri()); + cpConfig.setDataSourceProperties(config.getQueryAnalysis().getConnectionProperties()); + queryAnalysisDataSource = new HikariDataSource(cpConfig); + + // Test Connection. + Connection conn = null; + try { + conn = queryAnalysisDataSource.getConnection(); + } catch (Throwable t) { + if (conn != null) { + try { + conn.close(); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } else { + throw new RuntimeException(t); + } + } } - } } diff --git a/src/main/java/com/cloudera/utils/hive/sre/MetastoreActionProcess.java b/src/main/java/com/cloudera/utils/hive/sre/MetastoreActionProcess.java new file mode 100644 index 0000000..26a3607 --- /dev/null +++ b/src/main/java/com/cloudera/utils/hive/sre/MetastoreActionProcess.java @@ -0,0 +1,155 @@ +package com.cloudera.utils.hive.sre; + +import com.cloudera.utils.hive.reporting.CounterGroup; +import com.cloudera.utils.hive.reporting.ReportingConf; +import com.cloudera.utils.hive.reporting.TaskState; +import com.cloudera.utils.sql.QueryDefinition; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.FileNotFoundException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; + +public class MetastoreActionProcess extends MetastoreProcess { + + private static Logger LOG = LogManager.getLogger(MetastoreActionProcess.class); + + // Specify whether any failure would trigger a rollback of the changes. Setting this to true will start a + // transaction before the first statement. After all statements have completed, they will be committed. + // If any of the statements fail, a rollback would be issued. (requires db and driver support). + private Boolean transactional = Boolean.TRUE; + private String actionDefinition; + + @Override + public void init(ProcessContainer parent) throws FileNotFoundException { + super.init(parent); + LOG.info("Init: " + getId()); + counterGroup = new CounterGroup(getUniqueName()); + QueryDefinition queryDefinition = getQueryDefinitions().getQueryDefinition(actionDefinition); + counterGroup.addAndGetTaskState(TaskState.CONSTRUCTED, queryDefinition.getPreparedStatements().size()); + getParent().getReporter().addCounter(counterGroup, null); + LOG.info("Init (done): " + getId()); + } + + @Override + public String call() throws Exception { + if (isTestSQL()) { + testSQLScript(); + } else { + doIt(); + } + return "done" ; + } + + public void doIt() { + // Get a connection + Connection conn = null; + LOG.info("doIt: " + getId()); + success.println(ReportingConf.substituteVariables(getTitle())); + if (getNote() != null) + success.println(getNote()); + + if (getHeader() != null) + success.println(getHeader()); + LOG.info(this.getDisplayName()); + + try { + LOG.info("doIt (getting connection): " + getId()); + conn = getParent().getConnectionPools().getMetastoreDirectConnection(); + LOG.info("doIt (connection made): " + getId()); + QueryDefinition queryDefinition = getQueryDefinitions().getQueryDefinition(actionDefinition); + // + if (queryDefinition.getPreparedStatements() == null || queryDefinition.getPreparedStatements().size() < 1) { + // This doesn't have what we're looking for. It needs some preparedStatement definitions. + error.println("No prepared statements found."); + throw new RuntimeException("missing preparedStatements."); + } else { + if (transactional) { + conn.setAutoCommit(Boolean.FALSE); + LOG.info("Transaction (Start): " + getId()); + } + // We should be looking at the prepared statements. + for (String action : queryDefinition.getPreparedStatements().keySet()) { + String actionSql = queryDefinition.getPreparedStatements().get(action); + LOG.info("SQL for : " + action + ": " + getId() + " -> " + actionSql); + LOG.info("Statement (Preparing) for: " + action + " " + getId()); + PreparedStatement preparedStatement = conn.prepareStatement(actionSql); + // TODO: Check for Parameters.. + LOG.info("Statement (Running) for: " + action + " " + getId()); + Long recordAffected = preparedStatement.executeLargeUpdate(); + LOG.info("Statement (Completed) for: " + action + " " + getId()); + success.println("Action for: " + action + " affected " + recordAffected + " records."); + // TODO: Record the number of records affected. + LOG.info("The query: " + action + " affected " + recordAffected + " records."); + counterGroup.addAndGetTaskState(TaskState.PROCESSED, 1); + } + if (transactional) { + LOG.info("Committing Transaction: " + getId()); + conn.commit(); + LOG.info("CommittED Transaction: " + getId()); + } + } + } catch (SQLException e) { + if (transactional && conn != null) { + try { + LOG.error("Issue, rollback started: " + getId(), e); + conn.rollback(); + LOG.error("Issue, rollback complete: " + getId(), e); + } catch (SQLException ex) { + throw new RuntimeException(ex); + } + } + counterGroup.addAndGetTaskState(TaskState.ERROR, 1); + throw new RuntimeException(e); + } + LOG.info("doIt (done): " + getId()); + setInitializing(Boolean.FALSE); + setActive(Boolean.FALSE); + } + + public String getActionDefinition() { + return actionDefinition; + } + + public void setActionDefinition(String actionDefinition) { + this.actionDefinition = actionDefinition; + } + + public Boolean getTransactional() { + return transactional; + } + + public void setTransactional(Boolean transactional) { + this.transactional = transactional; + } + + @Override + public Boolean testSQLScript() { + Boolean rtn = Boolean.TRUE; + + LOG.info("Testing SQL Definition: " + actionDefinition); + // build prepared statement for targetQueryDef + QueryDefinition queryDefinition = getQueryDefinitions().getQueryDefinition(actionDefinition); + // + if (queryDefinition.getPreparedStatements() == null || queryDefinition.getPreparedStatements().size() < 1) { + // This doesn't have what we're looking for. It needs some preparedStatement definitions. + rtn = Boolean.FALSE; + } else { + // We should be looking at the prepared statements. + for (String action : queryDefinition.getPreparedStatements().keySet()) { + LOG.info("SQL for : " + action); + LOG.info(queryDefinition.getPreparedStatements().get(action)); + } + } + setInitializing(Boolean.FALSE); + return rtn; + } + + @Override + public String toString() { + return "MetastoreActionProcess{}" ; + } + +} diff --git a/src/main/java/com/cloudera/utils/hive/sre/ProcessContainer.java b/src/main/java/com/cloudera/utils/hive/sre/ProcessContainer.java index 915b136..e8825a2 100644 --- a/src/main/java/com/cloudera/utils/hive/sre/ProcessContainer.java +++ b/src/main/java/com/cloudera/utils/hive/sre/ProcessContainer.java @@ -17,6 +17,7 @@ package com.cloudera.utils.hive.sre; +import com.cloudera.utils.hive.HiveFrameworkCheck; import com.cloudera.utils.hive.config.DBStore; import com.cloudera.utils.hive.config.SreProcessesConfig; import com.cloudera.utils.hive.reporting.Reporter; @@ -48,12 +49,12 @@ The 'ProcessContainer' is the definition and runtime structure */ @JsonIgnoreProperties({"config", "reporter", "taskThreadPool", "procThreadPool", "processThreads", "cliPool", - "connectionPools", "outputDirectory", "dbsOverride", "includeFilter", "excludeFilter", "testSQL"}) + "connectionPools", "outputDirectory", "dbsOverride", "includeFilter", "excludeFilter", "testSQL", "parent"}) public class ProcessContainer implements Runnable { private static Logger LOG = LogManager.getLogger(ProcessContainer.class); private String module; - + private HiveFrameworkCheck parent; private boolean initializing = Boolean.TRUE; private SreProcessesConfig config; private Reporter reporter; @@ -107,6 +108,10 @@ public void setConfig(SreProcessesConfig config) { this.reporter.setRefreshInterval(this.config.getReportingInterval()); } + public void setParent(HiveFrameworkCheck parent) { + this.parent = parent; + } + public Reporter getReporter() { return reporter; } @@ -316,8 +321,18 @@ protected String init(String config, String outputDirectory) { String yamlCfgFile = FileUtils.readFileToString(cfgFile, Charset.forName("UTF-8")); SreProcessesConfig sreConfig = mapper.readerFor(SreProcessesConfig.class).readValue(yamlCfgFile); sreConfig.validate(); + // If there is more than one MetastoreActionProcess, we need to reduce the parallelism to 1 to + // Avoid potential deadlocks on the metastore db they may happen from different threads. + int mapProcCount = 0; + for (SreProcessBase proc: getProcesses()) { + if (proc instanceof MetastoreActionProcess) + mapProcCount++; + } + if (mapProcCount > 1) { + sreConfig.setParallelism(1); + } setConfig(sreConfig); - + validate(); } catch ( IOException e) { throw new RuntimeException("Issue getting configs", e); @@ -328,6 +343,12 @@ protected String init(String config, String outputDirectory) { return jobRunDir; } + private void validate() { + if (parent.getName().equals("u3e") && getConfig().getMetastoreDirect().getType() != DBStore.DB_TYPE.MYSQL) { + throw new RuntimeException("The `u3e` process only works with MYSQL database types at this time."); + } + } + protected void initResources() { try { this.connectionPools = new ConnectionPools(getConfig()); diff --git a/src/main/java/com/cloudera/utils/hive/sre/SreProcessBase.java b/src/main/java/com/cloudera/utils/hive/sre/SreProcessBase.java index b8fd0d7..cf64d1e 100644 --- a/src/main/java/com/cloudera/utils/hive/sre/SreProcessBase.java +++ b/src/main/java/com/cloudera/utils/hive/sre/SreProcessBase.java @@ -28,6 +28,8 @@ import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.commons.io.IOUtils; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import java.io.*; import java.net.URL; @@ -42,10 +44,13 @@ @JsonSubTypes({ @JsonSubTypes.Type(value = DbSetProcess.class, name = "dbSet"), @JsonSubTypes.Type(value = MetastoreQueryProcess.class, name = "metastore.query"), + @JsonSubTypes.Type(value = MetastoreActionProcess.class, name = "metastore.action"), @JsonSubTypes.Type(value = MetastoreReportProcess.class, name = "metastore.report")}) + public abstract class SreProcessBase implements Callable { + private static Logger LOG = LogManager.getLogger(SreProcessBase.class); private String displayName = "not set"; - private String title = null; + private String title = "not set"; private String note = null; private String id = null; private Boolean skip = Boolean.FALSE; @@ -310,7 +315,7 @@ public String getOutputDetails() { public void init(ProcessContainer parent) throws FileNotFoundException { setParent(parent); setInitializing(Boolean.TRUE); - + LOG.info("Init: " + getId()); ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); mapper.enable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); @@ -332,6 +337,7 @@ public void init(ProcessContainer parent) throws FileNotFoundException { } catch (Exception e) { throw new RuntimeException("Issue getting configs", e); } + LOG.info("Init (done): " + getId()); } public abstract Boolean testSQLScript(); diff --git a/src/main/java/com/cloudera/utils/sql/QueryDefinition.java b/src/main/java/com/cloudera/utils/sql/QueryDefinition.java index 48b1afe..7f15cf8 100644 --- a/src/main/java/com/cloudera/utils/sql/QueryDefinition.java +++ b/src/main/java/com/cloudera/utils/sql/QueryDefinition.java @@ -23,6 +23,7 @@ public class QueryDefinition { private String statement; + private Map preparedStatements; @JsonProperty("parameters") private Map parameters; @@ -34,6 +35,14 @@ public void setStatement(String statement) { this.statement = statement; } + public Map getPreparedStatements() { + return preparedStatements; + } + + public void setPreparedStatements(Map preparedStatements) { + this.preparedStatements = preparedStatements; + } + public Map getParameters() { return parameters; } diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties index 926a201..334ac99 100644 --- a/src/main/resources/log4j.properties +++ b/src/main/resources/log4j.properties @@ -48,3 +48,4 @@ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c[%t]: %m%n # Debugging Pattern format #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.logger.com.zaxxer.hikari=DEBUG,DRFA diff --git a/src/main/resources/u3/MYSQL/hive_u3_queries.yaml b/src/main/resources/u3/MYSQL/hive_u3_queries.yaml index f3ee04b..3ef2e76 100644 --- a/src/main/resources/u3/MYSQL/hive_u3_queries.yaml +++ b/src/main/resources/u3/MYSQL/hive_u3_queries.yaml @@ -348,54 +348,30 @@ WHERE t.tbl_serde_slib IN )) b2) AS sub) a ) " - managed_2_acid: + legacy_managed: statement: " -SELECT DISTINCT - DB_NAME - , TBL_NAME - , CREATED_DATE - , TBL_TYPE - , TBL_LOCATION -FROM ( - SELECT - DB.NAME AS DB_NAME - , TBL.TBL_NAME - , DATE_FORMAT(FROM_UNIXTIME(TBL.CREATE_TIME), '%Y-%m-%d') AS CREATED_DATE - , S.LOCATION AS TBL_LOCATION - , CASE CONCAT(LOWER(TBL_TYPE), LOWER(P.PARAM_KEY), LOWER(P.PARAM_VALUE)) - WHEN 'external_tableexternaltrue' THEN 'EXTERNAL' - WHEN 'managed_tabletransactionaltrue' THEN 'ACID' - ELSE 'LEGACY_MANAGED' - END AS TBL_TYPE - FROM DBS DB - JOIN - TBLS TBL ON - DB.DB_ID = TBL.DB_ID - INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID - LEFT OUTER JOIN ( - SELECT - SP.TBL_ID - , SP.PARAM_KEY - , SP.PARAM_VALUE - FROM TABLE_PARAMS SP - WHERE - LOWER(SP.PARAM_KEY) = 'transactional' - OR LOWER(SP.PARAM_KEY) = 'external' - ) P - ON TBL.TBL_ID = P.TBL_ID - WHERE - DB.NAME LIKE ? - AND TBL.TBL_TYPE = 'MANAGED_TABLE' - ) WCAT +SELECT + D.NAME + , T.TBL_NAME + , DATE_FORMAT(FROM_UNIXTIME(T.CREATE_TIME), '%Y-%m-%d') AS CREATED_DATE + , T.TBL_TYPE + , S.LOCATION +FROM TBLS T + INNER JOIN DBS D ON T.DB_ID = D.DB_ID + INNER JOIN SDS S ON T.SD_ID = S.SD_ID WHERE - TBL_TYPE = 'LEGACY_MANAGED' -ORDER BY DB_NAME, TBL_TYPE + T.TBL_ID NOT IN ( + SELECT + T2.TBL_ID + FROM TABLE_PARAMS TP + INNER JOIN TBLS T2 ON TP.TBL_ID = T2.TBL_ID + WHERE + TP.PARAM_KEY = 'transactional' + AND BINARY TP.PARAM_VALUE = 'true') + AND TBL_TYPE = 'MANAGED_TABLE' +ORDER BY + D.NAME, T.TBL_NAME " - parameters: - dbs: - initial: "%" - sqlType: 12 - location: 1 managed_tbl_shadows: statement: " SELECT @@ -676,6 +652,49 @@ WHERE T.TBL_SERDE_SLIB IN initial: "com.cloudera.kudu.hive.KuduStorageHandler" sqlType: 12 location: 2 + acid_bucket_report: + statement: " +SELECT + DB_NAME + , TBL_NAME + , TBL_ID +FROM ( + SELECT DISTINCT + DB.NAME AS DB_NAME + , TBL.TBL_NAME + , TBL.TBL_ID + FROM DBS DB + JOIN + TBLS TBL ON + DB.DB_ID = TBL.DB_ID + INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID + INNER JOIN ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'transactional' + AND LOWER(SP.PARAM_VALUE) = 'true' + ) P + ON TBL.TBL_ID = P.TBL_ID + ) WCAT +WHERE + WCAT.TBL_ID NOT IN ( + SELECT + TBL_ID + FROM ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'bucketing_version' + AND SP.PARAM_VALUE = '2' + ) BPARAMS) + " acid_bucket_version: statement: " SELECT @@ -752,3 +771,51 @@ WHERE initial: "not_set" sqlType: 12 location: 2 + managed_2_acid: + statement: " +SELECT DISTINCT + DB_NAME + , TBL_NAME + , CREATED_DATE + , TBL_TYPE + , TBL_LOCATION +FROM ( + SELECT + DB.NAME AS DB_NAME + , TBL.TBL_NAME + , DATE_FORMAT(FROM_UNIXTIME(TBL.CREATE_TIME), '%Y-%m-%d') AS CREATED_DATE + , S.LOCATION AS TBL_LOCATION + , CASE CONCAT(LOWER(TBL_TYPE), LOWER(P.PARAM_KEY), LOWER(P.PARAM_VALUE)) + WHEN 'external_tableexternaltrue' THEN 'EXTERNAL' + WHEN 'managed_tabletransactionaltrue' THEN 'ACID' + ELSE 'LEGACY_MANAGED' + END AS TBL_TYPE + FROM DBS DB + JOIN + TBLS TBL ON + DB.DB_ID = TBL.DB_ID + INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID + LEFT OUTER JOIN ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'transactional' + OR LOWER(SP.PARAM_KEY) = 'external' + ) P + ON TBL.TBL_ID = P.TBL_ID + WHERE + DB.NAME LIKE ? + AND TBL.TBL_TYPE = 'MANAGED_TABLE' + ) WCAT +WHERE + TBL_TYPE = 'LEGACY_MANAGED' +ORDER BY DB_NAME, TBL_TYPE + " + parameters: + dbs: + initial: "%" + sqlType: 12 + location: 1 diff --git a/src/main/resources/u3/ORACLE/hive_u3_queries.yaml b/src/main/resources/u3/ORACLE/hive_u3_queries.yaml index d66b30b..440a4d7 100644 --- a/src/main/resources/u3/ORACLE/hive_u3_queries.yaml +++ b/src/main/resources/u3/ORACLE/hive_u3_queries.yaml @@ -346,68 +346,30 @@ WHERE t.tbl_serde_slib IN )) b2) sub) a ) " - managed_2_acid: + legacy_managed: statement: " - WITH WPARAMS AS ( - SELECT - SP.TBL_ID - , SP.PARAM_KEY - , SP.PARAM_VALUE - FROM - TABLE_PARAMS SP - WHERE - LOWER(SP.PARAM_KEY) = 'transactional' - OR LOWER(SP.PARAM_KEY) = 'external' - ), - WCAT AS ( - SELECT - DB.NAME AS DB_NAME - , TBL.TBL_NAME - , TO_CHAR(TO_DATE('01-jan-1970', 'dd-mon-yyyy') + TBL.CREATE_TIME / 86400, 'yyyy-mm-dd') CREATED_DATE - , S.LOCATION AS TBL_LOCATION - , CASE LOWER(TBL_TYPE) - WHEN 'external_table' THEN 'EXTERNAL' - WHEN 'managed_table' THEN - CASE LOWER(P.PARAM_KEY) - WHEN 'transactional' THEN - CASE LOWER(TO_CHAR(P.PARAM_VALUE)) - WHEN 'true' THEN 'ACID' - ELSE 'LEGACY_MANAGED' - END - ELSE 'LEGACY_MANAGED' - END - ELSE 'LEGACY_MANAGED' - END AS TBL_TYPE - FROM - DBS DB - JOIN - TBLS TBL ON - DB.DB_ID = TBL.DB_ID - INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID - LEFT OUTER JOIN WPARAMS P - ON TBL.TBL_ID = P.TBL_ID - WHERE - DB.NAME LIKE ? - AND TBL.TBL_TYPE = 'MANAGED_TABLE' - ) - SELECT DISTINCT - DB_NAME - , TBL_NAME - , CREATED_DATE - , TBL_TYPE - , TBL_LOCATION - FROM - WCAT + SELECT + D.NAME + , T.TBL_NAME + , TO_CHAR(TO_DATE('01-jan-1970', 'dd-mon-yyyy') + TBL.CREATE_TIME / 86400, 'yyyy-mm-dd') CREATED_DATE + , T.TBL_TYPE + , S.LOCATION + FROM TBLS T + INNER JOIN DBS D ON T.DB_ID = D.DB_ID + INNER JOIN SDS S ON T.SD_ID = S.SD_ID WHERE - TBL_TYPE = 'LEGACY_MANAGED' + T.TBL_ID NOT IN ( + SELECT + T2.TBL_ID + FROM TABLE_PARAMS TP + INNER JOIN TBLS T2 ON TP.TBL_ID = T2.TBL_ID + WHERE + LOWER(TP.PARAM_KEY) = 'transactional' + AND LOWER(TO_CHAR(TP.PARAM_VALUE)) = 'true') + AND TBL_TYPE = 'MANAGED_TABLE' ORDER BY - DB_NAME, TBL_TYPE + D.NAME, T.TBL_NAME " - parameters: - dbs: - initial: "%" - sqlType: 12 - location: 1 managed_tbl_shadows: statement: " SELECT @@ -688,6 +650,60 @@ WHERE T.TBL_SERDE_SLIB IN initial: "com.cloudera.kudu.hive.KuduStorageHandler" sqlType: 12 location: 2 + acid_bucket_report: + statement: " + WITH + TPARAMS AS ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'transactional' + AND LOWER(to_char(SP.PARAM_VALUE)) = 'true' + ), + BPARAMS AS ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'bucketing_version' + AND to_char(SP.PARAM_VALUE) = '2' + ), + WCAT AS ( + SELECT + DB.NAME AS DB_NAME + , TBL.TBL_NAME + , TBL.TBL_ID + FROM DBS DB + JOIN + TBLS TBL ON + DB.DB_ID = TBL.DB_ID + INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID + INNER JOIN TPARAMS P + ON TBL.TBL_ID = P.TBL_ID + ), + ACIDV1 AS ( + SELECT DISTINCT + DB_NAME + , TBL_NAME + , TBL_ID + FROM WCAT + ) + SELECT + DB_NAME + , TBL_NAME + , TBL_ID + FROM ACIDV1 AV1 + WHERE + AV1.TBL_ID NOT IN ( + SELECT + TBL_ID + FROM BPARAMS) + " acid_bucket_version: statement: " WITH @@ -775,3 +791,65 @@ WHERE T.TBL_SERDE_SLIB IN initial: "not_set" sqlType: 12 location: 2 + managed_2_acid: + statement: " + WITH WPARAMS AS ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM + TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'transactional' + OR LOWER(SP.PARAM_KEY) = 'external' + ), + WCAT AS ( + SELECT + DB.NAME AS DB_NAME + , TBL.TBL_NAME + , TO_CHAR(TO_DATE('01-jan-1970', 'dd-mon-yyyy') + TBL.CREATE_TIME / 86400, 'yyyy-mm-dd') CREATED_DATE + , S.LOCATION AS TBL_LOCATION + , CASE LOWER(TBL_TYPE) + WHEN 'external_table' THEN 'EXTERNAL' + WHEN 'managed_table' THEN + CASE LOWER(P.PARAM_KEY) + WHEN 'transactional' THEN + CASE LOWER(TO_CHAR(P.PARAM_VALUE)) + WHEN 'true' THEN 'ACID' + ELSE 'LEGACY_MANAGED' + END + ELSE 'LEGACY_MANAGED' + END + ELSE 'LEGACY_MANAGED' + END AS TBL_TYPE + FROM + DBS DB + JOIN + TBLS TBL ON + DB.DB_ID = TBL.DB_ID + INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID + LEFT OUTER JOIN WPARAMS P + ON TBL.TBL_ID = P.TBL_ID + WHERE + DB.NAME LIKE ? + AND TBL.TBL_TYPE = 'MANAGED_TABLE' + ) + SELECT DISTINCT + DB_NAME + , TBL_NAME + , CREATED_DATE + , TBL_TYPE + , TBL_LOCATION + FROM + WCAT + WHERE + TBL_TYPE = 'LEGACY_MANAGED' + ORDER BY + DB_NAME, TBL_TYPE + " + parameters: + dbs: + initial: "%" + sqlType: 12 + location: 1 diff --git a/src/main/resources/u3/POSTGRES/hive_u3_queries.yaml b/src/main/resources/u3/POSTGRES/hive_u3_queries.yaml index ee3f827..954c297 100644 --- a/src/main/resources/u3/POSTGRES/hive_u3_queries.yaml +++ b/src/main/resources/u3/POSTGRES/hive_u3_queries.yaml @@ -347,50 +347,30 @@ WHERE \"T\".\"TBL_SERDE_SLIB\" IN )) \"B2\") AS \"SUB\") \"A\" ) " - managed_2_acid: + legacy_managed: statement: " - WITH \"WPARAMS\" AS ( - SELECT \"SP\".\"TBL_ID\" - , \"SP\".\"PARAM_KEY\" - , \"SP\".\"PARAM_VALUE\" - FROM \"TABLE_PARAMS\" \"SP\" - WHERE LOWER(\"SP\".\"PARAM_KEY\") = 'transactional' - OR LOWER(\"SP\".\"PARAM_KEY\") = 'external' - ), - \"WCAT\" AS ( - SELECT \"DB\".\"NAME\" AS \"DB_NAME\" - , \"TBL\".\"TBL_NAME\" - , to_timestamp(\"TBL\".\"CREATE_TIME\")::date AS \"CREATED_DATE\" - , \"S\".\"LOCATION\" AS \"TBL_LOCATION\" - , CASE CONCAT(LOWER(\"TBL_TYPE\"), LOWER(\"P\".\"PARAM_KEY\"), LOWER(\"P\".\"PARAM_VALUE\")) - WHEN 'external_tableexternaltrue' THEN 'EXTERNAL' - WHEN 'managed_tabletransactionaltrue' THEN 'ACID' - ELSE 'LEGACY_MANAGED' - END AS \"TBL_TYPE\" - FROM \"DBS\" \"DB\" - JOIN - \"TBLS\" \"TBL\" ON - \"DB\".\"DB_ID\" = \"TBL\".\"DB_ID\" - INNER JOIN \"SDS\" \"S\" ON \"TBL\".\"SD_ID\" = \"S\".\"SD_ID\" - LEFT OUTER JOIN \"WPARAMS\" \"P\" - ON \"TBL\".\"TBL_ID\" = \"P\".\"TBL_ID\" - WHERE \"DB\".\"NAME\" LIKE ? - AND \"TBL\".\"TBL_TYPE\" = 'MANAGED_TABLE' - ) - SELECT DISTINCT \"DB_NAME\" - , \"TBL_NAME\" - , \"CREATED_DATE\" - , \"TBL_TYPE\" - , \"TBL_LOCATION\" - FROM \"WCAT\" - WHERE \"TBL_TYPE\" = 'LEGACY_MANAGED' - ORDER BY \"DB_NAME\", \"TBL_TYPE\" + SELECT + \"D\".\"NAME\" + , \"T\".\"TBL_NAME\" + , TO_CHAR(date(to_timestamp(\"T\".\"CREATE_TIME\")), 'YYYY-MM-DD') AS \"CREATED_DATE\" + , \"T\".\"TBL_TYPE\" + , \"S\".\"LOCATION\" + FROM \"TBLS\" \"T\" + INNER JOIN \"DBS\" \"D\" ON \"T\".\"DB_ID\" = \"D\".\"DB_ID\" + INNER JOIN \"SDS\" \"S\" ON \"T\".\"SD_ID\" = \"S\".\"SD_ID\" + WHERE + \"T\".\"TBL_ID\" NOT IN ( + SELECT + \"T2\".\"TBL_ID\" + FROM \"TABLE_PARAMS\" \"TP\" + INNER JOIN \"TBLS\" \"T2\" ON \"TP\".\"TBL_ID\" = \"T2\".\"TBL_ID\" + WHERE + \"TP\".\"PARAM_KEY\" = 'transactional' + AND LOWER(\"TP\".\"PARAM_VALUE\") = 'true') + AND \"TBL_TYPE\" = 'MANAGED_TABLE' + ORDER BY + \"D\".\"NAME\", \"T\".\"TBL_NAME\" " - parameters: - dbs: - initial: "%" - sqlType: 12 - location: 1 managed_tbl_shadows: statement: " SELECT @@ -674,6 +654,51 @@ WHERE \"T\".\"TBL_SERDE_SLIB\" IN initial: "com.cloudera.kudu.hive.KuduStorageHandler" sqlType: 12 location: 2 + acid_bucket_report: + statement: " + WITH \"TPARAMS\" AS ( + SELECT \"SP\".\"TBL_ID\" + , \"SP\".\"PARAM_KEY\" + , \"SP\".\"PARAM_VALUE\" + FROM \"TABLE_PARAMS\" \"SP\" + WHERE LOWER(\"SP\".\"PARAM_KEY\") = 'transactional' + AND LOWER(\"SP\".\"PARAM_VALUE\") = 'true' + ), + \"BPARAMS\" AS ( + SELECT \"SP\".\"TBL_ID\" + , \"SP\".\"PARAM_KEY\" + , \"SP\".\"PARAM_VALUE\" + FROM \"TABLE_PARAMS\" \"SP\" + WHERE LOWER(\"SP\".\"PARAM_KEY\") = 'bucketing_version' + AND \"SP\".\"PARAM_VALUE\" = '2' + ), + \"WCAT\" AS ( + SELECT \"DB\".\"NAME\" AS \"DB_NAME\" + , \"TBL\".\"TBL_NAME\" + , \"TBL\".\"TBL_ID\" + FROM \"DBS\" \"DB\" + JOIN + \"TBLS\" \"TBL\" ON + \"DB\".\"DB_ID\" = \"TBL\".\"DB_ID\" + INNER JOIN \"SDS\" \"S\" ON \"TBL\".\"SD_ID\" = \"S\".\"SD_ID\" + INNER JOIN \"TPARAMS\" \"P\" + ON \"TBL\".\"TBL_ID\" = \"P\".\"TBL_ID\" + ), + \"ACIDV1\" AS ( + SELECT DISTINCT \"DB_NAME\" + , \"TBL_NAME\" + , \"TBL_ID\" + FROM \"WCAT\" + ) + SELECT \"DB_NAME\" + , \"TBL_NAME\" + , \"TBL_ID\" + FROM \"ACIDV1\" \"AV1\" + WHERE + \"AV1\".\"TBL_ID\" NOT IN ( + SELECT \"TBL_ID\" + FROM \"BPARAMS\") + " acid_bucket_version: statement: " WITH \"TPARAMS\" AS ( @@ -752,3 +777,47 @@ WHERE \"T\".\"TBL_SERDE_SLIB\" IN initial: "not_set" sqlType: 12 location: 2 + managed_2_acid: + statement: " + WITH \"WPARAMS\" AS ( + SELECT \"SP\".\"TBL_ID\" + , \"SP\".\"PARAM_KEY\" + , \"SP\".\"PARAM_VALUE\" + FROM \"TABLE_PARAMS\" \"SP\" + WHERE LOWER(\"SP\".\"PARAM_KEY\") = 'transactional' + OR LOWER(\"SP\".\"PARAM_KEY\") = 'external' + ), + \"WCAT\" AS ( + SELECT \"DB\".\"NAME\" AS \"DB_NAME\" + , \"TBL\".\"TBL_NAME\" + , to_timestamp(\"TBL\".\"CREATE_TIME\")::date AS \"CREATED_DATE\" + , \"S\".\"LOCATION\" AS \"TBL_LOCATION\" + , CASE CONCAT(LOWER(\"TBL_TYPE\"), LOWER(\"P\".\"PARAM_KEY\"), LOWER(\"P\".\"PARAM_VALUE\")) + WHEN 'external_tableexternaltrue' THEN 'EXTERNAL' + WHEN 'managed_tabletransactionaltrue' THEN 'ACID' + ELSE 'LEGACY_MANAGED' + END AS \"TBL_TYPE\" + FROM \"DBS\" \"DB\" + JOIN + \"TBLS\" \"TBL\" ON + \"DB\".\"DB_ID\" = \"TBL\".\"DB_ID\" + INNER JOIN \"SDS\" \"S\" ON \"TBL\".\"SD_ID\" = \"S\".\"SD_ID\" + LEFT OUTER JOIN \"WPARAMS\" \"P\" + ON \"TBL\".\"TBL_ID\" = \"P\".\"TBL_ID\" + WHERE \"DB\".\"NAME\" LIKE ? + AND \"TBL\".\"TBL_TYPE\" = 'MANAGED_TABLE' + ) + SELECT DISTINCT \"DB_NAME\" + , \"TBL_NAME\" + , \"CREATED_DATE\" + , \"TBL_TYPE\" + , \"TBL_LOCATION\" + FROM \"WCAT\" + WHERE \"TBL_TYPE\" = 'LEGACY_MANAGED' + ORDER BY \"DB_NAME\", \"TBL_TYPE\" + " + parameters: + dbs: + initial: "%" + sqlType: 12 + location: 1 diff --git a/src/main/resources/u3/proc/hive_u3_procs.yaml b/src/main/resources/u3/proc/hive_u3_procs.yaml index ac06201..6f45180 100644 --- a/src/main/resources/u3/proc/hive_u3_procs.yaml +++ b/src/main/resources/u3/proc/hive_u3_procs.yaml @@ -91,97 +91,26 @@ processes: reportOnPath: true processOnError: true processOnSuccess: false - - type: "dbSet" + - type: "metastore.report" id: 3 active: true - displayName: "Managed Non-ACID to ACID Table Migrations" + displayName: "Legacy Managed Non-Transactional (non-acid) Conversions to EXTERNAL/PURGE" + title: "# Legacy Managed Non-Transactional (non-acid) Conversions to EXTERNAL/PURGE (v.${Implementation-Version})" queryDefinitionReference: "/hive_u3_queries.yaml" - errorDescription: "Issues" - successDescription: "SQL Conversion Script" - errorFilename: "managed_upgrade_2_acid_issues.txt" - successFilename: "managed_upgrade_2_acid.sql" - dbListingQuery: "db_tbl_count" - listingColumns: [ "db_name" ,"tbl_name" , "created_date", "tbl_type" ,"tbl_location" ] - pathsListingQuery: "managed_2_acid" - hsmmElements: - databaseField: "db_name" - tableField: "tbl_name" - skipCommandCheck: - displayName: "Hive 3 Upgrade Check - Potential ACID Conversions (Skip Command Check)" - title: "-- Managed Non - ACID to ACID Table Migrations (v.${Implementation-Version})\n" - note: " - -- This report highlights classic Managed Non-ACID tables that _could_ be converted to ACID tables by the post hive - 'strictmigration' process. This will dramatically change the characteristics of the table and affect the integration - methods used by Spark SQL, direct HDFS access, etc.. Many of which will 'fail' and/or 'corrupt' the newly converted - Hive ACID table.\n - - -- We recommend that you proactively force these tables to 'EXTERNAL' tables (with purge characteristics) to prevent this - conversion. Changing tables to ACID, post migration is much easier than trying to revert converted ACID tables back - to non-ACID tables.\n - - -- Tables that are managed and aren't converted by this program, will be converted to 'EXTERNAL/purge' when they don't - meet all the criteria for the conversion, regardless. Using the 'action' elements of this report simply ensures - you are in control of the migration process.\n - - -- These conversions need to happen BEFORE the tables are accessed in CDP. Generally, this is done AFTER the upgrade - to CDP and BEFORE the tables are turned over to the end users. In some cases, due to the volume of changes needed, - the conversion can be run before the upgrade. BUT the conversion WILL have an impact on table behavior. Convert tables - in the legacy environment will NOT honor the 'purge' characteristics stated by the 'external.table.purge' TBLPROPERTIES. - So be advised of this caveat if you choose to run these conversions BEFORE the upgrade.\n - - -- The report includes the tables 'created_date' in the comment line above the 'ALTER'. You can use:\n - -- grep -A1 'created date: yyyy-MM' managed_upgrade_2_acid.sql | grep 'ALTER TABLE' > beeline_run.sql\n - -- OR - -- grep 'ALTER TABLE' managed_upgrade_2_acid.sql | sort -k3 > beeline_run.sql\n - -- to target certain tables first and run this post upgrade to make them available asap.\n - - -- Consider breaking the file into multiple parts to increase throughput and reduce runtime.\n - -- The statement are meant to be run against 'hive'. Use 'beeline -f '\n - - -- NOTE: This process CAN and SHOULD be run after the upgrade and after all the ALTER scripts have been run to ensure - -- you haven't missed any conversion.\n\n" - record: "-- This table 'could' be migrated to an ACID table unless changed.\n - -- Recommend forcing the manual conversion to ensure table isn't inadvertently migrated.\n - -- created date: %3$s\n - ALTER TABLE `%1$s`.`%2$s` SET TBLPROPERTIES('EXTERNAL'='TRUE', 'external.table.purge'='true');" - commandChecks: - - displayName: "Hive 3 Upgrade Check - Potential ACID Conversions" - title: "-- Managed Non - ACID to ACID Table Migrations (v.${Implementation-Version})\n" - note: " - -- This report highlights classic Managed Non-ACID tables that _could_ be converted to ACID tables by the post hive - 'strictmigration' process. This will dramatically change the characteristics of the table and affect the integration - methods used by Spark SQL, direct HDFS access, etc.. Many of which will 'fail' and/or 'corrupt' the newly converted - Hive ACID table.\n - - -- We recommend that you proactively force these tables to 'EXTERNAL' tables (with purge characteristics) to prevent this - conversion. Changing tables to ACID, post migration is much easier than trying to revert converted ACID tables back - to non-ACID tables.\n - - -- Tables that are managed and aren't converted by this program, will be converted to 'EXTERNAL/purge' when they don't - meet all the criteria for the conversion, regardless. Using the 'action' elements of this report simply ensures - you are in control of the migration process.\n\n" - invertCheck: false - pathCommand: "lsp -f user -self \"%5$s\"" - onSuccessPathCommand: "\"Rewrite database table: %1$s.%2$s [Partition: \" + com.cloudera.utils.hive.sre.Utils.dirToPartitionSpec('%5$s') + \"]" - onSuccessRecordCommand: "# Bad filename %2$s in directory: %1$s " - reportOnResults: false - reportOnPath: false - processOnError: false - processOnSuccess: true - checkCalculations: - ownershipCheck: - RECORDS: - test: "if (\"%5$s\".equals(\"%6$s\")) true; else false;" - pass: "\"-- Table is owned by '%5$s' and not currently ACID.\\n - -- This table will be migrated unless changed.\\n - -- created date: %3$s\\n - ALTER TABLE `%1$s`.`%2$s` SET TBLPROPERTIES('EXTERNAL'='TRUE', 'external.table.purge'='true')\\;\"" - fail: "\"-- Table is owned by '%5$s', not '%6$s', and NOT currently ACID.\\n - -- This table 'could' be migrated to an ACID table unless changed.\\n - -- Recommend forcing the manual conversion to ensure table isn't inadvertently migrated.\\n - -- created date: %3$s\\n - ALTER TABLE `%1$s`.`%2$s` SET TBLPROPERTIES('EXTERNAL'='TRUE', 'external.table.purge'='true')\\;\"" - params: [ "hive" ] + errorDescription: "Processing Issues" + successDescription: "Hive Metastore" + errorFilename: "legacy_managed_upgrade_issues.txt" + successFilename: "legacy_managed_upgrade.md" + metastoreQueryDefinitions: + - query: "legacy_managed" + listingColumns: [ "name" ,"tbl_name" , "created_date", "tbl_type" ,"location" ] + resultMessageHeader: " + This is a list of tables that meet the LEGACY Managed non-transactional criteria. Use this to understand the scope of the change. + \n\n + > Run the `u3e` process to execute the fixes for this list of tables. + \n\n" + resultMessageDetailHeader: "| Database | Table | Create Date | Type | Location |\n|:---|:---|:---|:---|:---|" + resultMessageDetailTemplate: "| %1$s | %2$s | %3$s | %4$s | %5$s |" - type: "dbSet" id: 4 active: true @@ -301,28 +230,26 @@ processes: resultMessageHeader: "\n## Table Partition Count" resultMessageDetailHeader: "| Database | Tables | Partitions |\n|:---|:---|:---|" resultMessageDetailTemplate: "| %1$s | %2$s | %3$s |" - - type: "dbSet" + - type: "metastore.report" id: 6 active: true displayName: "Hive ACID tables missing 'bucketing_version'='2' tblproperties" + title: "# Hive ACID tables missing 'bucketing_version'='2' tblproperties (v.${Implementation-Version})" queryDefinitionReference: "/hive_u3_queries.yaml" errorDescription: "Issues" successDescription: "SQL Conversion Script" errorFilename: "acid_bucketing_update_issues.txt" - successFilename: "acid_bucketing_update.sql" - dbListingQuery: "db_tbl_count" - listingColumns: [ "db_name" ,"tbl_name" , "tbl_id"] - pathsListingQuery: "acid_bucket_version" - hsmmElements: - databaseField: "db_name" - tableField: "tbl_name" - skipCommandCheck: - displayName: "Hive ACID tables missing 'bucketing_version'='2' tblproperties" - title: "-- Hive ACID tables missing 'bucketing_version'='2' tblproperties (v.${Implementation-Version})\n" - note: "-- This report will show the ACID tables that don't contain the required 'bucketing_version=2' tblproperties configuration.\n - -- Without this setting, you will have issues access/reading/processing the upgraded ACID table.\n - -- We recommend that you run the script to add this configuration value to the tables.\n\n" - record: "ALTER TABLE `%1$s`.`%2$s` SET TBLPROPERTIES('bucketing_version'='2');" + successFilename: "acid_bucketing_update.md" + metastoreQueryDefinitions: + - query: "acid_bucket_report" + listingColumns: [ "db_name" ,"tbl_name" ] + resultMessageHeader: " + This report will show the ACID tables that don't contain the required 'bucketing_version=2' tblproperties configuration.\n + Without this setting, you will have issues access/reading/processing the upgraded ACID table.\n + We recommend that you run the `u3e` process to correct after the upgrade.\n\n + " + resultMessageDetailHeader: "| Database | Table |\n|:---|:---|" + resultMessageDetailTemplate: "| %1$s | %2$s |" - type: "metastore.report" id: 7 active: true @@ -431,3 +358,78 @@ processes: " resultMessageDetailHeader: "| Database | Table | Type | Location | Create Date |\n|:---|:---|:---|:---|:---|" resultMessageDetailTemplate: "| %1$s | %2$s | %3$s | %4$s | %5$s |" + - type: "dbSet" + id: 8 + active: false + displayName: "Managed Non-ACID to ACID Table Migrations" + queryDefinitionReference: "/hive_u3_queries.yaml" + errorDescription: "Issues" + successDescription: "SQL Conversion Script" + errorFilename: "managed_upgrade_2_acid_issues.txt" + successFilename: "managed_upgrade_2_acid.sql" + dbListingQuery: "db_tbl_count" + listingColumns: [ "db_name" ,"tbl_name" , "created_date", "tbl_type" ,"tbl_location" ] + pathsListingQuery: "managed_2_acid" + hsmmElements: + databaseField: "db_name" + tableField: "tbl_name" + skipCommandCheck: + displayName: "Hive 3 Upgrade Check - Potential ACID Conversions (Skip Command Check)" + title: "-- Managed Non - ACID to ACID Table Migrations (v.${Implementation-Version})\n" + note: " + -- This report highlights classic Managed Non-ACID tables that _could_ be converted to ACID tables by the post hive + 'strictmigration' process. This will dramatically change the characteristics of the table and affect the integration + methods used by Spark SQL, direct HDFS access, etc.. Many of which will 'fail' and/or 'corrupt' the newly converted + Hive ACID table.\n + + -- We recommend that you proactively force these tables to 'EXTERNAL' tables (with purge characteristics) to prevent this + conversion. Changing tables to ACID, post migration is much easier than trying to revert converted ACID tables back + to non-ACID tables.\n + + -- Tables that are managed and aren't converted by this program, will be converted to 'EXTERNAL/purge' when they don't + meet all the criteria for the conversion, regardless. Using the 'action' elements of this report simply ensures + you are in control of the migration process.\n + + -- These conversions need to happen BEFORE the tables are accessed in CDP. Generally, this is done AFTER the upgrade + to CDP and BEFORE the tables are turned over to the end users. In some cases, due to the volume of changes needed, + the conversion can be run before the upgrade. BUT the conversion WILL have an impact on table behavior. Convert tables + in the legacy environment will NOT honor the 'purge' characteristics stated by the 'external.table.purge' TBLPROPERTIES. + So be advised of this caveat if you choose to run these conversions BEFORE the upgrade.\n + + -- The report includes the tables 'created_date' in the comment line above the 'ALTER'. You can use:\n + -- grep -A1 'created date: yyyy-MM' managed_upgrade_2_acid.sql | grep 'ALTER TABLE' > beeline_run.sql\n + -- OR + -- grep 'ALTER TABLE' managed_upgrade_2_acid.sql | sort -k3 > beeline_run.sql\n + -- to target certain tables first and run this post upgrade to make them available asap.\n + + -- Consider breaking the file into multiple parts to increase throughput and reduce runtime.\n + -- The statement are meant to be run against 'hive'. Use 'beeline -f '\n + + -- NOTE: This process CAN and SHOULD be run after the upgrade and after all the ALTER scripts have been run to ensure + -- you haven't missed any conversion.\n\n" + record: "-- This table 'could' be migrated to an ACID table unless changed.\n + -- Recommend forcing the manual conversion to ensure table isn't inadvertently migrated.\n + -- created date: %3$s\n + ALTER TABLE `%1$s`.`%2$s` SET TBLPROPERTIES('EXTERNAL'='TRUE', 'external.table.purge'='true');" + - type: "dbSet" + id: 9 + active: false + displayName: "Hive ACID tables missing 'bucketing_version'='2' tblproperties" + queryDefinitionReference: "/hive_u3_queries.yaml" + errorDescription: "Issues" + successDescription: "SQL Conversion Script" + errorFilename: "acid_bucketing_update_issues.txt" + successFilename: "acid_bucketing_update.sql" + dbListingQuery: "db_tbl_count" + listingColumns: [ "db_name" ,"tbl_name" , "tbl_id"] + pathsListingQuery: "acid_bucket_version" + hsmmElements: + databaseField: "db_name" + tableField: "tbl_name" + skipCommandCheck: + displayName: "Hive ACID tables missing 'bucketing_version'='2' tblproperties" + title: "-- Hive ACID tables missing 'bucketing_version'='2' tblproperties (v.${Implementation-Version})\n" + note: "-- This report will show the ACID tables that don't contain the required 'bucketing_version=2' tblproperties configuration.\n + -- Without this setting, you will have issues access/reading/processing the upgraded ACID table.\n + -- We recommend that you run the script to add this configuration value to the tables.\n\n" + record: "ALTER TABLE `%1$s`.`%2$s` SET TBLPROPERTIES('bucketing_version'='2');" diff --git a/src/main/resources/u3e/MYSQL/hive_u3e_queries.yaml b/src/main/resources/u3e/MYSQL/hive_u3e_queries.yaml new file mode 100644 index 0000000..a87233a --- /dev/null +++ b/src/main/resources/u3e/MYSQL/hive_u3e_queries.yaml @@ -0,0 +1,133 @@ +query_definitions: + fix_legacy_managed: + preparedStatements: + fix_table_external_params: " + INSERT INTO TABLE_PARAMS + SELECT + T.TBL_ID + , 'EXTERNAL' + , 'TRUE' + FROM TBLS T + INNER JOIN DBS D ON T.DB_ID = D.DB_ID + WHERE + T.TBL_ID NOT IN ( + SELECT + T2.TBL_ID + FROM TABLE_PARAMS TP + INNER JOIN TBLS T2 ON TP.TBL_ID = T2.TBL_ID + WHERE + TP.PARAM_KEY = 'transactional' + AND BINARY TP.PARAM_VALUE = 'true') + AND TBL_TYPE = 'MANAGED_TABLE' + ON DUPLICATE KEY UPDATE PARAM_VALUE = 'TRUE' + " + add_purge_flag: " + INSERT INTO TABLE_PARAMS + SELECT + T.TBL_ID + , 'external.table.purge' + , 'TRUE' + FROM TBLS T + INNER JOIN DBS D ON T.DB_ID = D.DB_ID + WHERE + T.TBL_ID NOT IN ( + SELECT + T2.TBL_ID + FROM TABLE_PARAMS TP + INNER JOIN TBLS T2 ON TP.TBL_ID = T2.TBL_ID + WHERE + TP.PARAM_KEY = 'transactional' + AND BINARY TP.PARAM_VALUE = 'true') + AND TBL_TYPE = 'MANAGED_TABLE' + ON DUPLICATE KEY UPDATE PARAM_VALUE = 'TRUE' + " + fix_tbl_type: " + UPDATE TBLS + SET TBL_TYPE = 'EXTERNAL_TABLE' + WHERE + TBL_ID IN ( + SELECT + T2.TBL_ID + FROM ( + SELECT * + FROM TBLS) T2 + INNER JOIN DBS D ON T2.DB_ID = D.DB_ID + WHERE + T2.TBL_ID NOT IN ( + SELECT + T3.TBL_ID + FROM TABLE_PARAMS TP + INNER JOIN ( + SELECT * + FROM TBLS) T3 ON TP.TBL_ID = T3.TBL_ID + WHERE + TP.PARAM_KEY = 'transactional' + AND BINARY TP.PARAM_VALUE = 'true') + AND TBL_TYPE = 'MANAGED_TABLE') + " + acid_table: + preparedStatements: + fix_bucketing_version: " + INSERT INTO TABLE_PARAMS + SELECT + TBL_ID + , 'bucketing_version' + , '2' + FROM ( + SELECT DISTINCT + DB.NAME AS DB_NAME + , TBL.TBL_NAME + , TBL.TBL_ID + FROM DBS DB + JOIN + TBLS TBL ON + DB.DB_ID = TBL.DB_ID + INNER JOIN SDS S ON TBL.SD_ID = S.SD_ID + INNER JOIN ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'transactional' + AND LOWER(SP.PARAM_VALUE) = 'true' + ) P + ON TBL.TBL_ID = P.TBL_ID + ) WCAT + WHERE + WCAT.TBL_ID NOT IN ( + SELECT + TBL_ID + FROM ( + SELECT + SP.TBL_ID + , SP.PARAM_KEY + , SP.PARAM_VALUE + FROM TABLE_PARAMS SP + WHERE + LOWER(SP.PARAM_KEY) = 'bucketing_version' + AND SP.PARAM_VALUE = '2' + ) BPARAMS) + ON DUPLICATE KEY UPDATE PARAM_VALUE = '2' + " + serde_exchange: + preparedStatements: + regex_serde: " + UPDATE SERDES + SET SLIB = 'org.apache.hadoop.hive.serde2.RegexSerDe' + WHERE + SLIB = 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' + " + typed_bytes_serde: " + UPDATE SERDES + SET SLIB = 'org.apache.hadoop.hive.serde2.TypedBytesSerDe' + WHERE + SLIB = 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe' + " + multi_delimit_serde: " + UPDATE SERDES + SET SLIB = 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe' + WHERE + SLIB = 'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe' + " diff --git a/src/main/resources/u3e/proc/hive_u3e_procs.yaml b/src/main/resources/u3e/proc/hive_u3e_procs.yaml new file mode 100644 index 0000000..6f3b8aa --- /dev/null +++ b/src/main/resources/u3e/proc/hive_u3e_procs.yaml @@ -0,0 +1,51 @@ +# Copyright 2021 Cloudera, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +module: "u3e" +processes: + - type: "metastore.action" + transactional: true + id: 1 + active: true + displayName: "Legacy Managed Non-Transactional (non-acid) Conversions to EXTERNAL/PURGE" + title: "# Legacy Managed Non-Transactional (non-acid) Conversions to EXTERNAL/PURGE (v.${Implementation-Version})" + queryDefinitionReference: "/hive_u3e_queries.yaml" + errorDescription: "Processing Issues" + successDescription: "Hive Metastore" + errorFilename: "legacy_managed_upgrade_issues.txt" + successFilename: "legacy_managed_upgrade.md" + actionDefinition: "fix_legacy_managed" + - type: "metastore.action" + transactional: true + id: 2 + active: true + displayName: "ACID Table bucketing version update" + title: "# ACID Table bucketing version update (v.${Implementation-Version})" + queryDefinitionReference: "/hive_u3e_queries.yaml" + errorDescription: "Processing Issues" + successDescription: "Hive Metastore" + errorFilename: "acid_bucketing_upgrade_issues.txt" + successFilename: "acid_bucketing_upgrade.md" + actionDefinition: "acid_table" + - type: "metastore.action" + transactional: true + id: 3 + active: true + displayName: "Legacy Contrib Serde2 Replacement" + title: "# Legacy Contrib Serde2 Replacement (v.${Implementation-Version})" + queryDefinitionReference: "/hive_u3e_queries.yaml" + errorDescription: "Processing Issues" + successDescription: "Hive Metastore" + errorFilename: "legacy_contrib_serde2_upgrade_issues.txt" + successFilename: "legacy_contrib_serde2_upgrade.md" + actionDefinition: "serde_exchange" diff --git a/src/test/java/com/cloudera/utils/hive/config/SreSQLMySqlTest.java b/src/test/java/com/cloudera/utils/hive/config/SreSQLMySqlTest.java index ae37b1e..2f9a10a 100644 --- a/src/test/java/com/cloudera/utils/hive/config/SreSQLMySqlTest.java +++ b/src/test/java/com/cloudera/utils/hive/config/SreSQLMySqlTest.java @@ -30,6 +30,14 @@ public void u3MYSQL_SQLTest() { Sre.main(new String[]{"u3", "-tsql", "-all", "-cfg", cfg,"-o", "/tmp/sre-sql-test"}); } + @Test + public void u3eMYSQL_SQLTest() { + String cfg = System.getProperty("user.home") + System.getProperty("file.separator") + + ".hive-sre/cfg" + System.getProperty("file.separator") + + "default-" + DBStore.DB_TYPE.MYSQL + ".yaml"; + Sre.main(new String[]{"u3e", "-tsql", "-all", "-cfg", cfg,"-o", "/tmp/sre-sql-test"}); + } + @Test public void sreMYSQL_SQLTest() { String cfg = System.getProperty("user.home") + System.getProperty("file.separator") + diff --git a/u3.md b/u3.md index 887a566..5188fdf 100644 --- a/u3.md +++ b/u3.md @@ -33,27 +33,29 @@ If you are doing the Expedited Hive Upgrade process there are two options: Runs reports: - [1](./sample_reports/u3/loc_scan_missing_dirs.md) - Table / Partition Location Scan - Missing Directories -- [3](./sample_reports/u3/managed_upgrade_2_acid.sql) - Hive 3 Upgrade Checks - Managed Non-ACID to ACID Table Migrations +- [3](./sample_reports/u3/managed_upgrade_2_acid.sql) - Hive 3 Upgrade Checks - List Managed Non-ACID tables that need to be converted. (Run `u3e` to process) - [4](./sample_reports/u3/managed_compactions.sql) - Hive 3 Upgrade Checks - Compaction Check - [5](./sample_reports/u3/hms_checks.md) - Hive Metastore Check
- Questionable Serde's Check
- Questionable Serde's Check
- Leagcy Kudu Serde Report
- Legacy Decimal Scale/Precision Check
- List Databases with Table/Partition Counts -- [6](./sample_reports/u3/acid_bucketing_update.sql) - Provide Script for ACID tables missing the 'bucketing_version' property +- [6](./sample_reports/u3/acid_bucketing_update.sql) - List ACID tables missing the 'bucketing_version' property (Run `u3e` to process) - 7 - List tables using known Storage Handlers +- 8 - Old Legacy Managed to External (sql script) +- 9 - Old ACID tables missing bucketing version (sql script) ##### HDP3 Runs reports: - [1](./sample_reports/u3/loc_scan_missing_dirs.md) - Table / Partition Location Scan - Missing Directories - [5](./sample_reports/u3/hms_checks.md) - Hive Metastore Check
- Questionable Serde's Check
- Questionable Serde's Check
- Leagcy Kudu Serde Report
- Legacy Decimal Scale/Precision Check
- List Databases with Table/Partition Counts -- [6](./sample_reports/u3/acid_bucketing_update.sql) - Provide Script for ACID tables missing the 'bucketing_version' property +- [6](./sample_reports/u3/acid_bucketing_update.sql) - List ACID tables missing the 'bucketing_version' property ##### CDH Runs reports: - [1](./sample_reports/u3/loc_scan_missing_dirs.md) - Table / Partition Location Scan - Missing Directories -- [3](./sample_reports/u3/managed_upgrade_2_acid.sql) - Hive 3 Upgrade Checks - Managed Non-ACID to ACID Table Migrations +- [3](./sample_reports/u3/managed_upgrade_2_acid.sql) - Hive 3 Upgrade Checks - List Managed Non-ACID tables that need to be converted. - [5](./sample_reports/u3/hms_checks.md) - Hive Metastore Check
- Questionable Serde's Check
- Questionable Serde's Check
- Leagcy Kudu Serde Report
- Legacy Decimal Scale/Precision Check
- List Databases with Table/Partition Counts -- [6](./sample_reports/u3/acid_bucketing_update.sql) - Provide Script for ACID tables missing the 'bucketing_version' property +- [6](./sample_reports/u3/acid_bucketing_update.sql) - List ACID tables missing the 'bucketing_version' property - 7 - List tables using known Storage Handlers ### Testing diff --git a/u3e.md b/u3e.md new file mode 100644 index 0000000..940269a --- /dev/null +++ b/u3e.md @@ -0,0 +1,29 @@ +## Hive 3 Upgrade Execution Process + +> This process is meant to run **AFTER** the upgrade and before the system is release for consumer access. It will work against the metastore DB directly, which is much faster than running the previous HiveSQL scripts for each table. You __can__ run this against the older Hive versions, but it may render some tables in a state that is NOT compatible with Hive 1/2. +> +> We suggest running this process against a 'copy' of the metastore db to understand the timings for upgrade planning. + +Upgrade to CDP can run this process instead of the 'full' HSMM process run during the upgrade. See the [Hive Expedited Upgrade](https://docs.cloudera.com/cdp-private-cloud-upgrade/latest/upgrade-cdh/topics/hive-expedited-migration-tasks.html). If you run this `u3e` process, most of the work will be done already. Create an empty includes file for HSMM and add it to the configuration as directed in the link above. This will ensure any 'database' changes are also applied. + +At this time, this process is ONLY available to MySQL/MariaDB backend metastore DB's. + +After running the `u3` process to review the extent of the changes, you can run this script **AFTER** the upgrade to Hive 3 to make the changes that cover: + +- Legacy Managed Non-Transactional (non-acid) Conversions to EXTERNAL/PURGE + - Will convert all LEGACY MANAGED tables to EXTERNAL/PURGE +- ACID Table bucketing version update + - Adds the correct bucket version property to ACID tables. +- Legacy Contrib Serde2 Replacement + - Handles the conversions of: + - `org.apache.hadoop.hive.contrib.serde2.RegexSerDe` + - `org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe` + - `org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe` + - Other Serdes identified in the `u3` process will need to be independently addressed. + +These scripts will execute directly against the METASTORE DB, skipping the HiveSQL interface. This process is considerably faster than processing these requests through the HiveSQL interface 1 at a time. + +### This process does NOT address + +- Kudu Serde / Table Upgrades. See: https://kudu.apache.org/docs/hive_metastore.html +- Legacy Decimal (Hive 0.12) issues. Refer to description of these in the `u3` report.