From 30ed8fa51fd8647dfdd7acea6d2a6215a87418c7 Mon Sep 17 00:00:00 2001 From: Boris Kozlov Date: Wed, 28 Feb 2024 16:16:35 +0100 Subject: [PATCH] DBMON-2051: Adding a deadlock metric to MySQL databases (#16904) * DBMON-2051: Adding a deadlock metric to Maria DB * removing print outs * removed prints * trying to create a proper deadlock * Fix thread pool limit * Add a check * Add wait to let update db threads to finish * Add select query * fix query * revert changes to conftest * fix test * Add events * remove obsolete diff * cleaned up pr * moved tests to the end * Put back empty lines * put back empty line * added a changelog * changed pr number in changelog * removed patched config * reverting back conftest * reverted common py * applied linter * Added the correct changelog * add a check for innnodb * removed superfluous method * removed whitespace * changed to monotonic_count * removed check from constructor * modified variable names * fixed changelog and test error * improved the changelog --------- Co-authored-by: root --- mysql/changelog.d/16904.added | 1 + mysql/datadog_checks/mysql/mysql.py | 35 ++++++----- mysql/datadog_checks/mysql/queries.py | 13 ++++ mysql/metadata.csv | 1 + mysql/tests/test_query_activity.py | 87 +++++++++++++++++++++++++++ mysql/tests/variables.py | 1 + 6 files changed, 124 insertions(+), 14 deletions(-) create mode 100644 mysql/changelog.d/16904.added diff --git a/mysql/changelog.d/16904.added b/mysql/changelog.d/16904.added new file mode 100644 index 00000000000000..898fc888b9942e --- /dev/null +++ b/mysql/changelog.d/16904.added @@ -0,0 +1 @@ +Adding a deadlock metric to MySQL databases. diff --git a/mysql/datadog_checks/mysql/mysql.py b/mysql/datadog_checks/mysql/mysql.py index 37356d8f2c73d5..ac585f7afd3e3b 100644 --- a/mysql/datadog_checks/mysql/mysql.py +++ b/mysql/datadog_checks/mysql/mysql.py @@ -56,6 +56,7 @@ from .innodb_metrics import InnoDBMetrics from .metadata import MySQLMetadata from .queries import ( + QUERY_DEADLOCKS, QUERY_USER_CONNECTIONS, SQL_95TH_PERCENTILE, SQL_AVG_QUERY_RUN_TIME, @@ -135,11 +136,12 @@ def __init__(self, name, init_config, instances): ttl=self._config.database_instance_collection_interval, ) # type: TTLCache - self._runtime_queries = None + self._runtime_queries_cached = None # Keep a copy of the tags without the internal resource tags so they can be used for paths that don't # go through the agent internal metrics submission processing those tags self._non_internal_tags = copy.deepcopy(self.tags) self.set_resource_tags() + self._is_innodb_engine_enabled_cached = None def execute_query_raw(self, query): with closing(self._conn.cursor(pymysql.cursors.SSCursor)) as cursor: @@ -297,8 +299,8 @@ def check(self, _): if not self._config.only_custom_queries: self._collect_metrics(db, tags=tags) self._collect_system_metrics(self._config.host, db, tags) - if self.runtime_queries: - self.runtime_queries.execute(extra_tags=tags) + if self._get_runtime_queries(db): + self._get_runtime_queries(db).execute(extra_tags=tags) if self._config.dbm_enabled: dbm_tags = list(set(self.service_check_tags) | set(tags)) @@ -335,23 +337,25 @@ def _new_query_executor(self, queries): track_operation_time=True, ) - @property - def runtime_queries(self): + def _get_runtime_queries(self, db): """ Initializes runtime queries which depend on outside factors (e.g. permission checks) to load first. """ - if self._runtime_queries: - return self._runtime_queries + if self._runtime_queries_cached: + return self._runtime_queries_cached queries = [] + if self._check_innodb_engine_enabled(db): + queries.extend([QUERY_DEADLOCKS]) + if self.performance_schema_enabled: queries.extend([QUERY_USER_CONNECTIONS]) - self._runtime_queries = self._new_query_executor(queries) - self._runtime_queries.compile_queries() + self._runtime_queries_cached = self._new_query_executor(queries) + self._runtime_queries_cached.compile_queries() self.log.debug("initialized runtime queries") - return self._runtime_queries + return self._runtime_queries_cached def _set_qcache_stats(self): host_key = self._get_host_key() @@ -448,7 +452,7 @@ def _collect_metrics(self, db, tags): if not is_affirmative( self._config.options.get('disable_innodb_metrics', False) - ) and self._is_innodb_engine_enabled(db): + ) and self._check_innodb_engine_enabled(db): with tracked_query(self, operation="innodb_metrics"): results.update(self.innodb_stats.get_stats_from_innodb_status(db)) self.innodb_stats.process_innodb_stats(results, self._config.options, metrics) @@ -1000,18 +1004,21 @@ def _get_binary_log_stats(self, db): self.warning("Privileges error accessing the BINARY LOGS (must grant REPLICATION CLIENT): %s", e) return None - def _is_innodb_engine_enabled(self, db): + def _check_innodb_engine_enabled(self, db): # Whether InnoDB engine is available or not can be found out either # from the output of SHOW ENGINES or from information_schema.ENGINES # table. Later is chosen because that involves no string parsing. + if self._is_innodb_engine_enabled_cached is not None: + return self._is_innodb_engine_enabled_cached try: with closing(db.cursor()) as cursor: cursor.execute(SQL_INNODB_ENGINES) - return cursor.rowcount > 0 + self._is_innodb_engine_enabled_cached = cursor.rowcount > 0 except (pymysql.err.InternalError, pymysql.err.OperationalError, pymysql.err.NotSupportedError) as e: self.warning("Possibly innodb stats unavailable - error querying engines table: %s", e) - return False + self._is_innodb_engine_enabled_cached = False + return self._is_innodb_engine_enabled_cached def _get_replica_stats(self, db, is_mariadb, replication_channel): replica_results = defaultdict(dict) diff --git a/mysql/datadog_checks/mysql/queries.py b/mysql/datadog_checks/mysql/queries.py index d7496c81cf226b..987aa0a3f89a1c 100644 --- a/mysql/datadog_checks/mysql/queries.py +++ b/mysql/datadog_checks/mysql/queries.py @@ -74,6 +74,19 @@ SELECT plugin_status FROM information_schema.plugins WHERE plugin_name='group_replication'""" +QUERY_DEADLOCKS = { + 'name': 'information_schema.INNODB_METRICS.lock_deadlocks', + 'query': """ + SELECT + count as deadlocks + FROM + information_schema.INNODB_METRICS + WHERE + NAME='lock_deadlocks' + """.strip(), + 'columns': [{'name': 'mysql.innodb.deadlocks', 'type': 'monotonic_count'}], +} + QUERY_USER_CONNECTIONS = { 'name': 'performance_schema.threads', 'query': """ diff --git a/mysql/metadata.csv b/mysql/metadata.csv index ff36b13c65f172..4c7451bb3f40d0 100644 --- a/mysql/metadata.csv +++ b/mysql/metadata.csv @@ -12,6 +12,7 @@ mysql.innodb.buffer_pool_utilization,gauge,,fraction,,The utilization of the Inn mysql.innodb.current_row_locks,gauge,,lock,,The number of current row locks.,-1,mysql,current row locks, mysql.innodb.data_reads,gauge,,read,second,The rate of data reads.,0,mysql,data reads, mysql.innodb.data_writes,gauge,,write,second,The rate of data writes.,0,mysql,data writes, +mysql.innodb.deadlocks,count,,lock,,"The number of deadlocks.",0,mysql,number of deadlocks, mysql.innodb.mutex_os_waits,gauge,,event,second,The rate of mutex OS waits.,0,mysql,mutex os waits, mysql.innodb.mutex_spin_rounds,gauge,,event,second,The rate of mutex spin rounds.,0,mysql,mutex spin rounds, mysql.innodb.mutex_spin_waits,gauge,,event,second,The rate of mutex spin waits.,0,mysql,mutex spin waits, diff --git a/mysql/tests/test_query_activity.py b/mysql/tests/test_query_activity.py index 308585dcf2a62f..413cf3bbd76882 100644 --- a/mysql/tests/test_query_activity.py +++ b/mysql/tests/test_query_activity.py @@ -11,6 +11,7 @@ from copy import copy from datetime import datetime from os import environ +from threading import Event import mock import pymysql @@ -478,6 +479,92 @@ def _expected_dbm_job_err_tags(dbm_instance): ] +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_if_deadlock_metric_is_collected(aggregator, dd_run_check, dbm_instance): + check = MySql(CHECK_NAME, {}, [dbm_instance]) + dd_run_check(check) + deadlock_metric = aggregator.metrics("mysql.innodb.deadlocks") + + assert len(deadlock_metric) == 1, "there should be one deadlock metric" + + +@pytest.mark.skipif( + environ.get('MYSQL_FLAVOR') == 'mariadb' or MYSQL_VERSION_PARSED < parse_version('8.0'), + reason='Deadock count is not updated in MariaDB or older MySQL versions', +) +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_deadlocks(aggregator, dd_run_check, dbm_instance): + check = MySql(CHECK_NAME, {}, [dbm_instance]) + dd_run_check(check) + deadlocks_start = 0 + deadlock_metric_start = aggregator.metrics("mysql.innodb.deadlocks") + + assert len(deadlock_metric_start) == 1, "there should be one deadlock metric" + + deadlocks_start = deadlock_metric_start[0].value + + first_query = "SELECT * FROM testdb.users WHERE id = 1 FOR UPDATE;" + second_query = "SELECT * FROM testdb.users WHERE id = 2 FOR UPDATE;" + + def run_first_deadlock_query(conn, event1, event2): + conn.begin() + try: + conn.cursor().execute("START TRANSACTION;") + conn.cursor().execute(first_query) + event1.set() + event2.wait() + conn.cursor().execute(second_query) + conn.cursor().execute("COMMIT;") + except Exception as e: + # Exception is expected due to a deadlock + print(e) + pass + conn.commit() + + def run_second_deadlock_query(conn, event1, event2): + conn.begin() + try: + event1.wait() + conn.cursor().execute("START TRANSACTION;") + conn.cursor().execute(second_query) + event2.set() + conn.cursor().execute(first_query) + conn.cursor().execute("COMMIT;") + except Exception as e: + # Exception is expected due to a deadlock + print(e) + pass + conn.commit() + + bob_conn = _get_conn_for_user('bob') + fred_conn = _get_conn_for_user('fred') + + executor = concurrent.futures.thread.ThreadPoolExecutor(2) + + event1 = Event() + event2 = Event() + + futures_first_query = executor.submit(run_first_deadlock_query, bob_conn, event1, event2) + futures_second_query = executor.submit(run_second_deadlock_query, fred_conn, event1, event2) + futures_first_query.result() + futures_second_query.result() + # Make sure innodb is updated. + time.sleep(0.3) + bob_conn.close() + fred_conn.close() + executor.shutdown() + + dd_run_check(check) + + deadlock_metric_end = aggregator.metrics("mysql.innodb.deadlocks") + + assert ( + len(deadlock_metric_end) == 2 and deadlock_metric_end[1].value - deadlocks_start == 1 + ), "there should be one new deadlock" + + def _get_conn_for_user(user, _autocommit=False): return pymysql.connect(host=HOST, port=PORT, user=user, password=user, autocommit=_autocommit) diff --git a/mysql/tests/variables.py b/mysql/tests/variables.py index 193856391853b8..22f3b854ddaf74 100644 --- a/mysql/tests/variables.py +++ b/mysql/tests/variables.py @@ -90,6 +90,7 @@ 'mysql.innodb.buffer_pool_read_requests', 'mysql.innodb.buffer_pool_reads', 'mysql.innodb.buffer_pool_utilization', + 'mysql.innodb.deadlocks', ] COMPLEX_INNODB_VARS = [