DBMON-2051: Adding a deadlock metric to MySQL databases #16904

Merged: 32 commits, Feb 28, 2024

Commits
f6da4ae
DBMON-2051: Adding a deadlock metric to Maria DB
Feb 19, 2024
f0a7bcf
removing print outs
kozlovb Feb 20, 2024
402fa6f
removed prints
kozlovb Feb 20, 2024
cdf9819
trying to create a proper deadlock
kozlovb Feb 20, 2024
89728de
Fix thread pool limit
kozlovb Feb 20, 2024
af655a4
Add a check
kozlovb Feb 20, 2024
9f04c81
Add wait to let update db threads to finish
kozlovb Feb 20, 2024
c87e65c
Add select query
kozlovb Feb 20, 2024
e1c2b7a
fix query
kozlovb Feb 20, 2024
6b1eb22
revert changes to conftest
kozlovb Feb 21, 2024
0c6b230
fix test
kozlovb Feb 21, 2024
478063b
Add events
kozlovb Feb 21, 2024
03efd98
remove obsolete diff
kozlovb Feb 21, 2024
deeddbb
cleaned up pr
kozlovb Feb 22, 2024
dc5693d
moved tests to the end
kozlovb Feb 22, 2024
6c7dc2e
Put back empty lines
kozlovb Feb 22, 2024
1492844
put back empty line
kozlovb Feb 22, 2024
7441ad0
added a changelog
kozlovb Feb 22, 2024
edd227c
changed pr number in changelog
kozlovb Feb 22, 2024
8abc8bf
removed patched config
kozlovb Feb 22, 2024
ffd08e1
reverting back conftest
kozlovb Feb 22, 2024
5c4e7ba
reverted common py
kozlovb Feb 22, 2024
49582d0
applied linter
kozlovb Feb 22, 2024
daac79f
Added the correct changelog
kozlovb Feb 22, 2024
ee10ca3
add a check for innnodb
kozlovb Feb 23, 2024
e295f28
removed superfluous method
kozlovb Feb 23, 2024
8e89bf4
removed whitespace
kozlovb Feb 23, 2024
8bce957
changed to monotonic_count
kozlovb Feb 23, 2024
e72f952
removed check from constructor
kozlovb Feb 26, 2024
126881e
modified variable names
kozlovb Feb 26, 2024
77d44a1
fixed changelog and test error
kozlovb Feb 27, 2024
e513c14
improved the changelog
kozlovb Feb 28, 2024
1 change: 1 addition & 0 deletions mysql/changelog.d/16904.added
@@ -0,0 +1 @@
DBMON-2051: Adding a deadlock metric to MySQL databases.
35 changes: 21 additions & 14 deletions mysql/datadog_checks/mysql/mysql.py
@@ -56,6 +56,7 @@
from .innodb_metrics import InnoDBMetrics
from .metadata import MySQLMetadata
from .queries import (
QUERY_DEADLOCKS,
QUERY_USER_CONNECTIONS,
SQL_95TH_PERCENTILE,
SQL_AVG_QUERY_RUN_TIME,
@@ -135,11 +136,12 @@ def __init__(self, name, init_config, instances):
ttl=self._config.database_instance_collection_interval,
) # type: TTLCache

self._runtime_queries = None
self._runtime_queries_cached = None
# Keep a copy of the tags without the internal resource tags so they can be used for paths that don't
# go through the agent internal metrics submission processing those tags
self._non_internal_tags = copy.deepcopy(self.tags)
self.set_resource_tags()
self._is_innodb_engine_enabled_cached = None

def execute_query_raw(self, query):
with closing(self._conn.cursor(pymysql.cursors.SSCursor)) as cursor:
@@ -297,8 +299,8 @@ def check(self, _):
if not self._config.only_custom_queries:
self._collect_metrics(db, tags=tags)
self._collect_system_metrics(self._config.host, db, tags)
if self.runtime_queries:
self.runtime_queries.execute(extra_tags=tags)
if self._get_runtime_queries(db):
self._get_runtime_queries(db).execute(extra_tags=tags)

if self._config.dbm_enabled:
dbm_tags = list(set(self.service_check_tags) | set(tags))
@@ -335,23 +337,25 @@ def _new_query_executor(self, queries):
track_operation_time=True,
)

@property
def runtime_queries(self):
def _get_runtime_queries(self, db):
"""
Initializes runtime queries which depend on outside factors (e.g. permission checks) to load first.
"""
if self._runtime_queries:
return self._runtime_queries
if self._runtime_queries_cached:
return self._runtime_queries_cached

queries = []

if self._check_innodb_engine_enabled(db):
queries.extend([QUERY_DEADLOCKS])

if self.performance_schema_enabled:
queries.extend([QUERY_USER_CONNECTIONS])

self._runtime_queries = self._new_query_executor(queries)
self._runtime_queries.compile_queries()
self._runtime_queries_cached = self._new_query_executor(queries)
self._runtime_queries_cached.compile_queries()
self.log.debug("initialized runtime queries")
return self._runtime_queries
return self._runtime_queries_cached

def _set_qcache_stats(self):
host_key = self._get_host_key()
@@ -447,7 +451,7 @@ def _collect_metrics(self, db, tags):

if not is_affirmative(
self._config.options.get('disable_innodb_metrics', False)
) and self._is_innodb_engine_enabled(db):
) and self._check_innodb_engine_enabled(db):
with tracked_query(self, operation="innodb_metrics"):
results.update(self.innodb_stats.get_stats_from_innodb_status(db))
self.innodb_stats.process_innodb_stats(results, self._config.options, metrics)
@@ -999,18 +1003,21 @@ def _get_binary_log_stats(self, db):
self.warning("Privileges error accessing the BINARY LOGS (must grant REPLICATION CLIENT): %s", e)
return None

def _is_innodb_engine_enabled(self, db):
def _check_innodb_engine_enabled(self, db):
# Whether InnoDB engine is available or not can be found out either
# from the output of SHOW ENGINES or from information_schema.ENGINES
# table. The latter is chosen because that involves no string parsing.
if self._is_innodb_engine_enabled_cached is not None:
return self._is_innodb_engine_enabled_cached
try:
with closing(db.cursor()) as cursor:
cursor.execute(SQL_INNODB_ENGINES)
return cursor.rowcount > 0
self._is_innodb_engine_enabled_cached = cursor.rowcount > 0

except (pymysql.err.InternalError, pymysql.err.OperationalError, pymysql.err.NotSupportedError) as e:
self.warning("Possibly innodb stats unavailable - error querying engines table: %s", e)
return False
self._is_innodb_engine_enabled_cached = False
return self._is_innodb_engine_enabled_cached

def _get_replica_stats(self, db, is_mariadb, replication_channel):
replica_results = defaultdict(dict)
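Note on the mysql.py changes above: the `runtime_queries` property becomes `_get_runtime_queries(db)` so the InnoDB engine check (whose result is now cached in `_is_innodb_engine_enabled_cached`) can decide whether `QUERY_DEADLOCKS` gets registered at all. A minimal, self-contained sketch of that lazy build-and-cache flow follows; `StubExecutor` and the two query dicts are placeholders standing in for the integration's real `QueryExecutor` and query definitions, and only the control flow mirrors the diff.

# Minimal sketch of the "build once, cache, reuse" flow used for runtime queries.
# StubExecutor and the query dicts below are placeholders, not the integration's real classes.

QUERY_DEADLOCKS = {'name': 'information_schema.INNODB_METRICS.lock_deadlocks'}
QUERY_USER_CONNECTIONS = {'name': 'performance_schema.threads'}


class StubExecutor:
    def __init__(self, queries):
        self.queries = queries

    def compile_queries(self):
        pass  # the real QueryExecutor validates and compiles the query definitions here

    def execute(self, extra_tags=None):
        print("running", [q['name'] for q in self.queries], "with tags", extra_tags)


class RuntimeQueriesSketch:
    def __init__(self, performance_schema_enabled, innodb_enabled):
        self.performance_schema_enabled = performance_schema_enabled
        self._innodb_enabled = innodb_enabled  # stand-in for querying the engines table
        self._runtime_queries_cached = None
        self._is_innodb_engine_enabled_cached = None

    def _check_innodb_engine_enabled(self, db):
        # Cache the result so the engines table is only queried once per check instance.
        if self._is_innodb_engine_enabled_cached is None:
            self._is_innodb_engine_enabled_cached = self._innodb_enabled
        return self._is_innodb_engine_enabled_cached

    def _get_runtime_queries(self, db):
        # Build the executor on first use; later check runs reuse the cached instance.
        if self._runtime_queries_cached:
            return self._runtime_queries_cached

        queries = []
        if self._check_innodb_engine_enabled(db):
            queries.append(QUERY_DEADLOCKS)
        if self.performance_schema_enabled:
            queries.append(QUERY_USER_CONNECTIONS)

        self._runtime_queries_cached = StubExecutor(queries)
        self._runtime_queries_cached.compile_queries()
        return self._runtime_queries_cached


check = RuntimeQueriesSketch(performance_schema_enabled=True, innodb_enabled=True)
check._get_runtime_queries(db=None).execute(extra_tags=['env:test'])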
13 changes: 13 additions & 0 deletions mysql/datadog_checks/mysql/queries.py
@@ -74,6 +74,19 @@
SELECT plugin_status
FROM information_schema.plugins WHERE plugin_name='group_replication'"""

QUERY_DEADLOCKS = {
'name': 'information_schema.INNODB_METRICS.lock_deadlocks',
'query': """
SELECT
count as deadlocks
FROM
information_schema.INNODB_METRICS
WHERE
NAME='lock_deadlocks'
""".strip(),
'columns': [{'name': 'mysql.innodb.deadlocks', 'type': 'monotonic_count'}],
}

QUERY_USER_CONNECTIONS = {
'name': 'performance_schema.threads',
'query': """
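QUERY_DEADLOCKS reads the cumulative lock_deadlocks counter from information_schema.INNODB_METRICS and maps it to mysql.innodb.deadlocks as a monotonic_count, so the Agent reports the increase between check runs rather than the raw total. A quick standalone way to inspect the raw value the query sees is sketched below; the connection parameters are placeholders for your own environment.

# Standalone sketch: run the same INNODB_METRICS query the check uses and print the counter.
# Host, user, and password are placeholders; adjust them for your environment.
import pymysql

conn = pymysql.connect(host='localhost', user='datadog', password='<password>')
try:
    with conn.cursor() as cursor:
        cursor.execute(
            "SELECT count AS deadlocks "
            "FROM information_schema.INNODB_METRICS "
            "WHERE NAME = 'lock_deadlocks'"
        )
        row = cursor.fetchone()
        # The counter is cumulative since server startup; because the check submits it as a
        # monotonic_count, Datadog charts the delta between consecutive collections.
        print("lock_deadlocks so far:", row[0] if row else "metric not found")
finally:
    conn.close()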
1 change: 1 addition & 0 deletions mysql/metadata.csv
@@ -12,6 +12,7 @@ mysql.innodb.buffer_pool_utilization,gauge,,fraction,,The utilization of the Inn
mysql.innodb.current_row_locks,gauge,,lock,,The number of current row locks.,-1,mysql,current row locks,
mysql.innodb.data_reads,gauge,,read,second,The rate of data reads.,0,mysql,data reads,
mysql.innodb.data_writes,gauge,,write,second,The rate of data writes.,0,mysql,data writes,
mysql.innodb.deadlocks,count,,lock,,"The number of deadlocks.",0,mysql,number of deadlocks,
mysql.innodb.mutex_os_waits,gauge,,event,second,The rate of mutex OS waits.,0,mysql,mutex os waits,
mysql.innodb.mutex_spin_rounds,gauge,,event,second,The rate of mutex spin rounds.,0,mysql,mutex spin rounds,
mysql.innodb.mutex_spin_waits,gauge,,event,second,The rate of mutex spin waits.,0,mysql,mutex spin waits,
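One note on the metadata.csv row: monotonic_count submissions are reported to Datadog as count metrics, which is why the code submits mysql.innodb.deadlocks as monotonic_count while this row declares the type count. The column names below are assumed from the usual integrations metadata.csv layout (metric name, type, interval, unit, per-unit, description, orientation, integration, short name, curated metric) rather than quoted from this repository; the small sketch just splits the new row into those labeled fields.

# Sketch: split the new metadata.csv row into labeled fields.
# The header names are assumed from the common integrations metadata.csv convention.
import csv
import io

header = [
    'metric_name', 'metric_type', 'interval', 'unit_name', 'per_unit_name',
    'description', 'orientation', 'integration', 'short_name', 'curated_metric',
]
row = 'mysql.innodb.deadlocks,count,,lock,,"The number of deadlocks.",0,mysql,number of deadlocks,'

fields = next(csv.reader(io.StringIO(row)))
for name, value in zip(header, fields):
    print(f"{name:>15}: {value!r}")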
87 changes: 87 additions & 0 deletions mysql/tests/test_query_activity.py
@@ -11,6 +11,7 @@
from copy import copy
from datetime import datetime
from os import environ
from threading import Event

import mock
import pymysql
@@ -478,6 +479,92 @@ def _expected_dbm_job_err_tags(dbm_instance):
]


@pytest.mark.integration
@pytest.mark.usefixtures('dd_environment')
def test_if_deadlock_metric_is_collected(aggregator, dd_run_check, dbm_instance):
check = MySql(CHECK_NAME, {}, [dbm_instance])
dd_run_check(check)
deadlock_metric = aggregator.metrics("mysql.innodb.deadlocks")

assert len(deadlock_metric) == 1, "there should be one deadlock metric"


@pytest.mark.skipif(
environ.get('MYSQL_FLAVOR') == 'mariadb' or MYSQL_VERSION_PARSED < parse_version('8.0'),
reason='Deadlock count is not updated in MariaDB or older MySQL versions',
)
@pytest.mark.integration
@pytest.mark.usefixtures('dd_environment')
def test_deadlocks(aggregator, dd_run_check, dbm_instance):
check = MySql(CHECK_NAME, {}, [dbm_instance])
dd_run_check(check)
deadlocks_start = 0
deadlock_metric_start = aggregator.metrics("mysql.innodb.deadlocks")

assert len(deadlock_metric_start) == 1, "there should be one deadlock metric"

deadlocks_start = deadlock_metric_start[0].value

first_query = "SELECT * FROM testdb.users WHERE id = 1 FOR UPDATE;"
second_query = "SELECT * FROM testdb.users WHERE id = 2 FOR UPDATE;"

def run_first_deadlock_query(conn, event1, event2):
conn.begin()
try:
conn.cursor().execute("START TRANSACTION;")
conn.cursor().execute(first_query)
event1.set()
event2.wait()
conn.cursor().execute(second_query)
conn.cursor().execute("COMMIT;")
except Exception as e:
# Exception is expected due to a deadlock
print(e)
pass
conn.commit()

def run_second_deadlock_query(conn, event1, event2):
conn.begin()
try:
event1.wait()
conn.cursor().execute("START TRANSACTION;")
conn.cursor().execute(second_query)
event2.set()
conn.cursor().execute(first_query)
conn.cursor().execute("COMMIT;")
except Exception as e:
# Exception is expected due to a deadlock
print(e)
pass
conn.commit()

bob_conn = _get_conn_for_user('bob')
fred_conn = _get_conn_for_user('fred')

executor = concurrent.futures.thread.ThreadPoolExecutor(2)

event1 = Event()
event2 = Event()

futures_first_query = executor.submit(run_first_deadlock_query, bob_conn, event1, event2)
futures_second_query = executor.submit(run_second_deadlock_query, fred_conn, event1, event2)
futures_first_query.result()
futures_second_query.result()
# Make sure innodb is updated.
time.sleep(0.3)
bob_conn.close()
fred_conn.close()
executor.shutdown()

dd_run_check(check)

deadlock_metric_end = aggregator.metrics("mysql.innodb.deadlocks")

assert (
len(deadlock_metric_end) == 2 and deadlock_metric_end[1].value - deadlocks_start == 1
), "there should be one new deadlock"


def _get_conn_for_user(user, _autocommit=False):
return pymysql.connect(host=HOST, port=PORT, user=user, password=user, autocommit=_autocommit)

1 change: 1 addition & 0 deletions mysql/tests/variables.py
@@ -90,6 +90,7 @@
'mysql.innodb.buffer_pool_read_requests',
'mysql.innodb.buffer_pool_reads',
'mysql.innodb.buffer_pool_utilization',
'mysql.innodb.deadlocks',
]

COMPLEX_INNODB_VARS = [