Skip to content

Commit

Permalink
Provide a reason for not having an execution plan (MySQL) (#9569)
Browse files Browse the repository at this point in the history
* [Add] MySQL no explain reasons + test

* [Fix] Incorrect code

* [Update] Readability

* [Update] Remove message if None + test cleanup

* [Update] Unnecessary check in test

* [Update] Variable to match Postgres' implementation

* [Update] Simplified method

* [Update] Error code changes + clarity improvement

* [Add] Collect all errors + include strategy

* [Update] Misc improvements

* Add 'with' as a supported explain statement
  • Loading branch information
alexbarksdale authored Jun 29, 2021
1 parent 4ade527 commit 45fefb2
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 23 deletions.
87 changes: 68 additions & 19 deletions mysql/datadog_checks/mysql/statement_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
from concurrent.futures.thread import ThreadPoolExecutor
from contextlib import closing
from enum import Enum

import pymysql
from cachetools import TTLCache
Expand All @@ -25,7 +26,7 @@
)
from datadog_checks.base.utils.serialization import json

VALID_EXPLAIN_STATEMENTS = frozenset({'select', 'table', 'delete', 'insert', 'replace', 'update'})
SUPPORTED_EXPLAIN_STATEMENTS = frozenset({'select', 'table', 'delete', 'insert', 'replace', 'update', 'with'})

# unless a specific table is configured, we try all of the events_statements tables in descending order of
# preference
Expand Down Expand Up @@ -203,6 +204,27 @@
)


class DBExplainError(Enum):
"""
Denotes the various reasons a query may not have an explain statement.
"""

# database error i.e connection error
database_error = 'database_error'

# this could be the result of a missing EXPLAIN function
invalid_schema = 'invalid_schema'

# some statements cannot be explained i.e AUTOVACUUM
no_plans_possible = 'no_plans_possible'

# unable to collect an exec plan with the strategy 'PROCEDURE' without a default schema
procedure_strategy_requires_default_schema = 'procedure_strategy_requires_default_schema'

# agent may not have access to the default schema
use_schema_error = 'use_schema_error'


class MySQLStatementSamples(object):
"""
Collects statement samples and execution plans.
Expand Down Expand Up @@ -483,16 +505,29 @@ def _collect_plan_for_statement(self, row):
if not self._explained_statements_ratelimiter.acquire(query_cache_key):
return None

plan = None
plan, explain_errors = None, None
with closing(self._get_db_connection().cursor()) as cursor:
try:
plan = self._explain_statement(cursor, row['sql_text'], row['current_schema'], obfuscated_statement)
plan, explain_errors = self._explain_statement(
cursor, row['sql_text'], row['current_schema'], obfuscated_statement
)
except Exception as e:
self._check.count(
"dd.mysql.statement_samples.error", 1, tags=self._tags + ["error:explain-{}".format(type(e))]
)
self._log.exception("Failed to explain statement: %s", obfuscated_statement)

collection_errors = []
if explain_errors:
for strategy, explain_err_code, explain_err in explain_errors:
collection_errors.append(
{
'strategy': strategy if strategy else None,
'code': explain_err_code.value if explain_err_code else None,
'message': '{}'.format(type(explain_err)) if explain_err else None,
}
)

normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None
if plan:
normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True) if plan else None
Expand All @@ -515,7 +550,12 @@ def _collect_plan_for_statement(self, row):
},
"db": {
"instance": row['current_schema'],
"plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
"plan": {
"definition": obfuscated_plan,
"cost": plan_cost,
"signature": plan_signature,
"collection_errors": collection_errors if collection_errors else None,
},
"query_signature": query_signature,
"resource_hash": apm_resource_hash,
"statement": obfuscated_statement,
Expand Down Expand Up @@ -660,21 +700,25 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement):
Tries the available methods used to explain a statement for the given schema. If a non-retryable
error occurs (such as a permissions error), then statements executed under the schema will be
disallowed in future attempts.
returns: An execution plan, otherwise it returns a list of tuples containing: (strategy, error code, error)
rtype: Optional[Dict], List[Tuple[Optional[str], Optional[DBExplainError], Optional[Exception]]]
"""
start_time = time.time()
strategy_cache_key = 'explain_strategy:%s' % schema
explain_strategy_error = 'ERROR'
tags = self._tags + ["schema:{}".format(schema)]

self._log.debug('explaining statement. schema=%s, statement="%s"', schema, statement)

if not self._can_explain(obfuscated_statement):
self._log.debug('Skipping statement which cannot be explained: %s', obfuscated_statement)
return None
return None, [(None, DBExplainError.no_plans_possible, None)]

if self._collection_strategy_cache.get(strategy_cache_key) == explain_strategy_error:
self._log.debug('Skipping statement due to cached collection failure: %s', obfuscated_statement)
return None
cached_strategy = self._collection_strategy_cache.get(strategy_cache_key)
if cached_strategy:
_, explain_err_code, _ = cached_strategy[0] # (strategy, explain_err_code, explain_err)
if explain_err_code:
self._log.debug('Skipping statement due to cached collection failure: %s', obfuscated_statement)
return None, None

try:
# If there was a default schema when this query was run, then switch to it before trying to collect
Expand All @@ -685,8 +729,9 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement):
except pymysql.err.DatabaseError as e:
if len(e.args) != 2:
raise
explain_errors = [(None, DBExplainError.use_schema_error, e)]
if e.args[0] in PYMYSQL_NON_RETRYABLE_ERRORS:
self._collection_strategy_cache[strategy_cache_key] = explain_strategy_error
self._collection_strategy_cache[strategy_cache_key] = explain_errors
self._check.count(
"dd.mysql.statement_samples.error", 1, tags=tags + ["error:explain-use-schema-{}".format(type(e))]
)
Expand All @@ -697,26 +742,29 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement):
schema,
obfuscated_statement,
)
return None
return None, explain_errors

# Use a cached strategy for the schema, if any, or try each strategy to collect plans
strategies = list(self._preferred_explain_strategies)
cached = self._collection_strategy_cache.get(strategy_cache_key)
if cached:
strategies.remove(cached)
strategies.insert(0, cached)
cached_strategy = self._collection_strategy_cache.get(strategy_cache_key)
if cached_strategy:
strategy, _, _ = cached_strategy[0] # (strategy, explain_err_code, explain_err)
strategies.remove(strategy)
strategies.insert(0, strategy)

explain_errors = []
for strategy in strategies:
try:
if not schema and strategy == "PROCEDURE":
explain_errors.append((strategy, DBExplainError.procedure_strategy_requires_default_schema, None))
self._log.debug(
'skipping PROCEDURE strategy as there is no default schema for this statement="%s"',
obfuscated_statement,
)
continue
plan = self._explain_strategies[strategy](cursor, statement, obfuscated_statement)
if plan:
self._collection_strategy_cache[strategy_cache_key] = strategy
self._collection_strategy_cache[strategy_cache_key] = [(strategy, None, None)]
self._log.debug(
'Successfully collected execution plan. strategy=%s, schema=%s, statement="%s"',
strategy,
Expand All @@ -728,13 +776,14 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement):
(time.time() - start_time) * 1000,
tags=self._tags + ["strategy:{}".format(strategy)],
)
return plan
return plan, None
except pymysql.err.DatabaseError as e:
if len(e.args) != 2:
raise
# we don't cache failed plan collection failures for specific queries because some queries in a schema
# can fail while others succeed. The failed collection will be cached for the specific query
# so we won't try to explain it again for the cache duration there.
explain_errors.append((strategy, DBExplainError.database_error, e))
self._check.count(
"dd.mysql.statement_samples.error",
1,
Expand All @@ -748,7 +797,7 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement):
obfuscated_statement,
)
continue
return None
return None, explain_errors

def _run_explain(self, cursor, statement, obfuscated_statement):
"""
Expand Down Expand Up @@ -776,7 +825,7 @@ def _run_fully_qualified_explain_procedure(self, cursor, statement, obfuscated_s

@staticmethod
def _can_explain(obfuscated_statement):
return obfuscated_statement.split(' ', 1)[0].lower() in VALID_EXPLAIN_STATEMENTS
return obfuscated_statement.split(' ', 1)[0].lower() in SUPPORTED_EXPLAIN_STATEMENTS

@staticmethod
def _parse_execution_plan_cost(execution_plan):
Expand Down
34 changes: 30 additions & 4 deletions mysql/tests/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,29 @@ def run_query(q):
)
@pytest.mark.parametrize("explain_strategy", ['PROCEDURE', 'FQ_PROCEDURE', 'STATEMENT', None])
@pytest.mark.parametrize(
"schema,statement",
"schema,statement,expected_collection_errors",
[
(None, 'select name as nam from testdb.users'),
('information_schema', 'select name as nam from testdb.users'),
('testdb', 'select name as nam from users'),
(
None,
'select name as nam from testdb.users',
[{'strategy': 'PROCEDURE', 'code': 'procedure_strategy_requires_default_schema', 'message': None}],
),
(
'information_schema',
'select * from testdb.users',
[{'strategy': 'PROCEDURE', 'code': 'database_error', 'message': "<class 'pymysql.err.OperationalError'>"}],
),
(
'testdb',
'select name as nam from users',
[
{
'strategy': 'FQ_PROCEDURE',
'code': 'database_error',
'message': "<class 'pymysql.err.ProgrammingError'>",
}
],
),
],
)
@pytest.mark.parametrize("aurora_replication_role", [None, "writer", "reader"])
Expand All @@ -422,6 +440,7 @@ def test_statement_samples_collect(
explain_strategy,
schema,
statement,
expected_collection_errors,
aurora_replication_role,
caplog,
):
Expand Down Expand Up @@ -486,6 +505,13 @@ def test_statement_samples_collect(
assert 'query_block' in json.loads(event['db']['plan']['definition']), "invalid json execution plan"
assert set(event['ddtags'].split(',')) == expected_tags

# Validate the events to ensure we've provided an explanation for not providing an exec plan
for event in matching:
if event['db']['plan']['definition'] is None:
assert event['db']['plan']['collection_errors'] == expected_collection_errors
else:
assert event['db']['plan']['collection_errors'] is None

# we avoid closing these in a try/finally block in order to maintain the connections in case we want to
# debug the test with --pdb
mysql_check._statement_samples._close_db_conn()
Expand Down

0 comments on commit 45fefb2

Please sign in to comment.