diff --git a/mysql/datadog_checks/mysql/statement_samples.py b/mysql/datadog_checks/mysql/statement_samples.py index e3b371f9486f2..e79c08253397a 100644 --- a/mysql/datadog_checks/mysql/statement_samples.py +++ b/mysql/datadog_checks/mysql/statement_samples.py @@ -5,6 +5,7 @@ import time from concurrent.futures.thread import ThreadPoolExecutor from contextlib import closing +from enum import Enum import pymysql from cachetools import TTLCache @@ -25,7 +26,7 @@ ) from datadog_checks.base.utils.serialization import json -VALID_EXPLAIN_STATEMENTS = frozenset({'select', 'table', 'delete', 'insert', 'replace', 'update'}) +SUPPORTED_EXPLAIN_STATEMENTS = frozenset({'select', 'table', 'delete', 'insert', 'replace', 'update', 'with'}) # unless a specific table is configured, we try all of the events_statements tables in descending order of # preference @@ -203,6 +204,27 @@ ) +class DBExplainError(Enum): + """ + Denotes the various reasons a query may not have an explain statement. + """ + + # database error i.e connection error + database_error = 'database_error' + + # this could be the result of a missing EXPLAIN function + invalid_schema = 'invalid_schema' + + # some statements cannot be explained i.e AUTOVACUUM + no_plans_possible = 'no_plans_possible' + + # unable to collect an exec plan with the strategy 'PROCEDURE' without a default schema + procedure_strategy_requires_default_schema = 'procedure_strategy_requires_default_schema' + + # agent may not have access to the default schema + use_schema_error = 'use_schema_error' + + class MySQLStatementSamples(object): """ Collects statement samples and execution plans. @@ -483,16 +505,29 @@ def _collect_plan_for_statement(self, row): if not self._explained_statements_ratelimiter.acquire(query_cache_key): return None - plan = None + plan, explain_errors = None, None with closing(self._get_db_connection().cursor()) as cursor: try: - plan = self._explain_statement(cursor, row['sql_text'], row['current_schema'], obfuscated_statement) + plan, explain_errors = self._explain_statement( + cursor, row['sql_text'], row['current_schema'], obfuscated_statement + ) except Exception as e: self._check.count( "dd.mysql.statement_samples.error", 1, tags=self._tags + ["error:explain-{}".format(type(e))] ) self._log.exception("Failed to explain statement: %s", obfuscated_statement) + collection_errors = [] + if explain_errors: + for strategy, explain_err_code, explain_err in explain_errors: + collection_errors.append( + { + 'strategy': strategy if strategy else None, + 'code': explain_err_code.value if explain_err_code else None, + 'message': '{}'.format(type(explain_err)) if explain_err else None, + } + ) + normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None if plan: normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True) if plan else None @@ -515,7 +550,12 @@ def _collect_plan_for_statement(self, row): }, "db": { "instance": row['current_schema'], - "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature}, + "plan": { + "definition": obfuscated_plan, + "cost": plan_cost, + "signature": plan_signature, + "collection_errors": collection_errors if collection_errors else None, + }, "query_signature": query_signature, "resource_hash": apm_resource_hash, "statement": obfuscated_statement, @@ -660,21 +700,25 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement): Tries the available methods used to explain a statement for the given schema. If a non-retryable error occurs (such as a permissions error), then statements executed under the schema will be disallowed in future attempts. + returns: An execution plan, otherwise it returns a list of tuples containing: (strategy, error code, error) + rtype: Optional[Dict], List[Tuple[Optional[str], Optional[DBExplainError], Optional[Exception]]] """ start_time = time.time() strategy_cache_key = 'explain_strategy:%s' % schema - explain_strategy_error = 'ERROR' tags = self._tags + ["schema:{}".format(schema)] self._log.debug('explaining statement. schema=%s, statement="%s"', schema, statement) if not self._can_explain(obfuscated_statement): self._log.debug('Skipping statement which cannot be explained: %s', obfuscated_statement) - return None + return None, [(None, DBExplainError.no_plans_possible, None)] - if self._collection_strategy_cache.get(strategy_cache_key) == explain_strategy_error: - self._log.debug('Skipping statement due to cached collection failure: %s', obfuscated_statement) - return None + cached_strategy = self._collection_strategy_cache.get(strategy_cache_key) + if cached_strategy: + _, explain_err_code, _ = cached_strategy[0] # (strategy, explain_err_code, explain_err) + if explain_err_code: + self._log.debug('Skipping statement due to cached collection failure: %s', obfuscated_statement) + return None, None try: # If there was a default schema when this query was run, then switch to it before trying to collect @@ -685,8 +729,9 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement): except pymysql.err.DatabaseError as e: if len(e.args) != 2: raise + explain_errors = [(None, DBExplainError.use_schema_error, e)] if e.args[0] in PYMYSQL_NON_RETRYABLE_ERRORS: - self._collection_strategy_cache[strategy_cache_key] = explain_strategy_error + self._collection_strategy_cache[strategy_cache_key] = explain_errors self._check.count( "dd.mysql.statement_samples.error", 1, tags=tags + ["error:explain-use-schema-{}".format(type(e))] ) @@ -697,18 +742,21 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement): schema, obfuscated_statement, ) - return None + return None, explain_errors # Use a cached strategy for the schema, if any, or try each strategy to collect plans strategies = list(self._preferred_explain_strategies) - cached = self._collection_strategy_cache.get(strategy_cache_key) - if cached: - strategies.remove(cached) - strategies.insert(0, cached) + cached_strategy = self._collection_strategy_cache.get(strategy_cache_key) + if cached_strategy: + strategy, _, _ = cached_strategy[0] # (strategy, explain_err_code, explain_err) + strategies.remove(strategy) + strategies.insert(0, strategy) + explain_errors = [] for strategy in strategies: try: if not schema and strategy == "PROCEDURE": + explain_errors.append((strategy, DBExplainError.procedure_strategy_requires_default_schema, None)) self._log.debug( 'skipping PROCEDURE strategy as there is no default schema for this statement="%s"', obfuscated_statement, @@ -716,7 +764,7 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement): continue plan = self._explain_strategies[strategy](cursor, statement, obfuscated_statement) if plan: - self._collection_strategy_cache[strategy_cache_key] = strategy + self._collection_strategy_cache[strategy_cache_key] = [(strategy, None, None)] self._log.debug( 'Successfully collected execution plan. strategy=%s, schema=%s, statement="%s"', strategy, @@ -728,13 +776,14 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement): (time.time() - start_time) * 1000, tags=self._tags + ["strategy:{}".format(strategy)], ) - return plan + return plan, None except pymysql.err.DatabaseError as e: if len(e.args) != 2: raise # we don't cache failed plan collection failures for specific queries because some queries in a schema # can fail while others succeed. The failed collection will be cached for the specific query # so we won't try to explain it again for the cache duration there. + explain_errors.append((strategy, DBExplainError.database_error, e)) self._check.count( "dd.mysql.statement_samples.error", 1, @@ -748,7 +797,7 @@ def _explain_statement(self, cursor, statement, schema, obfuscated_statement): obfuscated_statement, ) continue - return None + return None, explain_errors def _run_explain(self, cursor, statement, obfuscated_statement): """ @@ -776,7 +825,7 @@ def _run_fully_qualified_explain_procedure(self, cursor, statement, obfuscated_s @staticmethod def _can_explain(obfuscated_statement): - return obfuscated_statement.split(' ', 1)[0].lower() in VALID_EXPLAIN_STATEMENTS + return obfuscated_statement.split(' ', 1)[0].lower() in SUPPORTED_EXPLAIN_STATEMENTS @staticmethod def _parse_execution_plan_cost(execution_plan): diff --git a/mysql/tests/test_mysql.py b/mysql/tests/test_mysql.py index 11aba42382dbf..71e62ecef75af 100644 --- a/mysql/tests/test_mysql.py +++ b/mysql/tests/test_mysql.py @@ -406,11 +406,29 @@ def run_query(q): ) @pytest.mark.parametrize("explain_strategy", ['PROCEDURE', 'FQ_PROCEDURE', 'STATEMENT', None]) @pytest.mark.parametrize( - "schema,statement", + "schema,statement,expected_collection_errors", [ - (None, 'select name as nam from testdb.users'), - ('information_schema', 'select name as nam from testdb.users'), - ('testdb', 'select name as nam from users'), + ( + None, + 'select name as nam from testdb.users', + [{'strategy': 'PROCEDURE', 'code': 'procedure_strategy_requires_default_schema', 'message': None}], + ), + ( + 'information_schema', + 'select * from testdb.users', + [{'strategy': 'PROCEDURE', 'code': 'database_error', 'message': ""}], + ), + ( + 'testdb', + 'select name as nam from users', + [ + { + 'strategy': 'FQ_PROCEDURE', + 'code': 'database_error', + 'message': "", + } + ], + ), ], ) @pytest.mark.parametrize("aurora_replication_role", [None, "writer", "reader"]) @@ -422,6 +440,7 @@ def test_statement_samples_collect( explain_strategy, schema, statement, + expected_collection_errors, aurora_replication_role, caplog, ): @@ -486,6 +505,13 @@ def test_statement_samples_collect( assert 'query_block' in json.loads(event['db']['plan']['definition']), "invalid json execution plan" assert set(event['ddtags'].split(',')) == expected_tags + # Validate the events to ensure we've provided an explanation for not providing an exec plan + for event in matching: + if event['db']['plan']['definition'] is None: + assert event['db']['plan']['collection_errors'] == expected_collection_errors + else: + assert event['db']['plan']['collection_errors'] is None + # we avoid closing these in a try/finally block in order to maintain the connections in case we want to # debug the test with --pdb mysql_check._statement_samples._close_db_conn()