Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DBMON-3642] Proper handle events_statements_* timer_end overflow #16936

Merged
merged 3 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mysql/changelog.d/16936.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[DBMON-3642] Proper handle events_statements_* timer_end overflow
44 changes: 36 additions & 8 deletions mysql/datadog_checks/mysql/statement_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@
'current_schema',
# used for signature
'digest_text',
'timer_end_time_s',
'uptime',
'now',
'timer_end',
'max_timer_wait_ns',
'timer_start',
# included as network.client.ip
Expand All @@ -70,7 +72,9 @@
digest,
digest_text,
timer_start,
@startup_time_s+timer_end*1e-12 as timer_end_time_s,
@uptime as uptime,
unix_timestamp() as now,
timer_end,
timer_wait / 1000 AS timer_wait_ns,
lock_time / 1000 AS lock_time_ns,
rows_affected,
Expand Down Expand Up @@ -101,11 +105,11 @@
""",
).strip()

STARTUP_TIME_SUBQUERY = re.sub(
UPTIME_SUBQUERY = re.sub(
r'\s+',
' ',
"""
(SELECT UNIX_TIMESTAMP()-VARIABLE_VALUE
(SELECT VARIABLE_VALUE
FROM {global_status_table}
WHERE VARIABLE_NAME='UPTIME')
""",
Expand Down Expand Up @@ -142,6 +146,9 @@
}
)

# the max value of signed BIGINT type column
BIGINT_MAX = 2**63 - 1


class DBExplainErrorCode(Enum):
"""
Expand Down Expand Up @@ -324,9 +331,7 @@ def _get_new_events_statements_current(self):
with closing(self._get_db_connection().cursor(pymysql.cursors.DictCursor)) as cursor:
self._cursor_run(
cursor,
"set @startup_time_s = {}".format(
STARTUP_TIME_SUBQUERY.format(global_status_table=self._global_status_table)
),
"set @uptime = {}".format(UPTIME_SUBQUERY.format(global_status_table=self._global_status_table)),
)
self._cursor_run(cursor, EVENTS_STATEMENTS_CURRENT_QUERY)
rows = cursor.fetchall()
Expand Down Expand Up @@ -428,7 +433,7 @@ def _collect_plan_for_statement(self, row):
query_plan_cache_key = (query_cache_key, plan_signature)
if self._seen_samples_ratelimiter.acquire(query_plan_cache_key):
return {
"timestamp": row["timer_end_time_s"] * 1000,
"timestamp": self._calculate_timer_end(row),
"dbm_type": "plan",
"host": self._check.resolved_hostname,
"ddagentversion": datadog_agent.get_version(),
Expand Down Expand Up @@ -465,6 +470,12 @@ def _collect_plan_for_statement(self, row):
def _collect_plans_for_statements(self, rows):
for row in rows:
try:
if not row['timer_end']:
# If an event is produced from an instrument that has TIMED = NO,
# timing information is not collected,
# and TIMER_START, TIMER_END, and TIMER_WAIT are all NULL.
self._log.debug("Skipping statement with missing timer_end: %s", row)
continue
event = self._collect_plan_for_statement(row)
if event:
yield event
Expand Down Expand Up @@ -799,3 +810,20 @@ def _run_fully_qualified_explain_procedure(self, schema, cursor, statement, obfu
@staticmethod
def _can_explain(obfuscated_statement):
return obfuscated_statement.split(' ', 1)[0].lower() in SUPPORTED_EXPLAIN_STATEMENTS

@staticmethod
def _calculate_timer_end(row):
"""
Calculate the timer_end_time_s from the timer_end, now and uptime fields
"""
# timer_end is in picoseconds and uptime is in seconds
# timer_end can overflow, so we need to calcuate how many times it overflowed
timer_end = row['timer_end']
now = row['now']
uptime = int(row['uptime'])

bigint_max_in_seconds = BIGINT_MAX * 1e-12
# when timer_end is greater than bigint_max_in_seconds, we need to add the difference to the uptime
seconds_to_add = uptime // bigint_max_in_seconds * bigint_max_in_seconds
timer_end_time_s = now - uptime + seconds_to_add + timer_end * 1e-12
return int(timer_end_time_s * 1000)
24 changes: 23 additions & 1 deletion mysql/tests/test_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,8 @@ def test_statement_samples_collect(
assert event['db']['plan']['collection_errors'] == expected_collection_errors
else:
assert event['db']['plan']['collection_errors'] is None
assert event['timestamp'] is not None
assert time.time() - event['timestamp'] < 60 # ensure the timestamp is recent

# we avoid closing these in a try/finally block in order to maintain the connections in case we want to
# debug the test with --pdb
Expand Down Expand Up @@ -528,7 +530,9 @@ def test_missing_explain_procedure(dbm_instance, dd_run_check, aggregator, state
'sql_text': statement,
'query': statement,
'digest_text': statement,
'timer_end_time_s': 10003.1,
'now': time.time(),
'uptime': '21466230',
'timer_end': 3019558487284095384,
'timer_wait_ns': 12.9,
}

Expand Down Expand Up @@ -969,3 +973,21 @@ def test_normalize_queries(dbm_instance):
'lock_time': 18298000,
}
]


@pytest.mark.unit
@pytest.mark.parametrize(
"timer_end,now,uptime,expected_timestamp",
[
pytest.param(3019558487284095384, 1708025457, 100, 1711044915487, id="picoseconds not overflow"),
pytest.param(3019558487284095384, 1708025457, 21466230, 1708025529560, id="picoseconds overflow"),
],
)
def test_statement_samples_calculate_timer_end(dbm_instance, timer_end, now, uptime, expected_timestamp):
check = MySql(common.CHECK_NAME, {}, [dbm_instance])
row = {
'timer_end': timer_end,
'now': now,
'uptime': uptime,
}
assert check._statement_samples._calculate_timer_end(row) == expected_timestamp
Loading