-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Track DB scheduling delay per-request #2775
Changes from all commits
4c40a62
411350f
ca8c4cd
39b2998
06caba6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -217,7 +217,6 @@ def loop(): | |
|
||
def _new_transaction(self, conn, desc, after_callbacks, final_callbacks, | ||
logging_context, func, *args, **kwargs): | ||
start = time.time() * 1000 | ||
txn_id = self._TXN_ID | ||
|
||
# We don't really need these to be unique, so lets stop it from | ||
|
@@ -277,21 +276,24 @@ def _new_transaction(self, conn, desc, after_callbacks, final_callbacks, | |
logger.debug("[TXN FAIL] {%s} %s", name, e) | ||
raise | ||
finally: | ||
end = time.time() * 1000 | ||
duration = end - start | ||
transaction_logger.debug("[TXN END] {%s}", name) | ||
|
||
if logging_context is not None: | ||
logging_context.add_database_transaction(duration) | ||
@defer.inlineCallbacks | ||
def runInteraction(self, desc, func, *args, **kwargs): | ||
"""Starts a transaction on the database and runs a given function | ||
|
||
transaction_logger.debug("[TXN END] {%s} %f", name, duration) | ||
Arguments: | ||
desc (str): description of the transaction, for logging and metrics | ||
func (func): callback function, which will be called with a | ||
database transaction (twisted.enterprise.adbapi.Transaction) as | ||
its first argument, followed by `args` and `kwargs`. | ||
|
||
self._current_txn_total_time += duration | ||
self._txn_perf_counters.update(desc, start, end) | ||
sql_txn_timer.inc_by(duration, desc) | ||
args (list): positional args to pass to `func` | ||
kwargs (dict): named args to pass to `func` | ||
|
||
@defer.inlineCallbacks | ||
def runInteraction(self, desc, func, *args, **kwargs): | ||
"""Wraps the .runInteraction() method on the underlying db_pool.""" | ||
Returns: | ||
Deferred: The result of func | ||
""" | ||
current_context = LoggingContext.current_context() | ||
|
||
start_time = time.time() * 1000 | ||
|
@@ -301,17 +303,32 @@ def runInteraction(self, desc, func, *args, **kwargs): | |
|
||
def inner_func(conn, *args, **kwargs): | ||
with LoggingContext("runInteraction") as context: | ||
sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) | ||
sched_delay_ms = time.time() * 1000 - start_time | ||
sql_scheduling_timer.inc_by(sched_delay_ms) | ||
|
||
if self.database_engine.is_connection_closed(conn): | ||
logger.debug("Reconnecting closed database connection") | ||
conn.reconnect() | ||
|
||
current_context.copy_to(context) | ||
return self._new_transaction( | ||
conn, desc, after_callbacks, final_callbacks, current_context, | ||
func, *args, **kwargs | ||
) | ||
txn_start_time_ms = time.time() * 1000 | ||
try: | ||
return self._new_transaction( | ||
conn, desc, after_callbacks, final_callbacks, current_context, | ||
func, *args, **kwargs | ||
) | ||
finally: | ||
txn_end_time_ms = time.time() * 1000 | ||
txn_duration = txn_end_time_ms - txn_start_time_ms | ||
|
||
current_context.add_database_transaction( | ||
txn_duration, sched_delay_ms, | ||
) | ||
self._current_txn_total_time += txn_duration | ||
self._txn_perf_counters.update( | ||
desc, txn_start_time_ms, txn_end_time_ms, | ||
) | ||
sql_txn_timer.inc_by(txn_duration, desc) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Won't this mean that any transactions created by calling runWithConnection directly won't be measured? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, yes, but they aren't currently measured as part of […]. Looks like the only thing that uses […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. They may not be picked up by […] |
||
|
||
try: | ||
with PreserveLoggingContext(): | ||
|
@@ -329,7 +346,7 @@ def inner_func(conn, *args, **kwargs): | |
|
||
@defer.inlineCallbacks | ||
def runWithConnection(self, func, *args, **kwargs): | ||
"""Wraps the .runInteraction() method on the underlying db_pool.""" | ||
"""Wraps the .runWithConnection() method on the underlying db_pool.""" | ||
current_context = LoggingContext.current_context() | ||
|
||
start_time = time.time() * 1000 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is a distribution here.
Honestly I think it makes things clearer to always include the count, as otherwise it looks a bit odd to do:
rate(metric_one:total)/rate(another_metric:count)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
er, oops.
It seems really silly to me to maintain six identical copies of the same counter here. That's a lot of pointless objects, hash lookups, and integer increments. IMHO what we ought to be doing is
rate(synapse_http_server_response_db_sched_duration)/rate(synapse_http_server_response_count)
, which feels much more intuitive, but will take a bit of work to get there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would be surprised if they're not completely dwarfed by transaction overhead.
Possibly, but having things consistent seems more intuitive than having a couple that don't fit.