fix: higher application blocking latency precision (#1676)
Use nanoseconds for application blocking latency to reduce rounding errors from starting and stopping the timer within a millisecond.
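As a minimal standalone sketch of the problem (the `LatencyPrecisionSketch` class and `busyWaitNanos` helper below are invented for illustration; only the Guava `Stopwatch` / `Duration` usage mirrors the tracer), accumulating `elapsed(TimeUnit.MILLISECONDS)` truncates each sub-millisecond attempt to 0, while accumulating nanoseconds and converting once at the end keeps the fractions:

```java
import com.google.common.base.Stopwatch;
import java.time.Duration;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

public class LatencyPrecisionSketch {
  public static void main(String[] args) {
    AtomicLong totalServerLatencyMillis = new AtomicLong(0); // old approach: accumulate ms
    AtomicLong totalServerLatencyNano = new AtomicLong(0);   // new approach: accumulate ns

    // Simulate several short attempts, each blocking for well under a millisecond.
    for (int attempt = 0; attempt < 5; attempt++) {
      Stopwatch serverLatencyTimer = Stopwatch.createStarted();
      busyWaitNanos(400_000); // roughly 0.4 ms of "server" time
      serverLatencyTimer.stop();

      // elapsed(MILLISECONDS) truncates, so each ~0.4 ms attempt contributes 0.
      totalServerLatencyMillis.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS));
      // elapsed(NANOSECONDS) keeps the sub-millisecond portion.
      totalServerLatencyNano.addAndGet(serverLatencyTimer.elapsed(TimeUnit.NANOSECONDS));
    }

    System.out.println("millisecond accumulation: " + totalServerLatencyMillis.get() + " ms");
    System.out.println(
        "nanosecond accumulation:  "
            + Duration.ofNanos(totalServerLatencyNano.get()).toMillis()
            + " ms");
  }

  // Busy-wait helper so the elapsed time is predictable enough for the demo.
  private static void busyWaitNanos(long nanos) {
    long deadline = System.nanoTime() + nanos;
    while (System.nanoTime() < deadline) {
      // spin
    }
  }
}
```

With five ~0.4 ms attempts, the millisecond total stays at 0 ms while the nanosecond total converts to about 2 ms, which is the kind of rounding error this change avoids.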

mutianf authored Mar 23, 2023
1 parent fc058e9 commit 45ce93b
Showing 1 changed file with 6 additions and 4 deletions.
```diff
@@ -55,7 +55,7 @@ class BuiltinMetricsTracer extends BigtableTracer {
   // Total server latency needs to be atomic because it's accessed from different threads. E.g.
   // request() from user thread and attempt failed from grpc thread. We're only measuring the extra
   // time application spent blocking grpc buffer, which will be operationLatency - serverLatency.
-  private final AtomicLong totalServerLatency = new AtomicLong(0);
+  private final AtomicLong totalServerLatencyNano = new AtomicLong(0);
   // Stopwatch is not thread safe so this is a workaround to check if the stopwatch changes is
   // flushed to memory.
   private final Stopwatch serverLatencyTimer = Stopwatch.createUnstarted();
@@ -171,7 +171,7 @@ public void responseReceived() {
     // In all the cases, we want to stop the serverLatencyTimer here.
     synchronized (timerLock) {
       if (serverLatencyTimerIsRunning) {
-        totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS));
+        totalServerLatencyNano.addAndGet(serverLatencyTimer.elapsed(TimeUnit.NANOSECONDS));
         serverLatencyTimer.reset();
         serverLatencyTimerIsRunning = false;
       }
@@ -233,6 +233,7 @@ private void recordOperationCompletion(@Nullable Throwable status) {
     }
     operationTimer.stop();
     long operationLatency = operationTimer.elapsed(TimeUnit.MILLISECONDS);
+    long operationLatencyNano = operationTimer.elapsed(TimeUnit.NANOSECONDS);
 
     // Only record when retry count is greater than 0 so the retry
     // graph will be less confusing
@@ -242,7 +243,8 @@ private void recordOperationCompletion(@Nullable Throwable status) {
 
     // serverLatencyTimer should already be stopped in recordAttemptCompletion
     recorder.putOperationLatencies(operationLatency);
-    recorder.putApplicationLatencies(operationLatency - totalServerLatency.get());
+    recorder.putApplicationLatencies(
+        Duration.ofNanos(operationLatencyNano - totalServerLatencyNano.get()).toMillis());
 
     if (operationType == OperationType.ServerStreaming
         && spanName.getMethodName().equals("ReadRows")) {
@@ -258,7 +260,7 @@ private void recordAttemptCompletion(@Nullable Throwable status) {
     synchronized (timerLock) {
       if (serverLatencyTimerIsRunning) {
         requestLeft.decrementAndGet();
-        totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS));
+        totalServerLatencyNano.addAndGet(serverLatencyTimer.elapsed(TimeUnit.NANOSECONDS));
         serverLatencyTimer.reset();
         serverLatencyTimerIsRunning = false;
       }
```
