From 644454a9723da359677052b7a4b9201e91e9a78a Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Tue, 26 Jul 2022 11:00:17 -0400 Subject: [PATCH] fix: fix race condition in BuiltinMetricsTracer (#1320) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/java-bigtable/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # ☕️ If you write sample code, please follow the [samples format]( https://github.com/GoogleCloudPlatform/java-docs-samples/blob/main/SAMPLE_FORMAT.md). --- .../v2/stub/metrics/BuiltinMetricsTracer.java | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java index 2148c674e3..6289951ded 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java @@ -58,7 +58,8 @@ class BuiltinMetricsTracer extends BigtableTracer { // Stopwatch is not thread safe so this is a workaround to check if the stopwatch changes is // flushed to memory. private final Stopwatch serverLatencyTimer = Stopwatch.createUnstarted(); - private final AtomicBoolean serverLatencyTimerIsRunning = new AtomicBoolean(); + private boolean serverLatencyTimerIsRunning = false; + private final Object timerLock = new Object(); private boolean flowControlIsDisabled = false; @@ -117,8 +118,11 @@ public void attemptStarted(Object request, int attemptNumber) { this.tableId = Util.extractTableId(request); } if (!flowControlIsDisabled) { - if (serverLatencyTimerIsRunning.compareAndSet(false, true)) { - serverLatencyTimer.start(); + synchronized (timerLock) { + if (!serverLatencyTimerIsRunning) { + serverLatencyTimer.start(); + serverLatencyTimerIsRunning = true; + } } } } @@ -144,8 +148,11 @@ public void onRequest(int requestCount) { if (flowControlIsDisabled) { // On request is only called when auto flow control is disabled. When auto flow control is // disabled, server latency is measured between onRequest and onResponse. - if (serverLatencyTimerIsRunning.compareAndSet(false, true)) { - serverLatencyTimer.start(); + synchronized (timerLock) { + if (!serverLatencyTimerIsRunning) { + serverLatencyTimer.start(); + serverLatencyTimerIsRunning = true; + } } } } @@ -159,9 +166,12 @@ public void responseReceived() { // When auto flow control is disabled and application requested multiple responses, server // latency is measured between afterResponse and responseReceived. // In all the cases, we want to stop the serverLatencyTimer here. - if (serverLatencyTimerIsRunning.compareAndSet(true, false)) { - totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); - serverLatencyTimer.reset(); + synchronized (timerLock) { + if (serverLatencyTimerIsRunning) { + totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); + serverLatencyTimer.reset(); + serverLatencyTimerIsRunning = false; + } } } @@ -172,8 +182,11 @@ public void afterResponse(long applicationLatency) { // measured between after the last response is processed and before the next response is // received. If flow control is disabled but requestLeft is greater than 0, // also start the timer to count the time between afterResponse and responseReceived. - if (serverLatencyTimerIsRunning.compareAndSet(false, true)) { - serverLatencyTimer.start(); + synchronized (timerLock) { + if (!serverLatencyTimerIsRunning) { + serverLatencyTimer.start(); + serverLatencyTimerIsRunning = true; + } } } } @@ -235,10 +248,13 @@ private void recordOperationCompletion(@Nullable Throwable status) { private void recordAttemptCompletion(@Nullable Throwable status) { // If the attempt failed, the time spent in retry should be counted in application latency. // Stop the stopwatch and decrement requestLeft. - if (serverLatencyTimerIsRunning.compareAndSet(true, false)) { - requestLeft.decrementAndGet(); - totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); - serverLatencyTimer.reset(); + synchronized (timerLock) { + if (serverLatencyTimerIsRunning) { + requestLeft.decrementAndGet(); + totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); + serverLatencyTimer.reset(); + serverLatencyTimerIsRunning = false; + } } recorder.putAttemptLatencies(attemptTimer.elapsed(TimeUnit.MILLISECONDS)); recorder.record(Util.extractStatus(status), tableId, zone, cluster);