-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add persistent query saturation metric #7955
Changes from all commits
61240b9
3729161
2530c3b
eb96a6f
c23f6ff
634ce8e
46408e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Copyright 2021 Confluent Inc. | ||
* | ||
* Licensed under the Confluent Community License (the "License"); you may not use | ||
* this file except in compliance with the License. You may obtain a copy of the | ||
* License at | ||
* | ||
* http://www.confluent.io/confluent-community-license | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
* WARRANTIES OF ANY KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations under the License. | ||
*/ | ||
|
||
package io.confluent.ksql.internal; | ||
|
||
import java.time.Duration; | ||
import java.time.Instant; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.atomic.AtomicReference; | ||
import org.apache.kafka.common.MetricName; | ||
import org.apache.kafka.common.metrics.Gauge; | ||
import org.apache.kafka.common.metrics.MetricConfig; | ||
import org.apache.kafka.common.metrics.Metrics; | ||
|
||
public class JmxDataPointsReporter implements MetricsReporter { | ||
// Kafka metrics registry that this reporter adds gauges to and removes them from. | ||
private final Metrics metrics; | ||
// Group passed to metrics.metricName(...) for every metric name this reporter builds. | ||
private final String group; | ||
// Gauges registered by this reporter, keyed by their metric name. | ||
private final Map<MetricName, DataPointBasedGauge> gauges = new ConcurrentHashMap<>(); | ||
// Age limit handed to each gauge; a gauge returns null once its data point is older than this. | ||
private final Duration staleThreshold; | ||
|
||
/** | ||
 * Creates a reporter that exposes reported data points as gauges in {@code metrics}. | ||
 * | ||
 * @param metrics the metrics registry gauges are added to and removed from | ||
 * @param group the group used when building every metric name | ||
 * @param staleThreshold the age after which a gauge stops returning its data point's value | ||
 * @throws NullPointerException if any argument is null | ||
 */ | ||
public JmxDataPointsReporter( | ||
    final Metrics metrics, | ||
    final String group, | ||
    final Duration staleThreshold | ||
) { | ||
  // Check the arguments in declaration order so the first null one is the one reported. | ||
  Objects.requireNonNull(metrics, "metrics"); | ||
  Objects.requireNonNull(group, "group"); | ||
  Objects.requireNonNull(staleThreshold, "staleThreshold"); | ||
  this.metrics = metrics; | ||
  this.group = group; | ||
  this.staleThreshold = staleThreshold; | ||
} | ||
|
||
/** | ||
 * Reports every data point in the list, one at a time and in list order. | ||
 * | ||
 * @param dataPoints the data points to publish | ||
 */ | ||
@Override | ||
public void report(final List<DataPoint> dataPoints) { | ||
  for (final DataPoint dataPoint : dataPoints) { | ||
    report(dataPoint); | ||
  } | ||
} | ||
|
||
/** | ||
 * Publishes one data point: updates the gauge already registered under the data point's | ||
 * metric name, or creates and registers a new gauge if there is none yet. | ||
 * | ||
 * <p>The lookup, the gauge creation and the registration run inside a single | ||
 * {@code ConcurrentHashMap.compute} call, so two threads reporting the same new metric | ||
 * cannot both register it, and a mapping cannot disappear between the check and the use. | ||
 * | ||
 * @param dataPoint the data point to publish | ||
 */ | ||
private void report(final DataPoint dataPoint) { | ||
  final MetricName metricName | ||
      = metrics.metricName(dataPoint.getName(), group, dataPoint.getTags()); | ||
  gauges.compute(metricName, (name, gauge) -> { | ||
    if (gauge != null) { | ||
      // Known metric: swap in the newest data point and keep the same gauge. | ||
      gauge.dataPointRef.set(dataPoint); | ||
      return gauge; | ||
    } | ||
    // First data point for this metric: register the gauge before the mapping is published. | ||
    final DataPointBasedGauge created = new DataPointBasedGauge(dataPoint, staleThreshold); | ||
    metrics.addMetric(name, created); | ||
    return created; | ||
  }); | ||
} | ||
|
||
/** | ||
 * Removes the metric identified by {@code name} and {@code tags} from the metrics registry | ||
 * and drops its gauge from this reporter. | ||
 * | ||
 * <p>Both removals run inside one {@code ConcurrentHashMap.compute} call, so the registry | ||
 * removal and the map removal are not separated by another update to the same key. | ||
 * {@code removeMetric} is still called when no gauge is tracked under that name. | ||
 * | ||
 * @param name the metric name | ||
 * @param tags the metric tags | ||
 */ | ||
@Override | ||
public void cleanup(final String name, final Map<String, String> tags) { | ||
  final MetricName metricName = metrics.metricName(name, group, tags); | ||
  gauges.compute(metricName, (key, gauge) -> { | ||
    metrics.removeMetric(key); | ||
    // Returning null removes the mapping if one exists. | ||
    return null; | ||
  }); | ||
} | ||
|
||
/** | ||
 * Does nothing; this method has an empty body. | ||
 */ | ||
@Override | ||
public void close() { | ||
  // Intentionally empty. | ||
} | ||
|
||
/** | ||
 * Does nothing; the supplied configuration is ignored. | ||
 * | ||
 * @param map configuration entries, unused | ||
 */ | ||
@Override | ||
public void configure(final Map<String, ?> map) { | ||
  // Intentionally empty. | ||
} | ||
|
||
private static final class DataPointBasedGauge implements Gauge<Object> { | ||
private final AtomicReference<DataPoint> dataPointRef; | ||
private final Duration staleThreshold; | ||
|
||
private DataPointBasedGauge( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My understanding is that as the underlying stream threads within the KS runtime come and go while we replace them, we would keep each one as a separate data point and automatically "mute" the data points that are too stale, meaning the thread may already be gone. So when we apply the saturation equation, we may temporarily be counting some threads that are already gone, but that would then return to normal. Is that right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep - once the saturation metric agent sees that a thread is missing, it will clean up the underlying |
||
final DataPoint initial, | ||
final Duration staleThreshold | ||
) { | ||
this.dataPointRef = new AtomicReference<>(initial); | ||
this.staleThreshold = staleThreshold; | ||
} | ||
|
||
@Override | ||
public Object value(final MetricConfig metricConfig, final long now) { | ||
final DataPoint dataPoint = dataPointRef.get(); | ||
if (dataPoint.getTime().isAfter(Instant.ofEpochMilli(now).minus(staleThreshold))) { | ||
return dataPoint.getValue(); | ||
} | ||
return null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How would we interpret `null`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The reporter should just skip reporting the value, so we'd see missing data in the metrics store (which is what we want). |
||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would either the group of the datapoint contain the QueryID?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may be wrong, but I think the queryId would be in the tags?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep - that's correct for metrics that are specific to a given query