Skip to content

Commit

Permalink
Use milliseconds for metrics, support prefix for metric name, rename …
Browse files Browse the repository at this point in the history
…stage to operation (#62)

* method to prefix metric name

* latency metrics to milliseconds for standardization

* rename Connector stage to operation

* unhandled exception should have one name instead of potentially many

* create separate milliseconds metrics

* update test to reflect new metrics calls
  • Loading branch information
wi-y authored Sep 2, 2020
1 parent 13f9052 commit 9b42bef
Show file tree
Hide file tree
Showing 16 changed files with 86 additions and 37 deletions.
4 changes: 2 additions & 2 deletions docs/Debugging.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ An instance of [Application Insights](https://docs.microsoft.com/en-us/azure/azu

### Telemetry Collected
All telemetry below can be viewed in [Application Insights](https://docs.microsoft.com/en-us/azure/azure-monitor/app/app-insights-overview). Select the [metrics](https://docs.microsoft.com/en-us/azure/azure-monitor/platform/metrics-charts) section. The telemetry documented will fall under the **custom** category.
* **MeasurementIngestionLatency**: The difference between the ingestion time on the event hub end point and received by the FHIR conversion function. Measured in seconds.
* **MeasurementIngestionLatency**: The difference between the ingestion time on the event hub end point and received by the FHIR conversion function. Measured in milliseconds.
* **MeasurementGroup**: The number of unique groupings of measurements across type, device, patient, and configured time period generated by the FHIR conversion function. Should directly correlate to the number of observations being created or updated.
* **Measurement**: The number of raw value readings received by the FHIR conversion function.
* **DeviceEvent**: The number of raw events received by the connector prior to any normalization.
* **NormalizedEvent**: The number of mapped normalized values outputted from the normalization step.
* **DeviceEventProcessingLatency**: The time between an event's ingestion and being processed for normalization. A large value here indicates a backlog of events being processed for normalization. Measured in seconds.
* **DeviceEventProcessingLatency**: The time between an event's ingestion and being processed for normalization. A large value here indicates a backlog of events being processed for normalization. Measured in milliseconds.
* **Handled Exceptions**: The number of handled exceptions for a given category. In order to not stall the pipeline certain non recoverable errors are categorized and then recorded as telemetry. The current exceptions handled are:

|Exception|Description
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public async Task<IActionResult> MeasurementCollectionToFhir(
catch (Exception ex)
{
_logger.LogMetric(
IomtMetrics.UnhandledException(ex.GetType().Name, ConnectorStage.FHIRConversion),
IomtMetrics.UnhandledException(ex.GetType().Name, ConnectorOperation.FHIRConversion),
1);
throw;
}
Expand Down Expand Up @@ -77,7 +77,7 @@ public async Task NormalizeDeviceData(
catch (Exception ex)
{
_logger.LogMetric(
IomtMetrics.UnhandledException(ex.GetType().Name, ConnectorStage.Normalization),
IomtMetrics.UnhandledException(ex.GetType().Name, ConnectorOperation.Normalization),
1);
throw;
}
Expand Down
10 changes: 9 additions & 1 deletion src/lib/Microsoft.Health.Common/Telemetry/Metric.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@ public Metric(string name, IDictionary<string, object> dimensions)
Dimensions = dimensions;
}

public string Name { get; }
public string Name { get; set; }

public IDictionary<string, object> Dimensions { get; }

public void AddPrefixToName(string prefix)
{
if (!Name.StartsWith(prefix, System.StringComparison.CurrentCulture))
{
Name = $"{prefix}{Name}";
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,31 @@

namespace Microsoft.Health.Common.Telemetry
{
public static class ConnectorStage
public static class ConnectorOperation
{
/// <summary>
/// The setup stage of the IoMT Connector
/// </summary>
public static string Setup => nameof(ConnectorStage.Setup);
public static string Setup => nameof(ConnectorOperation.Setup);

/// <summary>
/// The normalization stage of the IoMT Connector
/// </summary>
public static string Normalization => nameof(ConnectorStage.Normalization);
public static string Normalization => nameof(ConnectorOperation.Normalization);

/// <summary>
/// The grouping stage of the IoMT Connector
/// </summary>
public static string Grouping => nameof(ConnectorStage.Grouping);
public static string Grouping => nameof(ConnectorOperation.Grouping);

/// <summary>
/// The FHIR conversion stage of the IoMT Connector
/// </summary>
public static string FHIRConversion => nameof(ConnectorStage.FHIRConversion);
public static string FHIRConversion => nameof(ConnectorOperation.FHIRConversion);

/// <summary>
/// If a stage is not determined.
/// </summary>
public static string Unknown => nameof(ConnectorStage.Unknown);
public static string Unknown => nameof(ConnectorOperation.Unknown);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public static class DimensionNames
/// <summary>
/// A metric dimension that represents each ingestion stage of the IoMT Connector.
/// </summary>
public static string Stage => nameof(DimensionNames.Stage);
public static string Operation => nameof(DimensionNames.Operation);

/// <summary>
/// A metric dimension for a error type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public MultipleResourceFoundException(string message, Exception innerException)
{ DimensionNames.Category, Category.Errors },
{ DimensionNames.ErrorType, ErrorType.FHIRResourceError },
{ DimensionNames.ErrorSeverity, ErrorSeverity.Warning },
{ DimensionNames.Stage, ConnectorStage.FHIRConversion },
{ DimensionNames.Operation, ConnectorOperation.FHIRConversion },
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class TemplateNotFoundException :
{ DimensionNames.Category, Category.Errors },
{ DimensionNames.ErrorType, ErrorType.GeneralError },
{ DimensionNames.ErrorSeverity, ErrorSeverity.Critical },
{ DimensionNames.Stage, ConnectorStage.Unknown },
{ DimensionNames.Operation, ConnectorOperation.Unknown },
});

public TemplateNotFoundException(string message)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public CorrelationIdNotDefinedException()
{ DimensionNames.Category, Category.Errors },
{ DimensionNames.ErrorType, ErrorType.DeviceMessageError },
{ DimensionNames.ErrorSeverity, ErrorSeverity.Critical },
{ DimensionNames.Stage, ConnectorStage.Grouping },
{ DimensionNames.Operation, ConnectorOperation.Grouping },
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public FhirResourceNotFoundException(string message, Exception innerException)
{ DimensionNames.Category, Category.Errors },
{ DimensionNames.ErrorType, ErrorType.FHIRResourceError },
{ DimensionNames.ErrorSeverity, ErrorSeverity.Warning },
{ DimensionNames.Stage, ConnectorStage.FHIRConversion },
{ DimensionNames.Operation, ConnectorOperation.FHIRConversion },
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public ResourceIdentityNotDefinedException(string message, Exception innerExcept
{ DimensionNames.Category, Category.Errors },
{ DimensionNames.ErrorType, ErrorType.FHIRResourceError },
{ DimensionNames.ErrorSeverity, ErrorSeverity.Warning },
{ DimensionNames.Stage, ConnectorStage.FHIRConversion },
{ DimensionNames.Operation, ConnectorOperation.FHIRConversion },
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ private async Task StartConsumer(ISourceBlock<EventData> producer, IAsyncCollect
IomtMetrics.DeviceEventProcessingLatency(),
(DateTime.UtcNow - evt.SystemProperties.EnqueuedTimeUtc).TotalSeconds);

_log.LogMetric(
IomtMetrics.DeviceEventProcessingLatencyMs(),
(DateTime.UtcNow - evt.SystemProperties.EnqueuedTimeUtc).TotalMilliseconds);

var token = _converter.Convert(evt);
foreach (var measurement in _contentTemplate.GetMeasurements(token))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public async Task ProcessStreamAsync(Stream data, string templateDefinition, ITe
}
catch (Exception ex)
{
if (!Options.ExceptionService.HandleException(ex, log, ConnectorStage.FHIRConversion))
if (!Options.ExceptionService.HandleException(ex, log, ConnectorOperation.FHIRConversion))
{
log.LogError(ex);
throw;
Expand Down Expand Up @@ -109,11 +109,13 @@ await Task.Run(() =>
continue;
}

var latency = (nowRef - m.IngestionTimeUtc.Value).TotalSeconds;

log.LogMetric(
IomtMetrics.MeasurementIngestionLatency(),
latency);
(nowRef - m.IngestionTimeUtc.Value).TotalSeconds);

log.LogMetric(
IomtMetrics.MeasurementIngestionLatencyMs(),
(nowRef - m.IngestionTimeUtc.Value).TotalMilliseconds);
}
}).ConfigureAwait(false);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public PatientDeviceMismatchException()
{ DimensionNames.Category, Category.Errors },
{ DimensionNames.ErrorType, ErrorType.FHIRResourceError },
{ DimensionNames.ErrorSeverity, ErrorSeverity.Warning },
{ DimensionNames.Stage, ConnectorStage.FHIRConversion },
{ DimensionNames.Operation, ConnectorOperation.FHIRConversion },
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,24 @@ public static class IomtMetrics
private static string _categoryDimension = DimensionNames.Category;
private static string _errorTypeDimension = DimensionNames.ErrorType;
private static string _errorSeverityDimension = DimensionNames.ErrorSeverity;
private static string _stageDimension = DimensionNames.Stage;
private static string _operationDimension = DimensionNames.Operation;

private static Metric _measurementIngestionLatency = new Metric(
"MeasurementIngestionLatency",
new Dictionary<string, object>
{
{ _nameDimension, "MeasurementIngestionLatency" },
{ _categoryDimension, Category.Latency },
{ _stageDimension, ConnectorStage.FHIRConversion },
{ _operationDimension, ConnectorOperation.FHIRConversion },
});

private static Metric _measurementIngestionLatencyMs = new Metric(
"MeasurementIngestionLatencyMs",
new Dictionary<string, object>
{
{ _nameDimension, "MeasurementIngestionLatencyMs" },
{ _categoryDimension, Category.Latency },
{ _operationDimension, ConnectorOperation.FHIRConversion },
});

private static Metric _measurementGroup = new Metric(
Expand All @@ -35,7 +44,7 @@ public static class IomtMetrics
{
{ _nameDimension, "MeasurementGroup" },
{ _categoryDimension, Category.Traffic },
{ _stageDimension, ConnectorStage.FHIRConversion },
{ _operationDimension, ConnectorOperation.FHIRConversion },
});

private static Metric _measurement = new Metric(
Expand All @@ -44,7 +53,7 @@ public static class IomtMetrics
{
{ _nameDimension, "Measurement" },
{ _categoryDimension, Category.Traffic },
{ _stageDimension, ConnectorStage.FHIRConversion },
{ _operationDimension, ConnectorOperation.FHIRConversion },
});

private static Metric _deviceEvent = new Metric(
Expand All @@ -53,7 +62,7 @@ public static class IomtMetrics
{
{ _nameDimension, "DeviceEvent" },
{ _categoryDimension, Category.Traffic },
{ _stageDimension, ConnectorStage.Normalization },
{ _operationDimension, ConnectorOperation.Normalization },
});

private static Metric _normalizedEvent = new Metric(
Expand All @@ -62,7 +71,7 @@ public static class IomtMetrics
{
{ _nameDimension, "NormalizedEvent" },
{ _categoryDimension, Category.Traffic },
{ _stageDimension, ConnectorStage.Normalization },
{ _operationDimension, ConnectorOperation.Normalization },
});

private static Metric _deviceEventProcessingLatency = new Metric(
Expand All @@ -71,7 +80,16 @@ public static class IomtMetrics
{
{ _nameDimension, "DeviceEventProcessingLatency" },
{ _categoryDimension, Category.Latency },
{ _stageDimension, ConnectorStage.Normalization },
{ _operationDimension, ConnectorOperation.Normalization },
});

private static Metric _deviceEventProcessingLatencyMs = new Metric(
"DeviceEventProcessingLatencyMs",
new Dictionary<string, object>
{
{ _nameDimension, "DeviceEventProcessingLatencyMs" },
{ _categoryDimension, Category.Latency },
{ _operationDimension, ConnectorOperation.Normalization },
});

private static Metric _notSupported = new Metric(
Expand All @@ -82,7 +100,7 @@ public static class IomtMetrics
{ _categoryDimension, Category.Errors },
{ _errorTypeDimension, ErrorType.FHIRResourceError },
{ _errorSeverityDimension, ErrorSeverity.Warning },
{ _stageDimension, ConnectorStage.FHIRConversion },
{ _operationDimension, ConnectorOperation.FHIRConversion },
});

/// <summary>
Expand All @@ -93,6 +111,14 @@ public static Metric MeasurementIngestionLatency()
return _measurementIngestionLatency;
}

/// <summary>
/// The latency between event ingestion and output to FHIR processor, in milliseconds.
/// </summary>
public static Metric MeasurementIngestionLatencyMs()
{
return _measurementIngestionLatencyMs;
}

/// <summary>
/// The number of measurement groups generated by the FHIR processor based on provided input.
/// </summary>
Expand Down Expand Up @@ -133,6 +159,14 @@ public static Metric DeviceEventProcessingLatency()
return _deviceEventProcessingLatency;
}

/// <summary>
/// The latency between the event ingestion time and normalization processing, in milliseconds. An increase here indicates a backlog of messages to process.
/// </summary>
public static Metric DeviceEventProcessingLatencyMs()
{
return _deviceEventProcessingLatencyMs;
}

/// <summary>
/// A metric for when FHIR resource does not support the provided type as a value.
/// </summary>
Expand All @@ -145,14 +179,14 @@ public static Metric UnhandledException(string exceptionName, string connectorSt
{
EnsureArg.IsNotNull(exceptionName);
return new Metric(
exceptionName,
"UnhandledException",
new Dictionary<string, object>
{
{ _nameDimension, exceptionName },
{ _categoryDimension, Category.Errors },
{ _errorTypeDimension, ErrorType.GeneralError },
{ _errorSeverityDimension, ErrorSeverity.Critical },
{ _stageDimension, connectorStage },
{ _operationDimension, connectorStage },
});
}

Expand All @@ -165,7 +199,8 @@ public static Metric HandledException(string exceptionName, string connectorStag
{ _nameDimension, exceptionName },
{ _categoryDimension, Category.Errors },
{ _errorTypeDimension, ErrorType.GeneralError },
{ _stageDimension, connectorStage },
{ _errorSeverityDimension, ErrorSeverity.Critical },
{ _operationDimension, connectorStage },
});
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public async void GivenExceptionDuringParseStreamAsync_WhenParseStreamAsyncAndEx

options.TemplateFactory.Received(1).Create(string.Empty);
await fhirService.ReceivedWithAnyArgs(2).ProcessAsync(default, default);
options.ExceptionService.Received(2).HandleException(exception, log, ConnectorStage.FHIRConversion);
options.ExceptionService.Received(2).HandleException(exception, log, ConnectorOperation.FHIRConversion);
}

[Fact]
Expand Down Expand Up @@ -182,7 +182,7 @@ public async void GivenMultipleMeasurementsInMeasurementGroup_WhenParseStreamAsy

// Telemetry logging is async/non-blocking. Ensure enough time has pass so section is hit.
await Task.Delay(1000);
log.ReceivedWithAnyArgs(4).LogMetric(null, 0d);
log.ReceivedWithAnyArgs(6).LogMetric(null, 0d);
}

private static Stream ToStream(object obj)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void GivenHandledExceptionTypes_WhenHandleExpection_ThenMetricLoggedAndTr
var ex = Activator.CreateInstance(exType) as Exception;

var exProcessor = new ExceptionTelemetryProcessor();
var handled = exProcessor.HandleException(ex, log, ConnectorStage.FHIRConversion);
var handled = exProcessor.HandleException(ex, log, ConnectorOperation.FHIRConversion);
Assert.True(handled);

log.ReceivedWithAnyArgs(1).LogMetric(null, default(double));
Expand All @@ -42,7 +42,7 @@ public void GivenUnhandledExceptionTypes_WhenHandleExpection_ThenNoMetricLoggedA
var ex = Activator.CreateInstance(exType) as Exception;

var exProcessor = new ExceptionTelemetryProcessor();
var handled = exProcessor.HandleException(ex, log, ConnectorStage.FHIRConversion);
var handled = exProcessor.HandleException(ex, log, ConnectorOperation.FHIRConversion);
Assert.False(handled);

log.DidNotReceiveWithAnyArgs().LogMetric(null, default(double));
Expand Down

0 comments on commit 9b42bef

Please sign in to comment.