Skip to content

Commit

Permalink
Add Support for Feature & Attach Statsbeats (Azure#24470)
Browse files Browse the repository at this point in the history
* Begin adding the long interval statsbeat metrics.

* Modify shortHost to comply with spec.

* Add callbacks for long interval statsbeats.

* Update VM function to get resourceProviderId and set attach statsbeat accordingly.

* Add EU endpoints to be in line with the spec.

* Make the StatsbeatMonitorStatsbeatExporter public such that the AI distro can access it.

* Add separate collection interval options for long and short statsbeats.

* Fix test to set the correct value in the statsbeat config.

* Correct statsbeat tests.

* Ingest features and instrumentations from the distro and output from the appropriate exporters.

* Fix feature statsbeat as a batched observable.

* Add long interval immediate export.

* Add to changelog.

* Add long interval statsbeat tests.

* Document instrumentations.

* Fix documentation.

* Add final documentation.

* Refactor long interval statsbeat.

* Fix long interval tests and get rid of testing code.

* Fix formatting.

* Document singleton.

* Clean up env variables.

* Fix build issues.

* Address CR comments.

* Clean up comment.

* Fix build process errors.

* Modify project to make statsbeat details clearer and inherit shared methods.
  • Loading branch information
JacksonWeber authored Jan 25, 2023
1 parent 1706984 commit 1584998
Show file tree
Hide file tree
Showing 11 changed files with 716 additions and 401 deletions.
2 changes: 2 additions & 0 deletions sdk/monitor/monitor-opentelemetry-exporter/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## 1.0.0-beta.11 (Unreleased)

- Add attach and feature Statsbeat Metrics.

### Features Added

### Breaking Changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ export class AzureMonitorMetricExporter extends AzureMonitorBaseExporter impleme
shutdown(): Promise<void>;
}

// @internal
export class _AzureMonitorStatsbeatExporter extends AzureMonitorBaseExporter implements PushMetricExporter {
// @public
export class AzureMonitorStatsbeatExporter extends AzureMonitorBaseExporter implements PushMetricExporter {
constructor(options: AzureMonitorExporterOptions);
export(metrics: ResourceMetrics, resultCallback: (result: ExportResult) => void): Promise<void>;
forceFlush(): Promise<void>;
Expand Down
39 changes: 23 additions & 16 deletions sdk/monitor/monitor-opentelemetry-exporter/src/export/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ import { PersistentStorage, Sender } from "../types";
import { isRetriable, BreezeResponse } from "../utils/breezeUtils";
import { DEFAULT_BREEZE_ENDPOINT, ENV_CONNECTION_STRING } from "../Declarations/Constants";
import { TelemetryItem as Envelope } from "../generated";
import { StatsbeatMetrics } from "./statsbeat/statsbeatMetrics";
import { NetworkStatsbeatMetrics } from "./statsbeat/networkStatsbeatMetrics";
import { MAX_STATSBEAT_FAILURES } from "./statsbeat/types";
import { getInstance } from "./statsbeat/longIntervalStatsbeatMetrics";

const DEFAULT_BATCH_SEND_RETRY_INTERVAL_MS = 60_000;
/**
Expand All @@ -28,7 +29,8 @@ export abstract class AzureMonitorBaseExporter {
private readonly _sender: Sender;
private _numConsecutiveRedirects: number;
private _retryTimer: NodeJS.Timer | null;
private _statsbeatMetrics: StatsbeatMetrics | undefined;
private _networkStatsbeatMetrics: NetworkStatsbeatMetrics | undefined;
private _longIntervalStatsbeatMetrics;
private _isStatsbeatExporter: boolean;
private _statsbeatFailureCount: number = 0;
private _batchSendRetryIntervalMs: number = DEFAULT_BATCH_SEND_RETRY_INTERVAL_MS;
Expand Down Expand Up @@ -68,7 +70,11 @@ export abstract class AzureMonitorBaseExporter {

if (!this._isStatsbeatExporter) {
// Initialize statsbeatMetrics
this._statsbeatMetrics = new StatsbeatMetrics({
this._networkStatsbeatMetrics = new NetworkStatsbeatMetrics({
instrumentationKey: this._instrumentationKey,
endpointUrl: this._endpointUrl,
});
this._longIntervalStatsbeatMetrics = getInstance({
instrumentationKey: this._instrumentationKey,
endpointUrl: this._endpointUrl,
});
Expand Down Expand Up @@ -128,12 +134,12 @@ export abstract class AzureMonitorBaseExporter {
this._retryTimer.unref();
}
// If we are not exporting statsbeat and statsbeat is not disabled -- count success
this._statsbeatMetrics?.countSuccess(duration);
this._networkStatsbeatMetrics?.countSuccess(duration);
return { code: ExportResultCode.SUCCESS };
} else if (statusCode && isRetriable(statusCode)) {
// Failed -- persist failed data
if (statusCode === 429 || statusCode === 439) {
this._statsbeatMetrics?.countThrottle(statusCode);
this._networkStatsbeatMetrics?.countThrottle(statusCode);
}
if (result) {
diag.info(result);
Expand All @@ -147,25 +153,25 @@ export abstract class AzureMonitorBaseExporter {
});
}
if (filteredEnvelopes.length > 0) {
this._statsbeatMetrics?.countRetry(statusCode);
this._networkStatsbeatMetrics?.countRetry(statusCode);
// calls resultCallback(ExportResult) based on result of persister.push
return await this._persist(filteredEnvelopes);
}
// Failed -- not retriable
this._statsbeatMetrics?.countFailure(duration, statusCode);
this._networkStatsbeatMetrics?.countFailure(duration, statusCode);
return {
code: ExportResultCode.FAILED,
};
} else {
// calls resultCallback(ExportResult) based on result of persister.push
this._statsbeatMetrics?.countRetry(statusCode);
this._networkStatsbeatMetrics?.countRetry(statusCode);
return await this._persist(envelopes);
}
} else {
// Failed -- not retriable
if (this._statsbeatMetrics) {
if (this._networkStatsbeatMetrics) {
if (statusCode) {
this._statsbeatMetrics.countFailure(duration, statusCode);
this._networkStatsbeatMetrics.countFailure(duration, statusCode);
}
} else {
this._incrementStatsbeatFailure();
Expand Down Expand Up @@ -196,24 +202,24 @@ export abstract class AzureMonitorBaseExporter {
}
} else {
let redirectError = new Error("Circular redirect");
this._statsbeatMetrics?.countException(redirectError);
this._networkStatsbeatMetrics?.countException(redirectError);
return { code: ExportResultCode.FAILED, error: redirectError };
}
} else if (restError.statusCode && isRetriable(restError.statusCode)) {
this._statsbeatMetrics?.countRetry(restError.statusCode);
this._networkStatsbeatMetrics?.countRetry(restError.statusCode);
return await this._persist(envelopes);
}
if (this._isNetworkError(restError)) {
if (restError.statusCode) {
this._statsbeatMetrics?.countRetry(restError.statusCode);
this._networkStatsbeatMetrics?.countRetry(restError.statusCode);
}
diag.error(
"Retrying due to transient client side error. Error message:",
restError.message
);
return await this._persist(envelopes);
}
this._statsbeatMetrics?.countException(restError);
this._networkStatsbeatMetrics?.countException(restError);
diag.error(
"Envelopes could not be exported and are not retriable. Error message:",
restError.message
Expand All @@ -227,8 +233,9 @@ export abstract class AzureMonitorBaseExporter {
this._statsbeatFailureCount++;
if (this._statsbeatFailureCount > MAX_STATSBEAT_FAILURES) {
this._isStatsbeatExporter = false;
this._statsbeatMetrics?.shutdown();
this._statsbeatMetrics = undefined;
this._networkStatsbeatMetrics?.shutdown();
this._longIntervalStatsbeatMetrics?.shutdown();
this._networkStatsbeatMetrics = undefined;
this._statsbeatFailureCount = 0;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import {
diag,
BatchObservableResult,
ObservableGauge,
ObservableResult,
Meter,
} from "@opentelemetry/api";
import { ExportResult, ExportResultCode } from "@opentelemetry/core";
import {
MeterProvider,
PeriodicExportingMetricReader,
PeriodicExportingMetricReaderOptions,
} from "@opentelemetry/sdk-metrics";
import { AzureMonitorExporterOptions, AzureMonitorStatsbeatExporter } from "../../index";
import * as ai from "../../utils/constants/applicationinsights";
import { StatsbeatMetrics } from "./statsbeatMetrics";
import {
StatsbeatCounter,
STATSBEAT_LANGUAGE,
CommonStatsbeatProperties,
AttachStatsbeatProperties,
StatsbeatFeatureType,
StatsbeatOptions,
} from "./types";

let instance: LongIntervalStatsbeatMetrics | null = null;

/**
 * Long Interval Statsbeat Metrics.
 *
 * Reports the "feature" and "attach" statsbeat gauges through a dedicated
 * MeterProvider -> PeriodicExportingMetricReader -> AzureMonitorStatsbeatExporter
 * pipeline. The gauges are exported once right after asynchronous initialization
 * completes and afterwards on the reader's interval (one day unless the
 * LONG_INTERVAL_EXPORT_MILLIS environment variable holds a non-zero number).
 * @internal
 */
class LongIntervalStatsbeatMetrics extends StatsbeatMetrics {
  // Raw JSON read from the environment; expected shape: { feature, instrumentation }.
  private _AZURE_MONITOR_STATSBEAT_FEATURES = process.env.AZURE_MONITOR_STATSBEAT_FEATURES;
  private _statsCollectionLongInterval: number = 86400000; // 1 day
  private _isInitialized: boolean = false;

  // Custom dimensions
  private _cikey: string;
  private _runtimeVersion: string;
  private _language: string;
  private _version: string;
  private _attach: string = "sdk";

  private _commonProperties: CommonStatsbeatProperties;
  private _attachProperties: AttachStatsbeatProperties;

  // Values reported in the "feature" attribute; 0 means "nothing to report".
  private _feature: number = 0;
  private _instrumentation: number = 0;

  private _longIntervalStatsbeatMeterProvider: MeterProvider;
  private _longIntervalAzureExporter: AzureMonitorStatsbeatExporter;
  private _longIntervalMetricReader: PeriodicExportingMetricReader;
  private _longIntervalStatsbeatMeter: Meter;

  // Network Attributes
  private _connectionString: string;

  // Observable Gauges
  private _featureStatsbeatGauge: ObservableGauge;
  private _attachStatsbeatGauge: ObservableGauge;

  /**
   * Builds the long interval metric pipeline and starts asynchronous initialization.
   * @param options - Supplies the endpoint URL used to derive the statsbeat connection
   *   string and the instrumentation key reported as the `cikey` dimension.
   */
  constructor(options: StatsbeatOptions) {
    super();
    this._connectionString = super._getConnectionString(options.endpointUrl);
    const exporterConfig: AzureMonitorExporterOptions = {
      connectionString: this._connectionString,
    };

    if (this._AZURE_MONITOR_STATSBEAT_FEATURES) {
      try {
        // Parse once. A JSON `null` still throws on the property access below and is
        // reported through the same error path as malformed JSON.
        const parsed = JSON.parse(this._AZURE_MONITOR_STATSBEAT_FEATURES) as {
          feature?: unknown;
          instrumentation?: unknown;
        };
        // Only accept numeric values so the fields keep their declared type;
        // anything else leaves the default of 0 (i.e. not reported).
        if (typeof parsed.feature === "number") {
          this._feature = parsed.feature;
        }
        if (typeof parsed.instrumentation === "number") {
          this._instrumentation = parsed.instrumentation;
        }
      } catch (error) {
        diag.error(
          `LongIntervalStatsbeat: Failed to parse features/instrumentations (error ${error})`
        );
      }
    }

    this._longIntervalStatsbeatMeterProvider = new MeterProvider();
    this._longIntervalAzureExporter = new AzureMonitorStatsbeatExporter(exporterConfig);

    // Export Long Interval Statsbeats every day
    const longIntervalMetricReaderOptions: PeriodicExportingMetricReaderOptions = {
      exporter: this._longIntervalAzureExporter,
      exportIntervalMillis:
        Number(process.env.LONG_INTERVAL_EXPORT_MILLIS) || this._statsCollectionLongInterval, // 1 day
    };

    this._longIntervalMetricReader = new PeriodicExportingMetricReader(
      longIntervalMetricReaderOptions
    );
    this._longIntervalStatsbeatMeterProvider.addMetricReader(this._longIntervalMetricReader);
    this._longIntervalStatsbeatMeter = this._longIntervalStatsbeatMeterProvider.getMeter(
      "Azure Monitor Long Interval Statsbeat"
    );

    // Assign Common Properties
    this._runtimeVersion = process.version;
    this._language = STATSBEAT_LANGUAGE;
    this._version = ai.packageVersion;
    this._cikey = options.instrumentationKey;

    this._featureStatsbeatGauge = this._longIntervalStatsbeatMeter.createObservableGauge(
      StatsbeatCounter.FEATURE
    );
    this._attachStatsbeatGauge = this._longIntervalStatsbeatMeter.createObservableGauge(
      StatsbeatCounter.ATTACH
    );

    // Inherited state is read through `this`, not `super`: `super.x` only looks at the
    // base prototype, so it would be undefined if the base class declares these as
    // instance fields (NOTE(review): base class not visible here — `this` is correct
    // in either case).
    this._commonProperties = {
      os: this._os,
      rp: this._resourceProvider,
      cikey: this._cikey,
      runtimeVersion: this._runtimeVersion,
      language: this._language,
      version: this._version,
      attach: this._attach,
    };

    this._attachProperties = {
      rpId: this._resourceIdentifier,
    };

    // The flag is set before the asynchronous part of initialization has finished.
    this._isInitialized = true;
    // _initialize handles its own errors, so the promise is intentionally not awaited.
    void this._initialize();
  }

  /**
   * Resolves the resource provider, registers the gauge callbacks and performs the
   * one-off export at application start. Never rejects: failures are logged at
   * debug level.
   */
  private async _initialize(): Promise<void> {
    try {
      await this._getResourceProvider();
    } catch (error) {
      diag.debug("Call to get the resource provider failed.");
      return;
    }

    // Re-read the resource provider values now that detection has completed; the
    // snapshots taken in the constructor predate it.
    // NOTE(review): presumably _getResourceProvider() updates _resourceProvider and
    // _resourceIdentifier — confirm against StatsbeatMetrics.
    this._commonProperties = { ...this._commonProperties, rp: this._resourceProvider };
    this._attachProperties = { ...this._attachProperties, rpId: this._resourceIdentifier };

    try {
      // Add long interval observable callbacks
      this._attachStatsbeatGauge.addCallback(this._attachCallback.bind(this));
      this._longIntervalStatsbeatMeter.addBatchObservableCallback(
        this._featureCallback.bind(this),
        [this._featureStatsbeatGauge]
      );

      // Export Feature/Attach Statsbeat once upon app initialization
      const { resourceMetrics } = await this._longIntervalMetricReader.collect();
      // Awaited so that a rejected export is caught here instead of surfacing as an
      // unhandled promise rejection.
      await this._longIntervalAzureExporter.export(resourceMetrics, (result: ExportResult) => {
        if (result.code !== ExportResultCode.SUCCESS) {
          diag.error(`LongIntervalStatsbeat: metrics export failed (error ${result.error})`);
        }
      });
    } catch (error) {
      diag.debug(`LongIntervalStatsbeat: initial export failed (error ${error})`);
    }
  }

  /**
   * Batch callback for the feature gauge. Emits one observation (value 1) for the
   * instrumentation value and one for the feature value, each only when non-zero.
   */
  private _featureCallback(observableResult: BatchObservableResult): void {
    if (this._instrumentation) {
      observableResult.observe(this._featureStatsbeatGauge, 1, {
        ...this._commonProperties,
        feature: this._instrumentation,
        type: StatsbeatFeatureType.INSTRUMENTATION,
      });
    }

    if (this._feature) {
      observableResult.observe(this._featureStatsbeatGauge, 1, {
        ...this._commonProperties,
        feature: this._feature,
        type: StatsbeatFeatureType.FEATURE,
      });
    }
  }

  /**
   * Callback for the attach gauge. Emits a single observation (value 1) carrying the
   * common properties plus the attach-specific properties.
   */
  private _attachCallback(observableResult: ObservableResult): void {
    const attributes = { ...this._commonProperties, ...this._attachProperties };
    observableResult.observe(1, attributes);
  }

  /**
   * Returns the flag set at the end of the constructor. It does not indicate that
   * the asynchronous initialization (resource provider lookup, first export) is done.
   */
  public isInitialized(): boolean {
    return this._isInitialized;
  }

  /**
   * Shuts down the long interval meter provider. The shutdown is fire-and-forget;
   * a failure is logged rather than left as an unhandled rejection.
   */
  public shutdown(): void {
    this._longIntervalStatsbeatMeterProvider.shutdown().catch((error: unknown) => {
      diag.debug(`LongIntervalStatsbeat: meter provider shutdown failed (error ${error})`);
    });
  }
}

/**
 * Returns the shared LongIntervalStatsbeatMetrics, creating it on the first call.
 * Once an instance exists it is returned as-is, so `options` passed on later calls
 * are not used.
 * @internal
 */
export function getInstance(options: StatsbeatOptions): LongIntervalStatsbeatMetrics {
  if (instance) {
    return instance;
  }
  const created = new LongIntervalStatsbeatMetrics(options);
  instance = created;
  return created;
}
Loading

0 comments on commit 1584998

Please sign in to comment.