-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Obs AI Assistant] register alert details context in observability pl…
…ugin (#181501) ## Summary Closes #181286 Create an AlertDetailsContextService in the `observability` plugin. Solutions can use the service to register data handlers that fetch information relevant to an alert's context; that information is then used as input to the AI assistant when it is asked to investigate an alert. While only one handler is currently registered (from the APM plugin), the benefits are 1. making this information available to the AI assistant connector, since it can't directly call the APM API, and 2. allowing the context to be extended with additional data in the future, for example logs. #### Follow up - Move the APM route and associated tests to the observability plugin
- Loading branch information
Showing
25 changed files
with
711 additions
and
340 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
146 changes: 146 additions & 0 deletions
146
...ns/observability_solution/apm/server/routes/assistant_functions/get_changepoints/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import moment from 'moment'; | ||
import { LatencyAggregationType } from '../../../../common/latency_aggregation_types'; | ||
import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client'; | ||
import { ApmTimeseriesType, getApmTimeseries, TimeseriesChangePoint } from '../get_apm_timeseries'; | ||
|
||
/**
 * One timeseries, together with the change points reported for it.
 *
 * Produced by `getServiceChangePoints` and `getExitSpanChangePoints`, which
 * only emit entries whose `changes` array is non-empty.
 */
export interface ChangePointGrouping {
  /** Title of the stat that was requested, e.g. 'Latency' or 'Exit span latency'. */
  title: string;
  /** Copied from the `id` of the timeseries returned by `getApmTimeseries`. */
  grouping: string;
  /** Change points reported for this timeseries. */
  changes: TimeseriesChangePoint[];
}
|
||
/**
 * Collects change points for a service's latency (p95), throughput, failure
 * rate and error event rate over the 12 hours leading up to the alert start.
 *
 * @param apmEventClient - client handed through to `getApmTimeseries`
 * @param alertStartedAt - timestamp string used as the end of the queried range
 * @param serviceName - service to inspect; when falsy no query is made
 * @param serviceEnvironment - optional environment passed with every stat
 * @param transactionType - optional transaction type passed with the transaction stats
 * @param transactionName - optional transaction name passed with the transaction stats
 * @returns one grouping per returned timeseries that has at least one change,
 *          or an empty array when `serviceName` is not provided
 */
export async function getServiceChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
  transactionType,
  transactionName,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
  transactionType: string | undefined;
  transactionName: string | undefined;
}): Promise<ChangePointGrouping[]> {
  if (!serviceName) {
    return [];
  }

  // Fields shared by every requested stat.
  const serviceFilter = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };

  // Fields shared by the three transaction-based stats.
  const transactionFilter = {
    'transaction.type': transactionType,
    'transaction.name': transactionName,
  };

  const end = alertStartedAt;
  const start = moment(end).subtract(12, 'hours').toISOString();

  const timeseriesList = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start,
      end,
      stats: [
        {
          title: 'Latency',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.transactionLatency,
            function: LatencyAggregationType.p95,
            ...transactionFilter,
          },
        },
        {
          title: 'Throughput',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.transactionThroughput,
            ...transactionFilter,
          },
        },
        {
          title: 'Failure rate',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.transactionFailureRate,
            ...transactionFilter,
          },
        },
        {
          title: 'Error events',
          ...serviceFilter,
          timeseries: {
            name: ApmTimeseriesType.errorEventRate,
          },
        },
      ],
    },
  });

  // Keep only the timeseries that actually reported a change.
  const groupings: ChangePointGrouping[] = [];
  for (const { stat, id, changes } of timeseriesList) {
    if (changes.length > 0) {
      groupings.push({ title: stat.title, grouping: id, changes });
    }
  }
  return groupings;
}
|
||
/**
 * Collects change points for a service's exit span latency and exit span
 * failure rate over the 30 minutes leading up to the alert start.
 *
 * @param apmEventClient - client handed through to `getApmTimeseries`
 * @param alertStartedAt - timestamp string used as the end of the queried range
 * @param serviceName - service to inspect; when falsy no query is made
 * @param serviceEnvironment - optional environment passed with every stat
 * @returns one grouping per returned timeseries that has at least one change,
 *          or an empty array when `serviceName` is not provided
 */
export async function getExitSpanChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
}): Promise<ChangePointGrouping[]> {
  if (!serviceName) {
    return [];
  }

  // Fields shared by both exit span stats.
  const serviceFilter = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };

  const end = alertStartedAt;
  const start = moment(end).subtract(30, 'minute').toISOString();

  const timeseriesList = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start,
      end,
      stats: [
        {
          title: 'Exit span latency',
          ...serviceFilter,
          timeseries: { name: ApmTimeseriesType.exitSpanLatency },
        },
        {
          title: 'Exit span failure rate',
          ...serviceFilter,
          timeseries: { name: ApmTimeseriesType.exitSpanFailureRate },
        },
      ],
    },
  });

  // Drop timeseries without changes, then project to the public shape.
  const withChanges = timeseriesList.filter(({ changes }) => changes.length > 0);
  return withChanges.map(({ stat, id, changes }) => ({
    title: stat.title,
    grouping: id,
    changes,
  }));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
...nt_functions/get_observability_alert_details_context/get_alert_details_context_handler.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { Logger } from '@kbn/core/server'; | ||
import { | ||
AlertDetailsContextualInsightsHandlerQuery, | ||
AlertDetailsContextualInsightsRequestContext, | ||
} from '@kbn/observability-plugin/server/services'; | ||
import { getApmAlertsClient } from '../../../lib/helpers/get_apm_alerts_client'; | ||
import { getApmEventClient } from '../../../lib/helpers/get_apm_event_client'; | ||
import { getMlClient } from '../../../lib/helpers/get_ml_client'; | ||
import { getRandomSampler } from '../../../lib/helpers/get_random_sampler'; | ||
import { getObservabilityAlertDetailsContext } from '.'; | ||
import { APMRouteHandlerResources } from '../../apm_routes/register_apm_server_routes'; | ||
|
||
/**
 * Creates the handler that gathers APM data for an alert's details context.
 *
 * The returned function assembles a route-resources-like object from the
 * incoming request context, uses it to create the APM clients, and delegates
 * to `getObservabilityAlertDetailsContext`.
 *
 * @param resourcePlugins - plugin contracts available to APM route handlers
 * @param logger - logger passed through to `getObservabilityAlertDetailsContext`
 * @returns an async handler taking the request context and the handler query
 */
export const getAlertDetailsContextHandler = (
  resourcePlugins: APMRouteHandlerResources['plugins'],
  logger: Logger
) => {
  return async (
    requestContext: AlertDetailsContextualInsightsRequestContext,
    query: AlertDetailsContextualInsightsHandlerQuery
  ) => {
    const { request } = requestContext;

    // Resolves the APM indices using the saved objects client of the current request.
    const getApmIndices = async () => {
      const { savedObjects } = await requestContext.core;
      return resourcePlugins.apmDataAccess.setup.getApmIndices(savedObjects.client);
    };

    // NOTE(review): `alerting` is accessed with a non-null assertion; this
    // throws if the plugin is not available. Confirm it is always present here.
    const alertingContext = resourcePlugins.alerting!.start().then((alertingStart) => ({
      getRulesClient: () => alertingStart.getRulesClientWithRequest(request),
    }));

    const racContext = resourcePlugins.ruleRegistry.start().then((ruleRegistryStart) => ({
      getAlertsClient: () => ruleRegistryStart.getRacClientWithRequest(request),
    }));

    const resources = {
      getApmIndices,
      request,
      params: { query: { _inspect: false } },
      plugins: resourcePlugins,
      context: {
        core: requestContext.core,
        licensing: requestContext.licensing,
        alerting: alertingContext,
        rac: racContext,
      },
    };

    // NOTE(review): the random sampler (last entry) is awaited but its result
    // is never read; it is kept so that behavior stays unchanged. Consider
    // removing it together with the `getRandomSampler` import.
    const [apmEventClient, annotationsClient, apmAlertsClient, coreContext, mlClient] =
      await Promise.all([
        getApmEventClient(resources),
        resourcePlugins.observability.setup.getScopedAnnotationsClient(
          resources.context,
          request
        ),
        getApmAlertsClient(resources),
        requestContext.core,
        getMlClient(resources),
        getRandomSampler({
          security: resourcePlugins.security,
          probability: 1,
          request,
        }),
      ]);

    return getObservabilityAlertDetailsContext({
      coreContext,
      apmEventClient,
      annotationsClient,
      apmAlertsClient,
      mlClient,
      esClient: coreContext.elasticsearch.client.asCurrentUser,
      query,
      logger,
    });
  };
};
85 changes: 85 additions & 0 deletions
85
...functions/get_observability_alert_details_context/get_apm_alert_details_context_prompt.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { isEmpty } from 'lodash'; | ||
import { AlertDetailsContextualInsight } from '@kbn/observability-plugin/server/services'; | ||
import { APMDownstreamDependency } from '../get_apm_downstream_dependencies'; | ||
import { ServiceSummary } from '../get_apm_service_summary'; | ||
import { LogCategories } from '../get_log_categories'; | ||
import { ApmAnomalies } from '../get_apm_service_summary/get_anomalies'; | ||
import { ChangePointGrouping } from '../get_changepoints'; | ||
|
||
/**
 * Turns the APM data gathered for an alert into a list of contextual insights.
 *
 * Each input becomes one `{ key, description, data }` entry; entries whose
 * data is empty according to lodash `isEmpty` are left out. The order of the
 * result is fixed: service summary, downstream dependencies, service change
 * points, exit span change points, log categories, anomalies.
 *
 * @param serviceName - interpolated into several descriptions
 * @param serviceEnvironment - interpolated into the anomalies description
 * @returns the non-empty insights, in the order listed above
 */
export function getApmAlertDetailsContextPrompt({
  serviceName,
  serviceEnvironment,
  serviceSummary,
  downstreamDependencies,
  logCategories,
  serviceChangePoints,
  exitSpanChangePoints,
  anomalies,
}: {
  serviceName?: string;
  serviceEnvironment?: string;
  serviceSummary?: ServiceSummary;
  downstreamDependencies?: APMDownstreamDependency[];
  logCategories: LogCategories;
  serviceChangePoints?: ChangePointGrouping[];
  exitSpanChangePoints?: ChangePointGrouping[];
  anomalies?: ApmAnomalies;
}): AlertDetailsContextualInsight[] {
  // NOTE(review): `serviceName` / `serviceEnvironment` are optional, so a
  // description can contain the literal text "undefined" — confirm whether
  // callers always provide them when the corresponding data is non-empty.
  const candidates: AlertDetailsContextualInsight[] = [
    {
      key: 'serviceSummary',
      description: 'Metadata for the service where the alert occurred',
      data: serviceSummary,
    },
    {
      key: 'downstreamDependencies',
      description: `Downstream dependencies from the service "${serviceName}". Problems in these services can negatively affect the performance of "${serviceName}"`,
      data: downstreamDependencies,
    },
    {
      key: 'serviceChangePoints',
      description: `Significant change points for "${serviceName}". Use this to spot dips and spikes in throughput, latency and failure rate`,
      data: serviceChangePoints,
    },
    {
      key: 'exitSpanChangePoints',
      description: `Significant change points for the dependencies of "${serviceName}". Use this to spot dips or spikes in throughput, latency and failure rate for downstream dependencies`,
      data: exitSpanChangePoints,
    },
    {
      key: 'logCategories',
      description: `Log events occurring around the time of the alert`,
      data: logCategories,
    },
    {
      key: 'anomalies',
      description: `Anomalies for services running in the environment "${serviceEnvironment}"`,
      data: anomalies,
    },
  ];

  // Only insights that carry data are included.
  return candidates.filter(({ data }) => !isEmpty(data));
}
Oops, something went wrong.