From ee274c816f2f4091b0e1b9a9e9c060ff2cdded6e Mon Sep 17 00:00:00 2001 From: Alexi Doak <109488926+doakalexi@users.noreply.github.com> Date: Mon, 12 Feb 2024 08:22:29 -0800 Subject: [PATCH] [ResponseOps] Add telemetry for the es query rule types (#176451) Resolves https://github.com/elastic/kibana/issues/176237 ## Summary Adds new telemetry fields to track the ES Query rule search types. ### Checklist - [ ] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios ### To verify - Create a couple of ES Query rules with the different search types - Create a rule that is not an ES Query rule - Change the alerting telemetry task [schedule](https://github.com/doakalexi/kibana/blob/main/x-pack/plugins/alerting/server/usage/task.ts#L28) interval to 1 min - Run the [Telemetry usage payload API](https://docs.elastic.dev/telemetry/collection/snapshot-telemetry#telemetry-usage-payload-api) in your browser console to verify the new telemetry data under `count_by_type` and `count_active_by_type` --- .../lib/get_telemetry_from_kibana.test.ts | 42 ++++++++++++ .../usage/lib/get_telemetry_from_kibana.ts | 31 ++++++++- .../lib/group_rules_by_search_type.test.ts | 33 ++++++++++ .../usage/lib/group_rules_by_search_type.ts | 18 +++++ .../alerting_and_actions_telemetry.ts | 66 ++++++++++++++++--- 5 files changed, 178 insertions(+), 12 deletions(-) create mode 100644 x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.test.ts create mode 100644 x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.ts diff --git a/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts b/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts index 58a449d5f7004..f29602458fd50 100644 --- a/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts +++ b/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts @@ -139,6
+139,24 @@ describe('kibana index telemetry', () => { }, ], }, + by_search_type: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'esQuery', + doc_count: 0, + }, + { + key: 'searchSource', + doc_count: 1, + }, + { + key: 'esqlQuery', + doc_count: 3, + }, + ], + }, max_throttle_time: { value: 60 }, min_throttle_time: { value: 0 }, avg_throttle_time: { value: 30 }, @@ -174,6 +192,9 @@ describe('kibana index telemetry', () => { document__test__: 1, // eslint-disable-next-line @typescript-eslint/naming-convention logs__alert__document__count: 1, + '__es-query_es_query': 0, + '__es-query_esql_query': 3, + '__es-query_search_source': 1, }, count_total: 4, hasErrors: false, @@ -328,6 +349,24 @@ describe('kibana index telemetry', () => { }, ], }, + by_search_type: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'esQuery', + doc_count: 0, + }, + { + key: 'searchSource', + doc_count: 1, + }, + { + key: 'esqlQuery', + doc_count: 3, + }, + ], + }, }, }); @@ -345,6 +384,9 @@ describe('kibana index telemetry', () => { document__test__: 1, // eslint-disable-next-line @typescript-eslint/naming-convention logs__alert__document__count: 1, + '__es-query_es_query': 0, + '__es-query_esql_query': 3, + '__es-query_search_source': 1, }, countNamespaces: 1, countTotal: 4, diff --git a/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.ts b/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.ts index a1055aa075521..ecaf99ffc44a3 100644 --- a/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.ts +++ b/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.ts @@ -22,6 +22,7 @@ import { groupRulesByStatus } from './group_rules_by_status'; import { AlertingUsage } from '../types'; import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector'; import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket'; +import { groupRulesBySearchType } 
from './group_rules_by_search_type'; interface Opts { esClient: ElasticsearchClient; @@ -258,6 +259,11 @@ export async function getTotalCountAggregations({ }, }, }, + by_search_type: { + terms: { + field: 'alert.params.searchType', + }, + }, sum_rules_with_tags: { sum: { field: 'rule_with_tags' } }, sum_rules_snoozed: { sum: { field: 'rule_snoozed' } }, sum_rules_muted: { sum: { field: 'rule_muted' } }, @@ -285,6 +291,7 @@ export async function getTotalCountAggregations({ by_execution_status: AggregationsTermsAggregateBase; by_notify_when: AggregationsTermsAggregateBase; connector_types_by_consumers: AggregationsTermsAggregateBase; + by_search_type: AggregationsTermsAggregateBase; sum_rules_with_tags: AggregationsSingleMetricAggregateBase; sum_rules_snoozed: AggregationsSingleMetricAggregateBase; sum_rules_muted: AggregationsSingleMetricAggregateBase; @@ -306,10 +313,17 @@ export async function getTotalCountAggregations({ aggregations.connector_types_by_consumers.buckets ); + const countRulesBySearchType = groupRulesBySearchType( + parseSimpleRuleTypeBucket(aggregations.by_search_type.buckets) + ); + return { hasErrors: false, count_total: totalRulesCount ?? 0, - count_by_type: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets), + count_by_type: { + ...parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets), + ...countRulesBySearchType, + }, count_rules_by_execution_status: countRulesByExecutionStatus, count_rules_with_tags: aggregations.sum_rules_with_tags.value ?? 
0, count_rules_by_notify_when: countRulesByNotifyWhen, @@ -422,6 +436,11 @@ export async function getTotalCountInUse({ size: NUM_ALERTING_RULE_TYPES, }, }, + by_search_type: { + terms: { + field: 'alert.params.searchType', + }, + }, }, }, }; @@ -434,15 +453,23 @@ export async function getTotalCountInUse({ const aggregations = results.aggregations as { by_rule_type_id: AggregationsTermsAggregateBase; namespaces_count: AggregationsCardinalityAggregate; + by_search_type: AggregationsTermsAggregateBase; }; const totalEnabledRulesCount = typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value; + const countRulesBySearchType = groupRulesBySearchType( + parseSimpleRuleTypeBucket(aggregations.by_search_type.buckets) + ); + return { hasErrors: false, countTotal: totalEnabledRulesCount ?? 0, - countByType: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets), + countByType: { + ...parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets), + ...countRulesBySearchType, + }, countNamespaces: aggregations.namespaces_count.value ?? 0, }; } catch (err) { diff --git a/x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.test.ts b/x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.test.ts new file mode 100644 index 0000000000000..b82c8b49d1ba0 --- /dev/null +++ b/x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.test.ts @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { groupRulesBySearchType } from './group_rules_by_search_type'; + +describe('groupRulesBySearchType', () => { + test('should correctly group search types', () => { + expect( + groupRulesBySearchType({ + esQuery: 1, + searchSource: 2, + esqlQuery: 3, + foo: 5, + }) + ).toEqual({ + '__es-query_es_query': 1, + '__es-query_search_source': 2, + '__es-query_esql_query': 3, + }); + }); + + test('should fallback to 0 if any of the expected search types are absent', () => { + expect(groupRulesBySearchType({ unknown: 100, bar: 300 })).toEqual({ + '__es-query_es_query': 0, + '__es-query_search_source': 0, + '__es-query_esql_query': 0, + }); + }); +}); diff --git a/x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.ts b/x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.ts new file mode 100644 index 0000000000000..b97ac049c2374 --- /dev/null +++ b/x-pack/plugins/alerting/server/usage/lib/group_rules_by_search_type.ts @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { AlertingUsage } from '../types'; + +export function groupRulesBySearchType( + rulesBySearchType: Record +): AlertingUsage['count_by_type'] { + return { + '__es-query_es_query': rulesBySearchType.esQuery ?? 0, + '__es-query_search_source': rulesBySearchType.searchSource ?? 0, + '__es-query_esql_query': rulesBySearchType.esqlQuery ?? 
0, + }; +} diff --git a/x-pack/test/alerting_api_integration/security_and_spaces/group2/tests/telemetry/alerting_and_actions_telemetry.ts b/x-pack/test/alerting_api_integration/security_and_spaces/group2/tests/telemetry/alerting_and_actions_telemetry.ts index 0823665f43f64..afe7275e808b1 100644 --- a/x-pack/test/alerting_api_integration/security_and_spaces/group2/tests/telemetry/alerting_and_actions_telemetry.ts +++ b/x-pack/test/alerting_api_integration/security_and_spaces/group2/tests/telemetry/alerting_and_actions_telemetry.ts @@ -28,7 +28,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F describe('test telemetry', () => { const objectRemover = new ObjectRemover(supertest); - const alwaysFiringRuleId: { [key: string]: string } = {}; + const esQueryRuleId: { [key: string]: string } = {}; beforeEach(async () => { await esTestIndexTool.destroy(); @@ -90,7 +90,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F connectorTypeId: 'test.excluded', }); - alwaysFiringRuleId[space.id] = await createRule({ + await createRule({ space: space.id, ruleOverwrites: { rule_type_id: 'test.patternFiring', @@ -158,6 +158,28 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F actions: [], }, }); + // ES query rule + esQueryRuleId[space.id] = await createRule({ + space: space.id, + ruleOverwrites: { + rule_type_id: '.es-query', + schedule: { interval: '1h' }, + throttle: null, + params: { + size: 100, + timeWindowSize: 5, + timeWindowUnit: 'm', + thresholdComparator: '>', + threshold: [0], + searchType: 'esqlQuery', + esqlQuery: { + esql: 'from .kibana-alerting-test-data | stats c = count(date) | where c < 0', + }, + timeField: 'date_epoch_millis', + }, + actions: [], + }, + }); } } @@ -220,7 +242,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F function verifyAlertingTelemetry(telemetry: any) { logger.info(`alerting telemetry - 
${JSON.stringify(telemetry)}`); // total number of enabled rules - expect(telemetry.count_active_total).to.equal(9); + expect(telemetry.count_active_total).to.equal(12); // total number of disabled rules expect(telemetry.count_disabled_total).to.equal(3); @@ -230,18 +252,26 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F expect(telemetry.count_by_type.test__patternFiring).to.equal(3); expect(telemetry.count_by_type.test__multipleSearches).to.equal(3); expect(telemetry.count_by_type.test__throw).to.equal(3); + expect(telemetry.count_by_type['__es-query']).to.equal(3); + expect(telemetry.count_by_type['__es-query_es_query']).to.equal(0); + expect(telemetry.count_by_type['__es-query_search_source']).to.equal(0); + expect(telemetry.count_by_type['__es-query_esql_query']).to.equal(3); // total number of enabled rules broken down by rule type expect(telemetry.count_active_by_type.test__patternFiring).to.equal(3); expect(telemetry.count_active_by_type.test__multipleSearches).to.equal(3); expect(telemetry.count_active_by_type.test__throw).to.equal(3); + expect(telemetry.count_active_by_type['__es-query']).to.equal(3); + expect(telemetry.count_active_by_type['__es-query_es_query']).to.equal(0); + expect(telemetry.count_active_by_type['__es-query_search_source']).to.equal(0); + expect(telemetry.count_active_by_type['__es-query_esql_query']).to.equal(3); // throttle time stats expect(telemetry.throttle_time.min).to.equal('0s'); - expect(telemetry.throttle_time.avg).to.equal('0.4s'); + expect(telemetry.throttle_time.avg).to.equal('0.3333333333333333s'); expect(telemetry.throttle_time.max).to.equal('1s'); expect(telemetry.throttle_time_number_s.min).to.equal(0); - expect(telemetry.throttle_time_number_s.avg).to.equal(0.4); + expect(telemetry.throttle_time_number_s.avg).to.equal(0.3333333333333333); expect(telemetry.throttle_time_number_s.max).to.equal(1); // schedule interval stats @@ -254,7 +284,7 @@ export default function 
createAlertingAndActionsTelemetryTests({ getService }: F // attached connectors stats expect(telemetry.connectors_per_alert.min).to.equal(0); - expect(telemetry.connectors_per_alert.avg).to.equal(1); + expect(telemetry.connectors_per_alert.avg).to.equal(0.8); expect(telemetry.connectors_per_alert.max).to.equal(3); // number of spaces with rules @@ -269,6 +299,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F expect(telemetry.count_by_type.test__noop >= 3).to.be(true); expect(telemetry.count_by_type.test__multipleSearches >= 3).to.be(true); expect(telemetry.count_by_type.test__throw >= 3).to.be(true); + expect(telemetry.count_by_type['__es-query'] >= 3).to.be(true); // average execution time - just checking for non-zero as we can't set an exact number expect(telemetry.avg_execution_time_per_day > 0).to.be(true); @@ -277,6 +308,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F expect(telemetry.avg_execution_time_by_type_per_day.test__patternFiring > 0).to.be(true); expect(telemetry.avg_execution_time_by_type_per_day.test__multipleSearches > 0).to.be(true); expect(telemetry.avg_execution_time_by_type_per_day.test__throw > 0).to.be(true); + expect(telemetry.avg_execution_time_by_type_per_day['__es-query'] > 0).to.be(true); // average es search time - just checking for non-zero as we can't set an exact number expect(telemetry.avg_es_search_duration_per_day > 0).to.be(true); @@ -360,6 +392,16 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F telemetry.percentile_num_generated_actions_by_type_per_day.p99.test__multipleSearches ).to.equal(0); + expect(telemetry.percentile_num_generated_actions_by_type_per_day.p50['__es-query']).to.equal( + 0 + ); + expect(telemetry.percentile_num_generated_actions_by_type_per_day.p90['__es-query']).to.equal( + 0 + ); + expect(telemetry.percentile_num_generated_actions_by_type_per_day.p99['__es-query']).to.equal( + 0 + ); + // 
percentile calculations for number of alerts expect(telemetry.percentile_num_alerts_per_day.p50 >= 0).to.be(true); expect(telemetry.percentile_num_alerts_per_day.p90 >= 0).to.be(true); @@ -392,17 +434,21 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F 0 ); + expect(telemetry.percentile_num_alerts_by_type_per_day.p50['__es-query']).to.equal(0); + expect(telemetry.percentile_num_alerts_by_type_per_day.p90['__es-query']).to.equal(0); + expect(telemetry.percentile_num_alerts_by_type_per_day.p99['__es-query']).to.equal(0); + // rules grouped by execution status expect(telemetry.count_rules_by_execution_status.success > 0).to.be(true); expect(telemetry.count_rules_by_execution_status.error > 0).to.be(true); expect(telemetry.count_rules_by_execution_status.warning).to.equal(0); // number of rules that has tags - expect(telemetry.count_rules_with_tags).to.equal(12); + expect(telemetry.count_rules_with_tags).to.equal(15); // rules grouped by notify when expect(telemetry.count_rules_by_notify_when.on_action_group_change).to.equal(0); expect(telemetry.count_rules_by_notify_when.on_active_alert).to.equal(0); - expect(telemetry.count_rules_by_notify_when.on_throttle_interval).to.equal(12); + expect(telemetry.count_rules_by_notify_when.on_throttle_interval).to.equal(15); // rules snoozed expect(telemetry.count_rules_snoozed).to.equal(0); // rules muted @@ -427,7 +473,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F getService, spaceId: Spaces[2].id, type: 'alert', - id: alwaysFiringRuleId[Spaces[2].id], + id: esQueryRuleId[Spaces[2].id], provider: 'alerting', actions: new Map([['execute', { gte: 1 }]]), }); @@ -474,7 +520,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F expect(taskState).not.to.be(undefined); alertingTelemetry = JSON.parse(taskState!); expect(alertingTelemetry.runs > 0).to.be(true); - expect(alertingTelemetry.count_total).to.equal(12); + 
expect(alertingTelemetry.count_total).to.equal(15); }); verifyAlertingTelemetry(alertingTelemetry);