Skip to content

Commit

Permalink
[ResponseOps] Add telemetry for the es query rule types (#176451)
Browse files Browse the repository at this point in the history
Resolves #176237

## Summary

Adds new telemetry fields to track the ES Query rule search types.

### Checklist

- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios


### To verify

- Create a couple of ES Query rules with the different search types
- Create a rule that is not an ES Query rule

- Change the alerting telemetry task
[schedule](https://github.com/doakalexi/kibana/blob/main/x-pack/plugins/alerting/server/usage/task.ts#L28)
interval to 1 min

- Run the [Telemetry usage payload
API](https://docs.elastic.dev/telemetry/collection/snapshot-telemetry#telemetry-usage-payload-api)
in your browser console to verify the new telemetry data under
`count_by_type` and `count_active_by_type`
  • Loading branch information
doakalexi authored Feb 12, 2024
1 parent 4566ef7 commit d89097a
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,24 @@ describe('kibana index telemetry', () => {
},
],
},
by_search_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'esQuery',
doc_count: 0,
},
{
key: 'searchSource',
doc_count: 1,
},
{
key: 'esqlQuery',
doc_count: 3,
},
],
},
max_throttle_time: { value: 60 },
min_throttle_time: { value: 0 },
avg_throttle_time: { value: 30 },
Expand Down Expand Up @@ -174,6 +192,9 @@ describe('kibana index telemetry', () => {
document__test__: 1,
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 1,
'__es-query_es_query': 0,
'__es-query_esql_query': 3,
'__es-query_search_source': 1,
},
count_total: 4,
hasErrors: false,
Expand Down Expand Up @@ -328,6 +349,24 @@ describe('kibana index telemetry', () => {
},
],
},
by_search_type: {
doc_count_error_upper_bound: 0,
sum_other_doc_count: 0,
buckets: [
{
key: 'esQuery',
doc_count: 0,
},
{
key: 'searchSource',
doc_count: 1,
},
{
key: 'esqlQuery',
doc_count: 3,
},
],
},
},
});

Expand All @@ -345,6 +384,9 @@ describe('kibana index telemetry', () => {
document__test__: 1,
// eslint-disable-next-line @typescript-eslint/naming-convention
logs__alert__document__count: 1,
'__es-query_es_query': 0,
'__es-query_esql_query': 3,
'__es-query_search_source': 1,
},
countNamespaces: 1,
countTotal: 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { groupRulesByStatus } from './group_rules_by_status';
import { AlertingUsage } from '../types';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { groupRulesBySearchType } from './group_rules_by_search_type';

interface Opts {
esClient: ElasticsearchClient;
Expand Down Expand Up @@ -258,6 +259,11 @@ export async function getTotalCountAggregations({
},
},
},
by_search_type: {
terms: {
field: 'alert.params.searchType',
},
},
sum_rules_with_tags: { sum: { field: 'rule_with_tags' } },
sum_rules_snoozed: { sum: { field: 'rule_snoozed' } },
sum_rules_muted: { sum: { field: 'rule_muted' } },
Expand Down Expand Up @@ -285,6 +291,7 @@ export async function getTotalCountAggregations({
by_execution_status: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
by_notify_when: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
connector_types_by_consumers: AggregationsTermsAggregateBase<ConnectorsByConsumersBucket>;
by_search_type: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
sum_rules_with_tags: AggregationsSingleMetricAggregateBase;
sum_rules_snoozed: AggregationsSingleMetricAggregateBase;
sum_rules_muted: AggregationsSingleMetricAggregateBase;
Expand All @@ -306,10 +313,17 @@ export async function getTotalCountAggregations({
aggregations.connector_types_by_consumers.buckets
);

const countRulesBySearchType = groupRulesBySearchType(
parseSimpleRuleTypeBucket(aggregations.by_search_type.buckets)
);

return {
hasErrors: false,
count_total: totalRulesCount ?? 0,
count_by_type: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
count_by_type: {
...parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
...countRulesBySearchType,
},
count_rules_by_execution_status: countRulesByExecutionStatus,
count_rules_with_tags: aggregations.sum_rules_with_tags.value ?? 0,
count_rules_by_notify_when: countRulesByNotifyWhen,
Expand Down Expand Up @@ -422,6 +436,11 @@ export async function getTotalCountInUse({
size: NUM_ALERTING_RULE_TYPES,
},
},
by_search_type: {
terms: {
field: 'alert.params.searchType',
},
},
},
},
};
Expand All @@ -434,15 +453,23 @@ export async function getTotalCountInUse({
const aggregations = results.aggregations as {
by_rule_type_id: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
namespaces_count: AggregationsCardinalityAggregate;
by_search_type: AggregationsTermsAggregateBase<AggregationsStringTermsBucketKeys>;
};

const totalEnabledRulesCount =
typeof results.hits.total === 'number' ? results.hits.total : results.hits.total?.value;

const countRulesBySearchType = groupRulesBySearchType(
parseSimpleRuleTypeBucket(aggregations.by_search_type.buckets)
);

return {
hasErrors: false,
countTotal: totalEnabledRulesCount ?? 0,
countByType: parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
countByType: {
...parseSimpleRuleTypeBucket(aggregations.by_rule_type_id.buckets),
...countRulesBySearchType,
},
countNamespaces: aggregations.namespaces_count.value ?? 0,
};
} catch (err) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { groupRulesBySearchType } from './group_rules_by_search_type';

describe('groupRulesBySearchType', () => {
  // Known search types are mapped onto their telemetry keys; the unrelated
  // `foo` entry must not appear in the result.
  test('should correctly group search types', () => {
    const countsBySearchType = {
      esQuery: 1,
      searchSource: 2,
      esqlQuery: 3,
      foo: 5,
    };

    const grouped = groupRulesBySearchType(countsBySearchType);

    expect(grouped).toEqual({
      '__es-query_es_query': 1,
      '__es-query_search_source': 2,
      '__es-query_esql_query': 3,
    });
  });

  // When none of the expected search types are present, every telemetry key
  // is still emitted with a count of 0.
  test('should fallback to 0 if any of the expected search types are absent', () => {
    const grouped = groupRulesBySearchType({ unknown: 100, bar: 300 });

    expect(grouped).toEqual({
      '__es-query_es_query': 0,
      '__es-query_search_source': 0,
      '__es-query_esql_query': 0,
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { AlertingUsage } from '../types';

/**
 * Converts per-search-type rule counts into the telemetry keys reported for
 * ES Query rules.
 *
 * Only the `esQuery`, `searchSource` and `esqlQuery` entries are read; any
 * other key in the input is ignored, and a missing entry is reported as 0.
 *
 * @param rulesBySearchType - rule counts keyed by search type
 * @returns counts keyed by `__es-query_es_query`, `__es-query_search_source`
 * and `__es-query_esql_query`
 */
export function groupRulesBySearchType(
  rulesBySearchType: Record<string, number>
): AlertingUsage['count_by_type'] {
  // Nullish lookup so that an absent search type still yields an explicit 0.
  const countFor = (searchType: string): number => rulesBySearchType[searchType] ?? 0;

  const esQueryCount = countFor('esQuery');
  const searchSourceCount = countFor('searchSource');
  const esqlQueryCount = countFor('esqlQuery');

  return {
    '__es-query_es_query': esQueryCount,
    '__es-query_search_source': searchSourceCount,
    '__es-query_esql_query': esqlQueryCount,
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F

describe('test telemetry', () => {
const objectRemover = new ObjectRemover(supertest);
const alwaysFiringRuleId: { [key: string]: string } = {};
const esQueryRuleId: { [key: string]: string } = {};

beforeEach(async () => {
await esTestIndexTool.destroy();
Expand Down Expand Up @@ -90,7 +90,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
connectorTypeId: 'test.excluded',
});

alwaysFiringRuleId[space.id] = await createRule({
await createRule({
space: space.id,
ruleOverwrites: {
rule_type_id: 'test.patternFiring',
Expand Down Expand Up @@ -158,6 +158,28 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
actions: [],
},
});
// ES query rule
esQueryRuleId[space.id] = await createRule({
space: space.id,
ruleOverwrites: {
rule_type_id: '.es-query',
schedule: { interval: '1h' },
throttle: null,
params: {
size: 100,
timeWindowSize: 5,
timeWindowUnit: 'm',
thresholdComparator: '>',
threshold: [0],
searchType: 'esqlQuery',
esqlQuery: {
esql: 'from .kibana-alerting-test-data | stats c = count(date) | where c < 0',
},
timeField: 'date_epoch_millis',
},
actions: [],
},
});
}
}

Expand Down Expand Up @@ -220,7 +242,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
function verifyAlertingTelemetry(telemetry: any) {
logger.info(`alerting telemetry - ${JSON.stringify(telemetry)}`);
// total number of enabled rules
expect(telemetry.count_active_total).to.equal(9);
expect(telemetry.count_active_total).to.equal(12);

// total number of disabled rules
expect(telemetry.count_disabled_total).to.equal(3);
Expand All @@ -230,18 +252,26 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
expect(telemetry.count_by_type.test__patternFiring).to.equal(3);
expect(telemetry.count_by_type.test__multipleSearches).to.equal(3);
expect(telemetry.count_by_type.test__throw).to.equal(3);
expect(telemetry.count_by_type['__es-query']).to.equal(3);
expect(telemetry.count_by_type['__es-query_es_query']).to.equal(0);
expect(telemetry.count_by_type['__es-query_search_source']).to.equal(0);
expect(telemetry.count_by_type['__es-query_esql_query']).to.equal(3);

// total number of enabled rules broken down by rule type
expect(telemetry.count_active_by_type.test__patternFiring).to.equal(3);
expect(telemetry.count_active_by_type.test__multipleSearches).to.equal(3);
expect(telemetry.count_active_by_type.test__throw).to.equal(3);
expect(telemetry.count_active_by_type['__es-query']).to.equal(3);
expect(telemetry.count_active_by_type['__es-query_es_query']).to.equal(0);
expect(telemetry.count_active_by_type['__es-query_search_source']).to.equal(0);
expect(telemetry.count_active_by_type['__es-query_esql_query']).to.equal(3);

// throttle time stats
expect(telemetry.throttle_time.min).to.equal('0s');
expect(telemetry.throttle_time.avg).to.equal('0.4s');
expect(telemetry.throttle_time.avg).to.equal('0.3333333333333333s');
expect(telemetry.throttle_time.max).to.equal('1s');
expect(telemetry.throttle_time_number_s.min).to.equal(0);
expect(telemetry.throttle_time_number_s.avg).to.equal(0.4);
expect(telemetry.throttle_time_number_s.avg).to.equal(0.3333333333333333);
expect(telemetry.throttle_time_number_s.max).to.equal(1);

// schedule interval stats
Expand All @@ -254,7 +284,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F

// attached connectors stats
expect(telemetry.connectors_per_alert.min).to.equal(0);
expect(telemetry.connectors_per_alert.avg).to.equal(1);
expect(telemetry.connectors_per_alert.avg).to.equal(0.8);
expect(telemetry.connectors_per_alert.max).to.equal(3);

// number of spaces with rules
Expand All @@ -269,6 +299,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
expect(telemetry.count_by_type.test__noop >= 3).to.be(true);
expect(telemetry.count_by_type.test__multipleSearches >= 3).to.be(true);
expect(telemetry.count_by_type.test__throw >= 3).to.be(true);
expect(telemetry.count_by_type['__es-query'] >= 3).to.be(true);

// average execution time - just checking for non-zero as we can't set an exact number
expect(telemetry.avg_execution_time_per_day > 0).to.be(true);
Expand All @@ -277,6 +308,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
expect(telemetry.avg_execution_time_by_type_per_day.test__patternFiring > 0).to.be(true);
expect(telemetry.avg_execution_time_by_type_per_day.test__multipleSearches > 0).to.be(true);
expect(telemetry.avg_execution_time_by_type_per_day.test__throw > 0).to.be(true);
expect(telemetry.avg_execution_time_by_type_per_day['__es-query'] > 0).to.be(true);

// average es search time - just checking for non-zero as we can't set an exact number
expect(telemetry.avg_es_search_duration_per_day > 0).to.be(true);
Expand Down Expand Up @@ -360,6 +392,16 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
telemetry.percentile_num_generated_actions_by_type_per_day.p99.test__multipleSearches
).to.equal(0);

expect(telemetry.percentile_num_generated_actions_by_type_per_day.p50['__es-query']).to.equal(
0
);
expect(telemetry.percentile_num_generated_actions_by_type_per_day.p90['__es-query']).to.equal(
0
);
expect(telemetry.percentile_num_generated_actions_by_type_per_day.p99['__es-query']).to.equal(
0
);

// percentile calculations for number of alerts
expect(telemetry.percentile_num_alerts_per_day.p50 >= 0).to.be(true);
expect(telemetry.percentile_num_alerts_per_day.p90 >= 0).to.be(true);
Expand Down Expand Up @@ -392,17 +434,21 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
0
);

expect(telemetry.percentile_num_alerts_by_type_per_day.p50['__es-query']).to.equal(0);
expect(telemetry.percentile_num_alerts_by_type_per_day.p90['__es-query']).to.equal(0);
expect(telemetry.percentile_num_alerts_by_type_per_day.p99['__es-query']).to.equal(0);

// rules grouped by execution status
expect(telemetry.count_rules_by_execution_status.success > 0).to.be(true);
expect(telemetry.count_rules_by_execution_status.error > 0).to.be(true);
expect(telemetry.count_rules_by_execution_status.warning).to.equal(0);

// number of rules that has tags
expect(telemetry.count_rules_with_tags).to.equal(12);
expect(telemetry.count_rules_with_tags).to.equal(15);
// rules grouped by notify when
expect(telemetry.count_rules_by_notify_when.on_action_group_change).to.equal(0);
expect(telemetry.count_rules_by_notify_when.on_active_alert).to.equal(0);
expect(telemetry.count_rules_by_notify_when.on_throttle_interval).to.equal(12);
expect(telemetry.count_rules_by_notify_when.on_throttle_interval).to.equal(15);
// rules snoozed
expect(telemetry.count_rules_snoozed).to.equal(0);
// rules muted
Expand All @@ -427,7 +473,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
getService,
spaceId: Spaces[2].id,
type: 'alert',
id: alwaysFiringRuleId[Spaces[2].id],
id: esQueryRuleId[Spaces[2].id],
provider: 'alerting',
actions: new Map([['execute', { gte: 1 }]]),
});
Expand Down Expand Up @@ -474,7 +520,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F
expect(taskState).not.to.be(undefined);
alertingTelemetry = JSON.parse(taskState!);
expect(alertingTelemetry.runs > 0).to.be(true);
expect(alertingTelemetry.count_total).to.equal(12);
expect(alertingTelemetry.count_total).to.equal(15);
});

verifyAlertingTelemetry(alertingTelemetry);
Expand Down

0 comments on commit d89097a

Please sign in to comment.