Skip to content

Commit

Permalink
Improve logging in API integration test and replace pRetry with a met…
Browse files Browse the repository at this point in the history
…hod from retryService (#178515)

Related to #176401, #175776

## Summary

This PR:

- Improves logging (I've added debug logs to the helpers that does an
API request such as creating a data view)
- Uses retryService instead of pRetry
- In case of throwing an error in pRetry, when we have 10 retries, it
does not log the retry attempts and we end up in the situation that is
mentioned in this [comment, item
3](#176401 (comment))
    
|Before|After|
|---|---|

|![image](https://github.com/elastic/kibana/assets/12370520/576146f2-09da-4221-a570-6d47e047f229)|![image](https://github.com/elastic/kibana/assets/12370520/0a0897a3-0bd3-4d44-9b79-8f99fb580b4a)|
- Attempts to fix flakiness in rate reason message due to having
different data

![image](https://github.com/elastic/kibana/assets/12370520/dff48ac1-a9bf-4b93-addb-fd40acae382e)


### Flaky test runner
#### Current (after adding refresh index and adjusting timeout)
- [25]
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5463
✅
- [200]
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5465
✅

#### Old
- [25]
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5452
✅
- [200]
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5454
[1 Failed : 25 Canceled: 174 Passed ]
  ##### After checking data is generated in metric threshold
- [25]
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5460
✅
- [200]
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5462
[1 Failed : 199 Canceled ]

Inspired by #173998, special
thanks to @jpdjere and @dmlemeshko for their support and knowledge
sharing.
  • Loading branch information
maryam-saeidi authored Mar 19, 2024
1 parent 28c9753 commit 57522d6
Show file tree
Hide file tree
Showing 15 changed files with 378 additions and 45 deletions.
92 changes: 92 additions & 0 deletions x-pack/test/alerting_api_integration/common/retry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { RetryService } from '@kbn/ftr-common-functional-services';
import type { ToolingLog } from '@kbn/tooling-log';

/**
* Copied from x-pack/test/security_solution_api_integration/test_suites/detections_response/utils/retry.ts
*
* Retry wrapper for async supertests, with a maximum number of retries.
* You can pass in a function that executes a supertest test, and make assertions
* on the response. If the test fails, it will retry the test the number of retries
* that are passed in.
*
* Example usage:
* ```ts
const fleetResponse = await retry<InstallPackageResponse>({
test: async () => {
const testResponse = await supertest
.post(`/api/fleet/epm/packages/security_detection_engine`)
.set('kbn-xsrf', 'xxxx')
.set('elastic-api-version', '2023-10-31')
.type('application/json')
.send({ force: true })
.expect(200);
expect((testResponse.body as InstallPackageResponse).items).toBeDefined();
expect((testResponse.body as InstallPackageResponse).items.length).toBeGreaterThan(0);
return testResponse.body;
},
retryService,
retries: MAX_RETRIES,
timeout: ATTEMPT_TIMEOUT,
});
* ```
* @param test The function containing a test to run
* @param retryService The retry service to use
* @param retries The maximum number of retries
* @param timeout The timeout for each retry
* @param retryDelay The delay between each retry
* @returns The response from the test
*/
export const retry = async <T>({
test,
retryService,
utilityName,
retries = 3,
timeout = 30000,
retryDelay = 200,
logger,
}: {
test: () => Promise<T>;
utilityName: string;
retryService: RetryService;
retries?: number;
timeout?: number;
retryDelay?: number;
logger: ToolingLog;
}): Promise<T> => {
let retryAttempt = 0;
const response = await retryService.tryForTime(
timeout,
async () => {
if (retryAttempt > retries) {
// Log error message if we reached the maximum number of retries
// but don't throw an error, return it to break the retry loop.
const errorMessage = `Reached maximum number of retries for test ${utilityName}: ${
retryAttempt - 1
}/${retries}`;
logger.error(errorMessage);
return new Error(errorMessage);
}

retryAttempt = retryAttempt + 1;

return await test();
},
undefined,
retryDelay
);

// Now throw the error in order to fail the test.
if (response instanceof Error) {
throw response;
}

return response;
};
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export default function ({ getService }: FtrProviderContext) {
const supertest = getService('supertest');
const esDeleteAllIndices = getService('esDeleteAllIndices');
const logger = getService('log');
const retryService = getService('retry');

describe('Custom Threshold rule - AVG - PCT - FIRED', () => {
const CUSTOM_THRESHOLD_RULE_ALERT_INDEX = '.alerts-observability.threshold.alerts-default';
Expand Down Expand Up @@ -60,8 +61,8 @@ export default function ({ getService }: FtrProviderContext) {
schedule: [
{
template: 'good',
start: 'now-15m',
end: 'now',
start: 'now-10m',
end: 'now+5m',
metrics: [
{ name: 'system.cpu.user.pct', method: 'linear', start: 2.5, end: 2.5 },
{ name: 'system.cpu.total.pct', method: 'linear', start: 0.5, end: 0.5 },
Expand All @@ -81,6 +82,8 @@ export default function ({ getService }: FtrProviderContext) {
esClient,
indexName: DATA_VIEW_TITLE,
docCountTarget: 270,
retryService,
logger,
});
});

Expand All @@ -105,10 +108,13 @@ export default function ({ getService }: FtrProviderContext) {
supertest,
name: 'Index Connector: Threshold API test',
indexName: ALERT_ACTION_INDEX,
logger,
});

const createdRule = await createRule({
supertest,
logger,
esClient,
tags: ['observability'],
consumer: 'logs',
name: 'Threshold rule',
Expand Down Expand Up @@ -168,6 +174,8 @@ export default function ({ getService }: FtrProviderContext) {
id: ruleId,
expectedStatus: 'active',
supertest,
retryService,
logger,
});
expect(executionStatus.status).to.be('active');
});
Expand All @@ -177,6 +185,8 @@ export default function ({ getService }: FtrProviderContext) {
esClient,
indexName: CUSTOM_THRESHOLD_RULE_ALERT_INDEX,
ruleId,
retryService,
logger,
});
alertId = (resp.hits.hits[0]._source as any)['kibana.alert.uuid'];

Expand Down Expand Up @@ -232,6 +242,8 @@ export default function ({ getService }: FtrProviderContext) {
const resp = await waitForDocumentInIndex<ActionDocument>({
esClient,
indexName: ALERT_ACTION_INDEX,
retryService,
logger,
});

expect(resp.hits.hits[0]._source?.ruleType).eql('observability.rules.custom_threshold');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export default function ({ getService }: FtrProviderContext) {
const esClient = getService('es');
const supertest = getService('supertest');
const esDeleteAllIndices = getService('esDeleteAllIndices');
const logger = getService('log');
const retryService = getService('retry');

describe('Custom Threshold rule - AVG - PCT - NoData', () => {
const CUSTOM_THRESHOLD_RULE_ALERT_INDEX = '.alerts-observability.threshold.alerts-default';
Expand All @@ -47,6 +49,7 @@ export default function ({ getService }: FtrProviderContext) {
name: DATA_VIEW,
id: DATA_VIEW_ID,
title: DATA_VIEW,
logger,
});
});

Expand All @@ -64,6 +67,7 @@ export default function ({ getService }: FtrProviderContext) {
await deleteDataView({
supertest,
id: DATA_VIEW_ID,
logger,
});
await esDeleteAllIndices([ALERT_ACTION_INDEX]);
});
Expand All @@ -74,10 +78,13 @@ export default function ({ getService }: FtrProviderContext) {
supertest,
name: 'Index Connector: Threshold API test',
indexName: ALERT_ACTION_INDEX,
logger,
});

const createdRule = await createRule({
supertest,
logger,
esClient,
tags: ['observability'],
consumer: 'logs',
name: 'Threshold rule',
Expand Down Expand Up @@ -136,6 +143,8 @@ export default function ({ getService }: FtrProviderContext) {
id: ruleId,
expectedStatus: 'active',
supertest,
retryService,
logger,
});
expect(executionStatus.status).to.be('active');
});
Expand All @@ -145,6 +154,8 @@ export default function ({ getService }: FtrProviderContext) {
esClient,
indexName: CUSTOM_THRESHOLD_RULE_ALERT_INDEX,
ruleId,
retryService,
logger,
});
alertId = (resp.hits.hits[0]._source as any)['kibana.alert.uuid'];

Expand Down Expand Up @@ -200,6 +211,8 @@ export default function ({ getService }: FtrProviderContext) {
const resp = await waitForDocumentInIndex<ActionDocument>({
esClient,
indexName: ALERT_ACTION_INDEX,
retryService,
logger,
});

expect(resp.hits.hits[0]._source?.ruleType).eql('observability.rules.custom_threshold');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ export default function ({ getService }: FtrProviderContext) {
const kibanaServerConfig = config.get('servers.kibana');
const kibanaUrl = format(kibanaServerConfig);
const supertest = getService('supertest');
const logger = getService('log');
const retryService = getService('retry');

describe('Custom Threshold rule - AVG - US - FIRED', () => {
const CUSTOM_THRESHOLD_RULE_ALERT_INDEX = '.alerts-observability.threshold.alerts-default';
Expand All @@ -57,6 +59,7 @@ export default function ({ getService }: FtrProviderContext) {
name: DATA_VIEW_NAME,
id: DATA_VIEW_ID,
title: DATA_VIEW,
logger,
});
});

Expand All @@ -76,6 +79,7 @@ export default function ({ getService }: FtrProviderContext) {
await deleteDataView({
supertest,
id: DATA_VIEW_ID,
logger,
});
});

Expand All @@ -85,10 +89,13 @@ export default function ({ getService }: FtrProviderContext) {
supertest,
name: 'Index Connector: Threshold API test',
indexName: ALERT_ACTION_INDEX,
logger,
});

const createdRule = await createRule({
supertest,
logger,
esClient,
tags: ['observability'],
consumer: 'logs',
name: 'Threshold rule',
Expand Down Expand Up @@ -147,6 +154,8 @@ export default function ({ getService }: FtrProviderContext) {
id: ruleId,
expectedStatus: 'active',
supertest,
retryService,
logger,
});
expect(executionStatus.status).to.be('active');
});
Expand All @@ -156,6 +165,8 @@ export default function ({ getService }: FtrProviderContext) {
esClient,
indexName: CUSTOM_THRESHOLD_RULE_ALERT_INDEX,
ruleId,
retryService,
logger,
});
alertId = (resp.hits.hits[0]._source as any)['kibana.alert.uuid'];

Expand Down Expand Up @@ -211,6 +222,8 @@ export default function ({ getService }: FtrProviderContext) {
const resp = await waitForDocumentInIndex<ActionDocument>({
esClient,
indexName: ALERT_ACTION_INDEX,
retryService,
logger,
});

expect(resp.hits.hits[0]._source?.ruleType).eql('observability.rules.custom_threshold');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export default function ({ getService }: FtrProviderContext) {
const supertest = getService('supertest');
const esDeleteAllIndices = getService('esDeleteAllIndices');
const logger = getService('log');
const retryService = getService('retry');

describe('Custom Threshold rule - CUSTOM_EQ - AVG - BYTES - FIRED', () => {
const CUSTOM_THRESHOLD_RULE_ALERT_INDEX = '.alerts-observability.threshold.alerts-default';
Expand Down Expand Up @@ -67,12 +68,15 @@ export default function ({ getService }: FtrProviderContext) {
esClient,
indexName: DATA_VIEW,
docCountTarget: 75,
retryService,
logger,
});
await createDataView({
supertest,
name: DATA_VIEW,
id: DATA_VIEW_ID,
title: DATA_VIEW,
logger,
});
});

Expand All @@ -90,22 +94,25 @@ export default function ({ getService }: FtrProviderContext) {
await deleteDataView({
supertest,
id: DATA_VIEW_ID,
logger,
});
await esDeleteAllIndices([ALERT_ACTION_INDEX, ...dataForgeIndices]);
await cleanup({ client: esClient, config: dataForgeConfig, logger });
});

// FLAKY: https://github.com/elastic/kibana/issues/175360
describe.skip('Rule creation', () => {
describe('Rule creation', () => {
it('creates rule successfully', async () => {
actionId = await createIndexConnector({
supertest,
name: 'Index Connector: Threshold API test',
indexName: ALERT_ACTION_INDEX,
logger,
});

const createdRule = await createRule({
supertest,
logger,
esClient,
tags: ['observability'],
consumer: 'logs',
name: 'Threshold rule',
Expand Down Expand Up @@ -165,6 +172,8 @@ export default function ({ getService }: FtrProviderContext) {
id: ruleId,
expectedStatus: 'active',
supertest,
retryService,
logger,
});
expect(executionStatus.status).to.be('active');
});
Expand All @@ -174,6 +183,8 @@ export default function ({ getService }: FtrProviderContext) {
esClient,
indexName: CUSTOM_THRESHOLD_RULE_ALERT_INDEX,
ruleId,
retryService,
logger,
});
alertId = (resp.hits.hits[0]._source as any)['kibana.alert.uuid'];

Expand Down Expand Up @@ -230,16 +241,18 @@ export default function ({ getService }: FtrProviderContext) {
const resp = await waitForDocumentInIndex<ActionDocument>({
esClient,
indexName: ALERT_ACTION_INDEX,
retryService,
logger,
});

expect(resp.hits.hits[0]._source?.ruleType).eql('observability.rules.custom_threshold');
expect(resp.hits.hits[0]._source?.alertDetailsUrl).eql(
`https://localhost:5601/app/observability/alerts/${alertId}`
);
expect(resp.hits.hits[0]._source?.reason).eql(
`Custom equation is 1, above the threshold of 0.9. (duration: 1 min, data view: ${DATA_VIEW})`
`Custom equation is 1 B, above the threshold of 0.9 B. (duration: 1 min, data view: ${DATA_VIEW})`
);
expect(resp.hits.hits[0]._source?.value).eql('1');
expect(resp.hits.hits[0]._source?.value).eql('1 B');
});
});
});
Expand Down
Loading

0 comments on commit 57522d6

Please sign in to comment.