Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lens] optimize duplicate formula functions #140859

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7d802fe
collapse duplicate metric aggs
drewdaemon Sep 14, 2022
8988e38
group by emptyAsNull setting
drewdaemon Sep 14, 2022
e83ac24
make sure it doesn't touch other aggs
drewdaemon Sep 14, 2022
e92f797
test that aggConfigParams are preserved
drewdaemon Sep 14, 2022
0a3e40b
revert whitespace change
drewdaemon Sep 14, 2022
10b036d
optimize duplicate last-value functions
drewdaemon Sep 15, 2022
68375fa
Merge branch 'main' into 135265/optimize-duplicate-last-value-functions
drewdaemon Sep 15, 2022
6be5895
get timeshift from filter agg
drewdaemon Sep 19, 2022
07e8bc6
handle filtered aggs
drewdaemon Sep 19, 2022
62b306d
simplify agg deduplication logic
drewdaemon Sep 19, 2022
3a4e613
test collapsing duplicate filtered aggs
drewdaemon Sep 19, 2022
26485c3
Merge branch 'main' into 135265/optimize-redundant-formula-functions
kibanamachine Sep 19, 2022
af43cc9
Merge branch 'main' into 135265/optimize-duplicate-last-value-functions
kibanamachine Sep 19, 2022
4417f9c
Merge branch '135265/optimize-redundant-formula-functions' into 13526…
drewdaemon Sep 19, 2022
6c8414c
dedupe last value aggs
drewdaemon Sep 19, 2022
08705ca
dedupe groupByKey function
drewdaemon Sep 19, 2022
af2b52f
optimize unique values
drewdaemon Sep 20, 2022
16c083c
make sure last values doesnt touch unrelated functions
drewdaemon Sep 20, 2022
10a0ab7
perform terms order-by updates
drewdaemon Sep 21, 2022
d9174ab
port cardinality deduplication to central location
drewdaemon Sep 21, 2022
2cfbf3b
use central groupby for metrics
drewdaemon Sep 21, 2022
ed3ff94
use central groupby for last_value
drewdaemon Sep 21, 2022
69a93cb
fix some tests
drewdaemon Sep 21, 2022
9b567ea
fix bug in cardinality
drewdaemon Sep 21, 2022
aeb1bfc
improve last_value and metrics tests
drewdaemon Sep 21, 2022
65dc910
count operation
drewdaemon Sep 21, 2022
9f0f8e6
update indexpattern test
drewdaemon Sep 21, 2022
c010425
update percentiles test
drewdaemon Sep 21, 2022
0667e01
fix type
drewdaemon Sep 22, 2022
f57224b
Merge branch 'main' into 135265/optimize-duplicate-last-value-functions
kibanamachine Sep 22, 2022
906eb3e
test for dedupe_aggs
drewdaemon Sep 22, 2022
38e0ce3
Merge branch '135265/optimize-duplicate-last-value-functions' of gith…
drewdaemon Sep 22, 2022
7e104be
Merge branch 'main' of github.com:elastic/kibana into 135265/optimize…
drewdaemon Sep 22, 2022
68cc171
consolidate key generation logic
drewdaemon Sep 23, 2022
5304126
remove circ dep
drewdaemon Sep 23, 2022
5337c53
Merge branch 'main' into 135265/optimize-duplicate-last-value-functions
kibanamachine Sep 23, 2022
845822f
Merge branch 'main' into 135265/optimize-duplicate-last-value-functions
flash1293 Sep 26, 2022
a8e560d
Merge branch 'main' into 135265/optimize-duplicate-last-value-functions
kibanamachine Sep 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/plugins/data/public/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ export type {
ParsedInterval,
// expressions
ExecutionContextSearch,
ExpressionFunctionKql,
ExpressionFunctionLucene,
ExpressionFunctionKibana,
ExpressionFunctionKibanaContext,
ExpressionValueSearchContext,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import {
buildExpression,
ExpressionAstExpressionBuilder,
parseExpression,
} from '@kbn/expressions-plugin/common';
import { dedupeAggs } from './dedupe_aggs';
import { operationDefinitionMap } from './operations';
import { OriginalColumn } from './to_expression';

// Unit tests for dedupeAggs: duplicate agg expression builders should be collapsed
// into one, with the id bookkeeping and terms order-by references updated accordingly.
describe('dedupeAggs', () => {
// Builds the two lookup structures dedupeAggs takes alongside the agg builders:
//  - esAggsIdMap: synthetic id `col-<i>-<i>` -> a single stub original column `original-<i>`
//  - aggsToIdsMap: each expression builder -> its synthetic id
const buildMapsFromAggBuilders = (aggs: ExpressionAstExpressionBuilder[]) => {
const esAggsIdMap: Record<string, OriginalColumn[]> = {};
const aggsToIdsMap = new Map();
aggs.forEach((builder, i) => {
const esAggsId = `col-${i}-${i}`;
// only the `id` field is populated; the cast stands in for a full OriginalColumn
esAggsIdMap[esAggsId] = [{ id: `original-${i}` } as OriginalColumn];
aggsToIdsMap.set(builder, esAggsId);
});
return {
esAggsIdMap,
aggsToIdsMap,
};
};

it('removes duplicate aggregations', () => {
// three pairs of expressions that differ only by id: sum, filtered top-metrics, average
const aggs = [
'aggSum id="0" enabled=true schema="metric" field="bytes" emptyAsNull=false',
'aggSum id="1" enabled=true schema="metric" field="bytes" emptyAsNull=false',
'aggFilteredMetric id="2" enabled=true schema="metric" \n customBucket={aggFilter id="2-filter" enabled=true schema="bucket" filter={kql q="hour_of_day: *"}} \n customMetric={aggTopMetrics id="2-metric" enabled=true schema="metric" field="hour_of_day" size=1 sortOrder="desc" sortField="timestamp"}',
'aggFilteredMetric id="3" enabled=true schema="metric" \n customBucket={aggFilter id="3-filter" enabled=true schema="bucket" filter={kql q="hour_of_day: *"}} \n customMetric={aggTopMetrics id="3-metric" enabled=true schema="metric" field="hour_of_day" size=1 sortOrder="desc" sortField="timestamp"}',
'aggAvg id="4" enabled=true schema="metric" field="bytes"',
'aggAvg id="5" enabled=true schema="metric" field="bytes"',
].map((expression) => buildExpression(parseExpression(expression)));

const { esAggsIdMap, aggsToIdsMap } = buildMapsFromAggBuilders(aggs);

// eslint-disable-next-line @typescript-eslint/naming-convention
const { sum, last_value, average } = operationDefinitionMap;

const operations = [sum, last_value, average];

// guard: the test is meaningless unless each operation supplies a grouping key function
operations.forEach((op) => expect(op.getGroupByKey).toBeDefined());

const { esAggsIdMap: newIdMap, aggs: newAggs } = dedupeAggs(
aggs,
esAggsIdMap,
aggsToIdsMap,
operations
);

// one surviving builder per pair
expect(newAggs).toHaveLength(3);

// each surviving id (the first of its pair) now carries both original columns,
// and the ids of the removed builders are gone from the map
expect(newIdMap).toMatchInlineSnapshot(`
Object {
"col-0-0": Array [
Object {
"id": "original-0",
},
Object {
"id": "original-1",
},
],
"col-2-2": Array [
Object {
"id": "original-2",
},
Object {
"id": "original-3",
},
],
"col-4-4": Array [
Object {
"id": "original-4",
},
Object {
"id": "original-5",
},
],
}
`);
});

it('should update any terms order-by reference', () => {
// the terms agg orders by agg "3", the last of three identical median aggs
const aggs = [
'aggTerms id="0" enabled=true schema="segment" field="clientip" orderBy="3" order="desc" size=5 includeIsRegex=false excludeIsRegex=false otherBucket=true otherBucketLabel="Other" missingBucket=false missingBucketLabel="(missing value)"',
'aggMedian id="1" enabled=true schema="metric" field="bytes"',
'aggMedian id="2" enabled=true schema="metric" field="bytes"',
'aggMedian id="3" enabled=true schema="metric" field="bytes"',
].map((expression) => buildExpression(parseExpression(expression)));

const { esAggsIdMap, aggsToIdsMap } = buildMapsFromAggBuilders(aggs);

const { aggs: newAggs } = dedupeAggs(aggs, esAggsIdMap, aggsToIdsMap, [
operationDefinitionMap.median,
]);

// terms agg + the single surviving median
expect(newAggs).toHaveLength(2);

// orderBy must now point at the surviving (first) median instead of the removed one
expect(newAggs[0].functions[0].getArgument('orderBy')?.[0]).toBe('1');
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { AggFunctionsMapping } from '@kbn/data-plugin/public';
import {
ExpressionAstExpressionBuilder,
ExpressionAstFunctionBuilder,
} from '@kbn/expressions-plugin/common';
import { GenericOperationDefinition } from './operations';
import { extractAggId, OriginalColumn } from './to_expression';

/**
 * Buckets `items` by the string key that `getKey` derives for each of them.
 *
 * Items for which `getKey` returns a falsy value (`undefined` or the empty string)
 * are left out of the result. Within a bucket, items keep their input order.
 *
 * Buckets are accumulated in a Map rather than directly on a plain object so that
 * keys which collide with `Object.prototype` members (e.g. "constructor",
 * "toString", "__proto__") are treated as ordinary keys instead of being mistaken
 * for already-existing buckets.
 *
 * @param items the items to group
 * @param getKey derives the grouping key for an item; a falsy result skips the item
 * @returns a plain object mapping each key to the items that produced it
 */
function groupByKey<T>(items: T[], getKey: (item: T) => string | undefined): Record<string, T[]> {
  const buckets = new Map<string, T[]>();

  for (const item of items) {
    const key = getKey(item);
    if (!key) {
      // no usable key: this item does not take part in any group
      continue;
    }

    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      buckets.set(key, [item]);
    }
  }

  // Object.fromEntries defines own data properties, so even "__proto__" is stored safely
  return Object.fromEntries(buckets);
}

/**
 * Consolidates duplicate agg expression builders to increase performance.
 *
 * Builders are grouped by a key made of the operation type plus whatever the first
 * operation with a truthy `getGroupByKey` result returns for them. Builders that no
 * operation produces a key for are left untouched. For every group with more than
 * one member, only the first builder is kept; the original columns registered under
 * the removed builders' ES aggs ids are appended to the kept builder's entry, and
 * the removed ids are dropped from the id map.
 *
 * Any `aggTerms` function whose `orderBy` argument refers to a removed agg is
 * re-pointed at the kept one. NOTE: this is done via `replaceArgument` on the
 * passed-in builders, so terms builders in `_aggs` are modified in place.
 *
 * The `_aggs` array and the `_esAggsIdMap` object (including its column arrays)
 * are not mutated; updated copies are returned.
 *
 * @param _aggs agg expression builders to deduplicate
 * @param _esAggsIdMap ES aggs id -> original columns served by that agg
 * @param aggExpressionToEsAggsIdMap agg expression builder -> its ES aggs id
 * @param allOperations operation definitions consulted for grouping keys
 * @returns the remaining builders (input order preserved) and the updated id map
 * @throws Error if a builder in a duplicate group has no entry in `aggExpressionToEsAggsIdMap`
 */
export function dedupeAggs(
  _aggs: ExpressionAstExpressionBuilder[],
  _esAggsIdMap: Record<string, OriginalColumn[]>,
  aggExpressionToEsAggsIdMap: Map<ExpressionAstExpressionBuilder, string>,
  allOperations: GenericOperationDefinition[]
): {
  aggs: ExpressionAstExpressionBuilder[];
  esAggsIdMap: Record<string, OriginalColumn[]>;
} {
  // shallow copy: entries are replaced (never mutated) below, so the caller's map stays intact
  const esAggsIdMap = { ..._esAggsIdMap };

  const aggsByArgs = groupByKey<ExpressionAstExpressionBuilder>(_aggs, (expressionBuilder) => {
    for (const operation of allOperations) {
      const groupKey = operation.getGroupByKey?.(expressionBuilder);
      if (groupKey) {
        // prefix with the operation type so keys from different operations cannot clash
        return `${operation.type}-${groupKey}`;
      }
    }
    // no operation claimed this builder: it is not a dedupe candidate
    return undefined;
  });

  const termsFuncs = _aggs
    .map((agg) => agg.functions[0])
    .filter((func) => func.name === 'aggTerms') as Array<
    ExpressionAstFunctionBuilder<AggFunctionsMapping['aggTerms']>
  >;

  // builders to drop from the result; collected first so the list is filtered only once
  const redundantBuilders = new Set<ExpressionAstExpressionBuilder>();

  // collapse each group into a single agg expression builder
  Object.values(aggsByArgs).forEach((expressionBuilders) => {
    if (expressionBuilders.length <= 1) {
      // don't need to optimize if there aren't more than one
      return;
    }

    const [firstExpressionBuilder, ...restExpressionBuilders] = expressionBuilders;

    const firstEsAggsId = aggExpressionToEsAggsIdMap.get(firstExpressionBuilder);
    if (firstEsAggsId === undefined) {
      throw new Error('Could not find current column ID for expression builder');
    }

    // copy before appending so the array owned by the caller's map is not mutated
    esAggsIdMap[firstEsAggsId] = [...esAggsIdMap[firstEsAggsId]];

    restExpressionBuilders.forEach((expressionBuilder) => {
      const currentEsAggsId = aggExpressionToEsAggsIdMap.get(expressionBuilder);
      if (currentEsAggsId === undefined) {
        throw new Error('Could not find current column ID for expression builder');
      }

      // throw away all but the first expression builder
      redundantBuilders.add(expressionBuilder);

      // the kept agg now also serves the columns of the removed one
      esAggsIdMap[firstEsAggsId].push(...esAggsIdMap[currentEsAggsId]);

      delete esAggsIdMap[currentEsAggsId];

      // keep terms ordering valid: re-point references to the removed agg at the kept one
      termsFuncs.forEach((func) => {
        if (func.getArgument('orderBy')?.[0] === extractAggId(currentEsAggsId)) {
          func.replaceArgument('orderBy', [extractAggId(firstEsAggsId)]);
        }
      });
    });
  });

  const aggs = _aggs.filter((aggBuilder) => !redundantBuilders.has(aggBuilder));

  return { aggs, esAggsIdMap };
}
Loading