Skip to content

Commit

Permalink
Support metric name as list
Browse files Browse the repository at this point in the history
Support metric name as list

Update README

Run change point analysis even for PR but don't publish or save data
fix tests, lint
  • Loading branch information
AnandInguva committed Oct 9, 2023
1 parent 2bbb348 commit 7b12e09
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 28 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/run_perf_alert_tool.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,17 @@ jobs:
- name: Run Change Point Analysis.
working-directory: ./sdks/python/apache_beam/testing/analyzers
shell: bash
run: python perf_analysis.py
run: python perf_analysis.py --config_file_path=./tests_config.yaml --save_alert_metadata
if: github.event_name != 'pull_request'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Run Change Point Analysis.
working-directory: ./sdks/python/apache_beam/testing/analyzers
shell: bash
run: python perf_analysis.py --config_file_path=./tests_config.yaml
if: github.event_name == 'pull_request'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Run change point analysis tests.
working-directory: ./sdks/python/apache_beam/testing/analyzers
shell: bash
Expand Down
2 changes: 0 additions & 2 deletions sdks/python/apache_beam/testing/analyzers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ These are the optional parameters that can be added to the test config in additi

- `test_target`: Identifies the test responsible for the regression.

- `test_description`: Provides a brief overview of the test's function.

- `test_name`: Denotes the name of the test as stored in the BigQuery table.

**Note**: The tool, by default, pulls metrics from BigQuery tables. Ensure that the values for `metrics_dataset`, `metrics_table`, `project`, and `metric_name` align with those defined for performance/load tests. The provided example utilizes this [test configuration](https://github.com/apache/beam/blob/0a91d139dea4276dc46176c4cdcdfce210fc50c4/.test-infra/jenkins/job_InferenceBenchmarkTests_Python.groovy#L30) to populate the necessary values for data retrieval.
Expand Down
69 changes: 48 additions & 21 deletions sdks/python/apache_beam/testing/analyzers/perf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@

import argparse
import logging
import os
import uuid
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Dict
from typing import Optional

import pandas as pd

Expand All @@ -50,6 +48,7 @@
def get_test_config_container(
params: Dict[str, Any],
test_id: str,
metric_name: str,
) -> TestConfigContainer:
"""
Args:
Expand All @@ -61,7 +60,7 @@ def get_test_config_container(
project=params['project'],
metrics_dataset=params['metrics_dataset'],
metrics_table=params['metrics_table'],
metric_name=params['metric_name'],
metric_name=metric_name,
test_id=test_id,
test_description=params['test_description'],
test_name=params.get('test_name', None),
Expand Down Expand Up @@ -89,6 +88,7 @@ def run_change_point_analysis(
test_config_container: TestConfigContainer,
big_query_metrics_fetcher: MetricsFetcher,
change_point_config: ChangePointConfig = ChangePointConfig(),
save_alert_metadata: bool = False,
):
"""
Args:
Expand All @@ -98,12 +98,14 @@ def run_change_point_analysis(
change point analysis.
change_point_config: ChangePointConfig containing parameters to run
change point analysis.
save_alert_metadata: bool indicating if issue metadata
should be published to BigQuery table.
Returns:
bool indicating if a change point is observed and alerted on GitHub.
"""
logging.info(
"Running change point analysis for test ID %s" %
test_config_container.test_id)
"Running change point analysis for test ID :%s on metric: % s" %
(test_config_container.test_id, test_config_container.metric_name))

# test_name will be used to query a single test from
# multiple tests in a single BQ table. Right now, the default
Expand Down Expand Up @@ -152,9 +154,17 @@ def run_change_point_analysis(

is_valid_change_point = True
last_reported_issue_number = None

# create a unique table name for each test and metric combination.
# for beam load tests, metric_name and metric table are enough to
# create a unique table name. For templates/IO tests, add `test_name`.
issue_metadata_table_name = (
f'{test_config_container.metrics_table}_{test_config_container.metric_name}' # pylint: disable=line-too-long
)
if test_config_container.test_name:
issue_metadata_table_name = (
f'{issue_metadata_table_name}_{test_config_container.test_name}')

existing_issue_data = get_existing_issues_data(
table_name=issue_metadata_table_name)

Expand All @@ -172,7 +182,11 @@ def run_change_point_analysis(
timestamps=timestamps,
min_runs_between_change_points=min_runs_between_change_points,
test_id=test_config_container.test_id)
if is_valid_change_point:

# for testing purposes, we don't want to create an issue even if there is
# a valid change point. This is useful when we want to test the change point
# analysis logic without creating an issue.
if is_valid_change_point and save_alert_metadata:
issue_number, issue_url = create_performance_alert(
test_config_container=test_config_container,
metric_container=metric_container,
Expand All @@ -192,7 +206,6 @@ def run_change_point_analysis(
issue_url=issue_url,
change_point_timestamp=timestamps[change_point_index],
)

publish_issue_metadata_to_big_query(
issue_metadata=issue_metadata,
table_name=issue_metadata_table_name,
Expand All @@ -204,7 +217,8 @@ def run_change_point_analysis(
def run(
*,
big_query_metrics_fetcher: MetricsFetcher = BigQueryMetricsFetcher(),
config_file_path: Optional[str] = None,
config_file_path: str = None,
save_alert_metadata: bool = False,
) -> None:
"""
run is the entry point to run change point analysis on test metric
Expand All @@ -219,20 +233,25 @@ def run(
defined in the config file.
"""
if config_file_path is None:
config_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), 'tests_config.yaml')

tests_config: Dict[str, Dict[str, Any]] = read_test_config(config_file_path)

for test_id, params in tests_config.items():
test_config_container = get_test_config_container(params, test_id=test_id)
change_point_config = get_change_point_config(params)
run_change_point_analysis(
test_config_container=test_config_container,
big_query_metrics_fetcher=big_query_metrics_fetcher,
change_point_config=change_point_config,
)
# single test config can have multiple metrics so we need to
# iterate over all the metrics and run change point analysis
# for each metric.
metric_names = params['metric_name']
if isinstance(metric_names, str):
metric_names = [metric_names]

for metric_name in metric_names:
test_config_container = get_test_config_container(
params=params, test_id=test_id, metric_name=metric_name)
change_point_config = get_change_point_config(params)
run_change_point_analysis(
test_config_container=test_config_container,
big_query_metrics_fetcher=big_query_metrics_fetcher,
change_point_config=change_point_config,
save_alert_metadata=save_alert_metadata)


if __name__ == '__main__':
Expand All @@ -241,7 +260,7 @@ def run(
parser = argparse.ArgumentParser()
parser.add_argument(
'--config_file_path',
default=None,
required=True,
type=str,
help='Path to the config file that contains data to run the Change Point '
'Analysis.The default file will used will be '
Expand All @@ -250,9 +269,17 @@ def run(
'performance regression in the tests, '
'please provide an .yml file in the same structure as the above '
'mentioned file. ')
parser.add_argument(
'--save_alert_metadata',
action='store_true',
help='Save perf alert/ GH Issue metadata to BigQuery table.')
known_args, unknown_args = parser.parse_known_args()

if unknown_args:
logging.warning('Discarding unknown arguments : %s ' % unknown_args)

run(config_file_path=known_args.config_file_path)
run(
config_file_path=known_args.config_file_path,
# Set this to true while running in production.
save_alert_metadata=known_args.save_alert_metadata # pylint: disable=line-too-long
)
12 changes: 9 additions & 3 deletions sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ def test_duplicate_change_points_are_not_valid_alerts(self):
get_fake_data_with_no_change_point)
def test_no_alerts_when_no_change_points(self):
test_config_container = analysis.get_test_config_container(
params=self.params, test_id=self.test_id)
params=self.params,
test_id=self.test_id,
metric_name=self.params['metric_name'])
is_alert = analysis.run_change_point_analysis(
test_config_container=test_config_container,
big_query_metrics_fetcher=BigQueryMetricsFetcher())
Expand All @@ -183,7 +185,9 @@ def test_no_alerts_when_no_change_points(self):
return_value=(0, ''))
def test_alert_on_data_with_change_point(self, *args):
test_config_container = analysis.get_test_config_container(
params=self.params, test_id=self.test_id)
params=self.params,
test_id=self.test_id,
metric_name=self.params['metric_name'])
is_alert = analysis.run_change_point_analysis(
test_config_container=test_config_container,
big_query_metrics_fetcher=BigQueryMetricsFetcher())
Expand All @@ -205,7 +209,9 @@ def test_alert_on_data_with_change_point(self, *args):
return_value=(0, ''))
def test_alert_on_data_with_reported_change_point(self, *args):
test_config_container = analysis.get_test_config_container(
params=self.params, test_id=self.test_id)
params=self.params,
test_id=self.test_id,
metric_name=self.params['metric_name'])
is_alert = analysis.run_change_point_analysis(
test_config_container=test_config_container,
big_query_metrics_fetcher=BigQueryMetricsFetcher())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class ChangePointConfig:

@dataclass
class TestConfigContainer:
metric_name: str # make this list instead.
metric_name: str
project: str
metrics_dataset: str
metrics_table: str
Expand Down

0 comments on commit 7b12e09

Please sign in to comment.