From 7f69e78114142329822bf6ecd3a25ba017a7148a Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Thu, 16 Jun 2022 17:43:52 -0700 Subject: [PATCH] fix(embedded): Retry when executing alert queries to avoid sending transient errors to users as alert failure notifications --- superset/config.py | 3 + superset/reports/commands/alert.py | 9 +- .../integration_tests/reports/alert_tests.py | 119 ++++++++++++++++++ .../integration_tests/superset_test_config.py | 2 + 4 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 tests/integration_tests/reports/alert_tests.py diff --git a/superset/config.py b/superset/config.py index 8a5ec248fb8a3..3996d972f3692 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1072,6 +1072,9 @@ def SQL_QUERY_MUTATOR( # pylint: disable=invalid-name,unused-argument # If set to true no notification is sent, the worker will just log a message. # Useful for debugging ALERT_REPORTS_NOTIFICATION_DRY_RUN = False +# Max tries to run queries to prevent false errors caused by transient errors +# being returned to users. Set to a value >1 to enable retries. +ALERT_REPORTS_QUERY_EXECUTION_MAX_TRIES = 1 # A custom prefix to use on all Alerts & Reports emails EMAIL_REPORTS_SUBJECT_PREFIX = "[Report] " diff --git a/superset/reports/commands/alert.py b/superset/reports/commands/alert.py index 080fa3a3a763a..6382826aad54d 100644 --- a/superset/reports/commands/alert.py +++ b/superset/reports/commands/alert.py @@ -37,6 +37,7 @@ AlertValidatorConfigError, ) from superset.utils.core import override_user +from superset.utils.retries import retry_call logger = logging.getLogger(__name__) @@ -171,7 +172,13 @@ def validate(self) -> None: """ Validate the query result as a Pandas DataFrame """ - df = self._execute_query() + # When there are transient errors when executing queries, users will get + # notified with the error stacktrace which can be avoided by retrying + df = retry_call( + self._execute_query, + exception=AlertQueryError, + max_tries=app.config["ALERT_REPORTS_QUERY_EXECUTION_MAX_TRIES"], + ) if df.empty and self._is_validator_not_null: self._result = None diff --git a/tests/integration_tests/reports/alert_tests.py b/tests/integration_tests/reports/alert_tests.py new file mode 100644 index 0000000000000..d868aaea232d1 --- /dev/null +++ b/tests/integration_tests/reports/alert_tests.py @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument, import-outside-toplevel + +import pandas as pd +from pytest_mock import MockFixture + + +def test_execute_query_succeeded_no_retry( + mocker: MockFixture, app_context: None +) -> None: + + from superset.reports.commands.alert import AlertCommand + + execute_query_mock = mocker.patch( + "superset.reports.commands.alert.AlertCommand._execute_query", + side_effect=lambda: pd.DataFrame([{"sample_col": 0}]), + ) + + command = AlertCommand(report_schedule=mocker.Mock()) + + command.validate() + + assert execute_query_mock.call_count == 1 + + +def test_execute_query_succeeded_with_retries( + mocker: MockFixture, app_context: None +) -> None: + from superset.reports.commands.alert import AlertCommand, AlertQueryError + + execute_query_mock = mocker.patch( + "superset.reports.commands.alert.AlertCommand._execute_query" + ) + + query_executed_count = 0 + # Should match the value defined in superset_test_config.py + expected_max_retries = 3 + + def _mocked_execute_query() -> pd.DataFrame: + nonlocal query_executed_count + query_executed_count += 1 + + if query_executed_count < expected_max_retries: + raise AlertQueryError() + else: + return pd.DataFrame([{"sample_col": 0}]) + + execute_query_mock.side_effect = _mocked_execute_query + execute_query_mock.__name__ = "mocked_execute_query" + + command = AlertCommand(report_schedule=mocker.Mock()) + + command.validate() + + assert execute_query_mock.call_count == expected_max_retries + + +def test_execute_query_failed_no_retry(mocker: MockFixture, app_context: None) -> None: + from superset.reports.commands.alert import AlertCommand, AlertQueryTimeout + + execute_query_mock = mocker.patch( + "superset.reports.commands.alert.AlertCommand._execute_query" + ) + + def _mocked_execute_query() -> None: + raise AlertQueryTimeout + + execute_query_mock.side_effect = _mocked_execute_query + execute_query_mock.__name__ = "mocked_execute_query" + + command = AlertCommand(report_schedule=mocker.Mock()) + + try: + command.validate() + except AlertQueryTimeout: + pass + + assert execute_query_mock.call_count == 1 + + +def test_execute_query_failed_max_retries( + mocker: MockFixture, app_context: None +) -> None: + from superset.reports.commands.alert import AlertCommand, AlertQueryError + + execute_query_mock = mocker.patch( + "superset.reports.commands.alert.AlertCommand._execute_query" + ) + + def _mocked_execute_query() -> None: + raise AlertQueryError + + execute_query_mock.side_effect = _mocked_execute_query + execute_query_mock.__name__ = "mocked_execute_query" + + command = AlertCommand(report_schedule=mocker.Mock()) + + try: + command.validate() + except AlertQueryError: + pass + + # Should match the value defined in superset_test_config.py + assert execute_query_mock.call_count == 3 diff --git a/tests/integration_tests/superset_test_config.py b/tests/integration_tests/superset_test_config.py index 983476490fe13..2907f4ceb8b95 100644 --- a/tests/integration_tests/superset_test_config.py +++ b/tests/integration_tests/superset_test_config.py @@ -115,6 +115,8 @@ def GET_FEATURE_FLAGS_FUNC(ff): ALERT_REPORTS_WORKING_TIME_OUT_KILL = True +ALERT_REPORTS_QUERY_EXECUTION_MAX_TRIES = 3 + class CeleryConfig(object): BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}"