From 1260d9c042f7a6351a4b67611ce39f91764dcbdf Mon Sep 17 00:00:00 2001 From: Shubham Raj <48172486+shubhamraj-git@users.noreply.github.com> Date: Wed, 23 Oct 2024 02:32:03 +0530 Subject: [PATCH] Better handling masking of values of set variable (#43123) --- airflow/utils/cli.py | 12 +++++++++-- tests/utils/test_cli_util.py | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/airflow/utils/cli.py b/airflow/utils/cli.py index e8b9e6f7d2226..1142c5ba0b62b 100644 --- a/airflow/utils/cli.py +++ b/airflow/utils/cli.py @@ -39,6 +39,7 @@ from airflow.exceptions import AirflowException from airflow.utils import cli_action_loggers, timezone from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler +from airflow.utils.log.secrets_masker import should_hide_value_for_key from airflow.utils.platform import getuser, is_terminal_support_colors from airflow.utils.session import NEW_SESSION, provide_session @@ -139,11 +140,18 @@ def _build_metrics(func_name, namespace): :param namespace: Namespace instance from argparse :return: dict with metrics """ - sub_commands_to_check = {"users", "connections"} + sub_commands_to_check_for_sensitive_fields = {"users", "connections"} + sub_commands_to_check_for_sensitive_key = {"variables"} sensitive_fields = {"-p", "--password", "--conn-password"} full_command = list(sys.argv) sub_command = full_command[1] if len(full_command) > 1 else None - if sub_command in sub_commands_to_check: + # For cases when value under sub_commands_to_check_for_sensitive_key have sensitive info + if sub_command in sub_commands_to_check_for_sensitive_key: + key = full_command[-2] if len(full_command) > 3 else None + if key and should_hide_value_for_key(key): + # Mask the sensitive value since key contain sensitive keyword + full_command[-1] = "*" * 8 + elif sub_command in sub_commands_to_check_for_sensitive_fields: for idx, command in enumerate(full_command): if command in sensitive_fields: # For cases when password is passed as "--password xyz" (with space between key and value) diff --git a/tests/utils/test_cli_util.py b/tests/utils/test_cli_util.py index 25003eec6d0eb..ba018cdad36dd 100644 --- a/tests/utils/test_cli_util.py +++ b/tests/utils/test_cli_util.py @@ -185,6 +185,47 @@ def test_get_dag_by_pickle(self, session, dag_maker): with pytest.raises(AirflowException, match="pickle_id could not be found .* -42"): get_dag_by_pickle(pickle_id=-42, session=session) + @pytest.mark.parametrize( + ["given_command", "expected_masked_command"], + [ + ( + "airflow variables set --description 'needed for dag 4' client_secret_234 7fh4375f5gy353wdf", + "airflow variables set --description 'needed for dag 4' client_secret_234 ********", + ), + ( + "airflow variables set cust_secret_234 7fh4375f5gy353wdf", + "airflow variables set cust_secret_234 ********", + ), + ], + ) + def test_cli_set_variable_supplied_sensitive_value_is_masked( + self, given_command, expected_masked_command, session + ): + args = given_command.split() + + expected_command = expected_masked_command.split() + + exec_date = timezone.utcnow() + namespace = Namespace(dag_id="foo", task_id="bar", subcommand="test", execution_date=exec_date) + with mock.patch.object(sys, "argv", args), mock.patch( + "airflow.utils.session.create_session" + ) as mock_create_session: + metrics = cli._build_metrics(args[1], namespace) + # Make it so the default_action_log doesn't actually commit the txn, by giving it a next txn + # instead + mock_create_session.return_value = session.begin_nested() + mock_create_session.return_value.bulk_insert_mappings = session.bulk_insert_mappings + cli_action_loggers.default_action_log(**metrics) + + log = session.query(Log).order_by(Log.dttm.desc()).first() + + assert metrics.get("start_datetime") <= timezone.utcnow() + + command: str = json.loads(log.extra).get("full_command") + # Replace single quotes to double quotes to avoid json decode error + command = ast.literal_eval(command) + assert command == expected_command + @contextmanager def fail_action_logger_callback():