diff --git a/airflow/utils/cli.py b/airflow/utils/cli.py index 00e6e15ba312c..74f0dc79f7b77 100644 --- a/airflow/utils/cli.py +++ b/airflow/utils/cli.py @@ -39,6 +39,7 @@ from airflow.exceptions import AirflowException, RemovedInAirflow3Warning from airflow.utils import cli_action_loggers, timezone from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler +from airflow.utils.log.secrets_masker import should_hide_value_for_key from airflow.utils.platform import getuser, is_terminal_support_colors from airflow.utils.session import NEW_SESSION, provide_session @@ -139,11 +140,18 @@ def _build_metrics(func_name, namespace): :param namespace: Namespace instance from argparse :return: dict with metrics """ - sub_commands_to_check = {"users", "connections"} + sub_commands_to_check_for_sensitive_fields = {"users", "connections"} + sub_commands_to_check_for_sensitive_key = {"variables"} sensitive_fields = {"-p", "--password", "--conn-password"} full_command = list(sys.argv) sub_command = full_command[1] if len(full_command) > 1 else None - if sub_command in sub_commands_to_check: + # For cases when value under sub_commands_to_check_for_sensitive_key have sensitive info + if sub_command in sub_commands_to_check_for_sensitive_key: + key = full_command[-2] if len(full_command) > 3 else None + if key and should_hide_value_for_key(key): + # Mask the sensitive value since key contain sensitive keyword + full_command[-1] = "*" * 8 + elif sub_command in sub_commands_to_check_for_sensitive_fields: for idx, command in enumerate(full_command): if command in sensitive_fields: # For cases when password is passed as "--password xyz" (with space between key and value) diff --git a/tests/utils/test_cli_util.py b/tests/utils/test_cli_util.py index 395db77e0392f..bf5fd9bacb32f 100644 --- a/tests/utils/test_cli_util.py +++ b/tests/utils/test_cli_util.py @@ -188,6 +188,47 @@ def test_get_dag_by_pickle(self, session, dag_maker): with pytest.raises(AirflowException, match="pickle_id could not be found .* -42"): get_dag_by_pickle(pickle_id=-42, session=session) + @pytest.mark.parametrize( + ["given_command", "expected_masked_command"], + [ + ( + "airflow variables set --description 'needed for dag 4' client_secret_234 7fh4375f5gy353wdf", + "airflow variables set --description 'needed for dag 4' client_secret_234 ********", + ), + ( + "airflow variables set cust_secret_234 7fh4375f5gy353wdf", + "airflow variables set cust_secret_234 ********", + ), + ], + ) + def test_cli_set_variable_supplied_sensitive_value_is_masked( + self, given_command, expected_masked_command, session + ): + args = given_command.split() + + expected_command = expected_masked_command.split() + + exec_date = timezone.utcnow() + namespace = Namespace(dag_id="foo", task_id="bar", subcommand="test", execution_date=exec_date) + with mock.patch.object(sys, "argv", args), mock.patch( + "airflow.utils.session.create_session" + ) as mock_create_session: + metrics = cli._build_metrics(args[1], namespace) + # Make it so the default_action_log doesn't actually commit the txn, by giving it a next txn + # instead + mock_create_session.return_value = session.begin_nested() + mock_create_session.return_value.bulk_insert_mappings = session.bulk_insert_mappings + cli_action_loggers.default_action_log(**metrics) + + log = session.query(Log).order_by(Log.dttm.desc()).first() + + assert metrics.get("start_datetime") <= timezone.utcnow() + + command: str = json.loads(log.extra).get("full_command") + # Replace single quotes to double quotes to avoid json decode error + command = ast.literal_eval(command) + assert command == expected_command + @contextmanager def fail_action_logger_callback():