apache · potiuk · Oct 23, 2024 · Oct 22, 2024
diff --git a/airflow/utils/cli.py b/airflow/utils/cli.py
@@ -39,6 +39,7 @@
 from airflow.exceptions import AirflowException, RemovedInAirflow3Warning
 from airflow.utils import cli_action_loggers, timezone
 from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler
+from airflow.utils.log.secrets_masker import should_hide_value_for_key
 from airflow.utils.platform import getuser, is_terminal_support_colors
 from airflow.utils.session import NEW_SESSION, provide_session
 
@@ -139,11 +140,18 @@ def _build_metrics(func_name, namespace):
     :param namespace: Namespace instance from argparse
     :return: dict with metrics
     """
-    sub_commands_to_check = {"users", "connections"}
+    sub_commands_to_check_for_sensitive_fields = {"users", "connections"}
+    sub_commands_to_check_for_sensitive_key = {"variables"}
     sensitive_fields = {"-p", "--password", "--conn-password"}
     full_command = list(sys.argv)
     sub_command = full_command[1] if len(full_command) > 1 else None
-    if sub_command in sub_commands_to_check:
+    # For cases when value under sub_commands_to_check_for_sensitive_key have sensitive info
+    if sub_command in sub_commands_to_check_for_sensitive_key:
+        key = full_command[-2] if len(full_command) > 3 else None
+        if key and should_hide_value_for_key(key):
+            # Mask the sensitive value since key contain sensitive keyword
+            full_command[-1] = "*" * 8
+    elif sub_command in sub_commands_to_check_for_sensitive_fields:
         for idx, command in enumerate(full_command):
             if command in sensitive_fields:
                 # For cases when password is passed as "--password xyz" (with space between key and value)

diff --git a/tests/utils/test_cli_util.py b/tests/utils/test_cli_util.py
@@ -188,6 +188,47 @@ def test_get_dag_by_pickle(self, session, dag_maker):
         with pytest.raises(AirflowException, match="pickle_id could not be found .* -42"):
             get_dag_by_pickle(pickle_id=-42, session=session)
 
+    @pytest.mark.parametrize(
+        ["given_command", "expected_masked_command"],
+        [
+            (
+                "airflow variables set --description 'needed for dag 4' client_secret_234 7fh4375f5gy353wdf",
+                "airflow variables set --description 'needed for dag 4' client_secret_234 ********",
+            ),
+            (
+                "airflow variables set cust_secret_234 7fh4375f5gy353wdf",
+                "airflow variables set cust_secret_234 ********",
+            ),
+        ],
+    )
+    def test_cli_set_variable_supplied_sensitive_value_is_masked(
+        self, given_command, expected_masked_command, session
+    ):
+        args = given_command.split()
+
+        expected_command = expected_masked_command.split()
+
+        exec_date = timezone.utcnow()
+        namespace = Namespace(dag_id="foo", task_id="bar", subcommand="test", execution_date=exec_date)
+        with mock.patch.object(sys, "argv", args), mock.patch(
+            "airflow.utils.session.create_session"
+        ) as mock_create_session:
+            metrics = cli._build_metrics(args[1], namespace)
+            # Make it so the default_action_log doesn't actually commit the txn, by giving it a next txn
+            # instead
+            mock_create_session.return_value = session.begin_nested()
+            mock_create_session.return_value.bulk_insert_mappings = session.bulk_insert_mappings
+            cli_action_loggers.default_action_log(**metrics)
+
+            log = session.query(Log).order_by(Log.dttm.desc()).first()
+
+        assert metrics.get("start_datetime") <= timezone.utcnow()
+
+        command: str = json.loads(log.extra).get("full_command")
+        # Replace single quotes to double quotes to avoid json decode error
+        command = ast.literal_eval(command)
+        assert command == expected_command
+
 
 @contextmanager
 def fail_action_logger_callback():