From 416fa1a9272408922ce39f8761c513b3a32a2c99 Mon Sep 17 00:00:00 2001 From: Balthazar Rouberol Date: Mon, 4 Nov 2024 20:36:00 +0100 Subject: [PATCH] fab_auth_manager: allow get_user method to return the user authenticated via Kerberos The issue this PR fixes was initially discussed in https://github.com/apache/airflow/discussions/39683. @jijoj-hmetrix and I noticed that, starting from Airflow 2.8.0, Kerberos authentication does not seem to work with the stable API. Even when a user provides a valid Kerberos ticket, and that the whole gssapi authentication dance is successful, and that the user has the required permissions, the API returns a 403 response. ```console $ curl --negotiate -u: -s --service-name airflow https://airflow-test.xxxx.com/api/v1/pools | jq . { "detail": null, "status": 403, "title": "Forbidden", "type": "https://airflow.apache.org/docs/apache-airflow/2.10.2/stable-rest-api-ref.html#section/Errors/PermissionDenied" } ``` I found that [`airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager.get_user`](https://github.com/apache/airflow/blob/baf2b3cb4453d44ff00598a3b0c42d432a7203f9/providers/src/airflow/providers/fab/auth_manager/fab_auth_manager.py#L185-L189) relies on flask-login's [current_user](https://github.com/maxcountryman/flask-login/blob/main/src/flask_login/utils.py#L25) to get the currently logged in user from the session. However, the Kerberos auth backend stores the authenticated user [in:wq`g`](https://github.com/brouberol/airflow/blob/main/providers/src/airflow/providers/fab/auth_manager/api/auth/backend/kerberos_auth.py#L136) and not in the session. This patch allows the current user to be pulled either from `g` or the session, which allows the API to detect the user authenticated via Kerberos, and then link it to Fab permissions. Here's an examle from an instance running with the patch, with a admin user associated with a User account with Admin permissions: ```console $ curl --negotiate -u: -s --service-name airflow https://airflow-test.xxx.com/api/v1/pools { "pools": [ { "deferred_slots": 0, "description": "Default pool", "include_deferred": false, "name": "default_pool", "occupied_slots": 0, "open_slots": 128, "queued_slots": 0, "running_slots": 0, "scheduled_slots": 0, "slots": 128 } ], "total_entries": 1 } ``` I accompany the change with a small unit test. Signed-off-by: Balthazar Rouberol --- .../fab/auth_manager/fab_auth_manager.py | 15 +++++++++-- .../fab/auth_manager/test_fab_auth_manager.py | 26 ++++++++++++++++--- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/providers/src/airflow/providers/fab/auth_manager/fab_auth_manager.py b/providers/src/airflow/providers/fab/auth_manager/fab_auth_manager.py index 8a8fad6788693c..da31579d22c2ef 100644 --- a/providers/src/airflow/providers/fab/auth_manager/fab_auth_manager.py +++ b/providers/src/airflow/providers/fab/auth_manager/fab_auth_manager.py @@ -25,7 +25,7 @@ import packaging.version from connexion import FlaskApi -from flask import Blueprint, url_for +from flask import Blueprint, url_for, g from packaging.version import Version from sqlalchemy import select from sqlalchemy.orm import Session, joinedload @@ -183,9 +183,20 @@ def get_user_display_name(self) -> str: return f"{first_name} {last_name}".strip() def get_user(self) -> User: - """Return the user associated to the user in session.""" + """ + Return the user associated to the user in session. + + Attempt to find the current user in g.user, as defined by the kerberos authentication backend. + If no such user is found, return the `current_user` local proxy object, linked to the user session. + + """ from flask_login import current_user + # If a user has gone through the Kerberos dance, the kerberos authentication manager + # has linked it with a User model, stored in g.user, and not the session. + if current_user.is_anonymous and getattr(g, "user", None) is not None and not g.user.is_anonymous: + return g.user + return current_user def init(self) -> None: diff --git a/providers/tests/fab/auth_manager/test_fab_auth_manager.py b/providers/tests/fab/auth_manager/test_fab_auth_manager.py index 91efb8428c654f..6f12329bf5ea38 100644 --- a/providers/tests/fab/auth_manager/test_fab_auth_manager.py +++ b/providers/tests/fab/auth_manager/test_fab_auth_manager.py @@ -22,8 +22,9 @@ from unittest.mock import Mock import pytest -from flask import Flask +from flask import Flask, g +from contextlib import contextmanager from airflow.exceptions import AirflowConfigException, AirflowException try: @@ -72,6 +73,13 @@ } +@contextmanager +def user_set(app, user): + g.user = user + yield + g.user = None + + @pytest.fixture def auth_manager(): return FabAuthManager(None) @@ -114,12 +122,24 @@ def test_get_user_display_name( assert auth_manager.get_user_display_name() == expected @mock.patch("flask_login.utils._get_user") - def test_get_user(self, mock_current_user, auth_manager): + def test_get_user(self, mock_current_user, minimal_app_for_auth_api, auth_manager): user = Mock() user.is_anonymous.return_value = True mock_current_user.return_value = user + with minimal_app_for_auth_api.app_context(): + assert auth_manager.get_user() == user - assert auth_manager.get_user() == user + @mock.patch("flask_login.utils._get_user") + def test_get_user_from_flask_g(self, mock_current_user, minimal_app_for_auth_api, auth_manager): + session_user = Mock() + session_user.is_anonymous = True + mock_current_user.return_value = session_user + + flask_g_user = Mock() + flask_g_user.is_anonymous = False + with minimal_app_for_auth_api.app_context(): + with user_set(minimal_app_for_auth_api, flask_g_user): + assert auth_manager.get_user() == flask_g_user @pytest.mark.db_test @mock.patch.object(FabAuthManager, "get_user")