diff --git a/changelog.d/20241115_095433_kevin_implement_pam_for_meps.rst b/changelog.d/20241115_095433_kevin_implement_pam_for_meps.rst new file mode 100644 index 000000000..669e4651f --- /dev/null +++ b/changelog.d/20241115_095433_kevin_implement_pam_for_meps.rst @@ -0,0 +1,27 @@ +New Functionality +^^^^^^^^^^^^^^^^^ + +- Implement optional PAM capabilities for ensuring user accounts meet + site-specific criteria before starting user endpoints. Within the multi user + endpoint, PAM defaults to off, but is enabled via the ``pam`` field: + + .. code-block:: yaml + :caption: ``config.yaml`` -- Example MEP configuration opting-in to PAM + + multi_user: true + pam: + enable: true + + As authentication is implemented via Globus Auth and identity mapping, the + Globus Compute Endpoint does not implement the authorization or password + managment phases of PAM. It implements account + (|pam_acct_mgmt(3)|_) and session (|pam_open_session(3)|) management. + + For more information, consult :ref:`the PAM section ` of the + documentation. + + .. |pam_acct_mgmt(3)| replace:: ``pam_acct_mgmt(3)`` + .. _pam_acct_mgmt(3): https://www.man7.org/linux/man-pages/man3/pam_acct_mgmt.3.html + .. |pam_open_session(3)| replace:: ``pam_open_session(3)`` + .. _pam_open_session(3): https://www.man7.org/linux/man-pages/man3/pam_open_session.3.html + diff --git a/compute_endpoint/globus_compute_endpoint/boot_persistence.py b/compute_endpoint/globus_compute_endpoint/boot_persistence.py index b77f828ac..9bbdda0f3 100644 --- a/compute_endpoint/globus_compute_endpoint/boot_persistence.py +++ b/compute_endpoint/globus_compute_endpoint/boot_persistence.py @@ -4,7 +4,7 @@ import textwrap from click import ClickException -from globus_compute_endpoint.endpoint.config import UserEndpointConfig +from globus_compute_endpoint.endpoint.config.config import UserEndpointConfig from globus_compute_endpoint.endpoint.config.utils import get_config from globus_compute_endpoint.endpoint.endpoint import Endpoint from globus_sdk import GlobusApp diff --git a/compute_endpoint/globus_compute_endpoint/cli.py b/compute_endpoint/globus_compute_endpoint/cli.py index b2fe6ecfb..9f03ec499 100644 --- a/compute_endpoint/globus_compute_endpoint/cli.py +++ b/compute_endpoint/globus_compute_endpoint/cli.py @@ -626,8 +626,10 @@ def _do_start_endpoint( no_color=state.no_color, ) + _no_fn_list_canary = -15 # an arbitrary random integer; invalid as an allow_list reg_info = {} - config_str = None + config_str: str | None = None + fn_allow_list: list[str] | None | int = _no_fn_list_canary if sys.stdin and not (sys.stdin.closed or sys.stdin.isatty()): try: stdin_data = json.loads(sys.stdin.read()) @@ -641,6 +643,9 @@ def _do_start_endpoint( reg_info = stdin_data.get("amqp_creds", {}) config_str = stdin_data.get("config", None) + fn_allow_list = stdin_data.get("allowed_functions", _no_fn_list_canary) + + del stdin_data # clarity for intended scope except Exception as e: exc_type = e.__class__.__name__ @@ -654,6 +659,9 @@ def _do_start_endpoint( ep_config = get_config(ep_dir) del config_str + if fn_allow_list != _no_fn_list_canary: + ep_config.allowed_functions = fn_allow_list + if not state.debug and ep_config.debug: setup_logging( logfile=ep_dir / "endpoint.log", diff --git a/compute_endpoint/globus_compute_endpoint/endpoint/config/__init__.py b/compute_endpoint/globus_compute_endpoint/endpoint/config/__init__.py index 4656f77b5..e3d2eda84 100644 --- a/compute_endpoint/globus_compute_endpoint/endpoint/config/__init__.py +++ b/compute_endpoint/globus_compute_endpoint/endpoint/config/__init__.py @@ -10,3 +10,4 @@ ManagerEndpointConfigModel, UserEndpointConfigModel, ) +from .pam import PamConfiguration # noqa: F401 diff --git a/compute_endpoint/globus_compute_endpoint/endpoint/config/config.py b/compute_endpoint/globus_compute_endpoint/endpoint/config/config.py index 9e19a88b3..f0d295b24 100644 --- a/compute_endpoint/globus_compute_endpoint/endpoint/config/config.py +++ b/compute_endpoint/globus_compute_endpoint/endpoint/config/config.py @@ -15,6 +15,7 @@ ) from ..utils import is_privileged +from .pam import PamConfiguration MINIMUM_HEARTBEAT: float = 5.0 log = logging.getLogger(__name__) @@ -116,7 +117,7 @@ def heartbeat_period(self, val: float | int): @property def allowed_functions(self): - if self._allowed_functions: + if self._allowed_functions is not None: return tuple(map(str, self._allowed_functions)) return None @@ -327,6 +328,10 @@ class ManagerEndpointConfig(BaseConfig): configuration item is required, and a ``ValueError`` will be raised if the path does not exist. + :param pam: Whether to enable authorization of user-endpoints via PAM routines, and + optionally specify the PAM service name. See |PamConfiguration|. If not + specified, PAM authorization defaults to disabled. + :param mu_child_ep_grace_period_s: The web-services send a start-user-endpoint to the endpoint manager ahead of tasks for the target user endpoint. If the user-endpoint is already running, these requests are ignored. To account for @@ -347,6 +352,7 @@ class ManagerEndpointConfig(BaseConfig): .. |BaseConfig| replace:: :class:`BaseConfig ` .. |ManagerEndpointConfig| replace:: :class:`ManagerEndpointConfig ` .. |UserEndpointConfig| replace:: :class:`UserEndpointConfig ` + .. |PamConfiguration| replace:: :class:`PamConfiguration ` .. |setuid(2)| replace:: ``setuid(2)`` .. _setuid(2): https://www.man7.org/linux/man-pages/man2/setuid.2.html @@ -357,6 +363,7 @@ def __init__( *, public: bool = False, identity_mapping_config_path: os.PathLike | str | None = None, + pam: PamConfiguration | None = None, force_mu_allow_same_user: bool = False, mu_child_ep_grace_period_s: float = 30.0, **kwargs, @@ -372,6 +379,8 @@ def __init__( _tmp = identity_mapping_config_path # work with both mypy and flake8 self.identity_mapping_config_path = _tmp # type: ignore[assignment] + self.pam = pam or PamConfiguration(enable=False) + @property def identity_mapping_config_path(self) -> pathlib.Path | None: return self._identity_mapping_config_path diff --git a/compute_endpoint/globus_compute_endpoint/endpoint/config/model.py b/compute_endpoint/globus_compute_endpoint/endpoint/config/model.py index ef7ecfe20..3ae57f8b3 100644 --- a/compute_endpoint/globus_compute_endpoint/endpoint/config/model.py +++ b/compute_endpoint/globus_compute_endpoint/endpoint/config/model.py @@ -11,6 +11,7 @@ validator, ) from globus_compute_endpoint import engines, strategies +from globus_compute_endpoint.endpoint.config.pam import PamConfiguration from parsl import addresses as parsl_addresses from parsl import channels as parsl_channels from parsl import launchers as parsl_launchers @@ -187,3 +188,4 @@ class ManagerEndpointConfigModel(BaseEndpointConfigModel): identity_mapping_config_path: t.Optional[FilePath] force_mu_allow_same_user: t.Optional[bool] mu_child_ep_grace_period_s: t.Optional[float] + pam: t.Optional[PamConfiguration] diff --git a/compute_endpoint/globus_compute_endpoint/endpoint/config/pam.py b/compute_endpoint/globus_compute_endpoint/endpoint/config/pam.py new file mode 100644 index 000000000..894961112 --- /dev/null +++ b/compute_endpoint/globus_compute_endpoint/endpoint/config/pam.py @@ -0,0 +1,26 @@ +from dataclasses import asdict, dataclass + +import yaml + + +@dataclass +class PamConfiguration: + """ + :param enable: Whether to initiate a PAM session for each UEP start request. + + :param service_name: What PAM service name with which to initialize the PAM + session. If a particular MEP has different requirements, define those PAM + requirements in ``/etc/pam.d/``, and specify the service name with this field. + + See :ref:`MEP § PAM ` for more information + """ + + enable: bool = True + service_name: str = "globus-compute-endpoint" + + +def _to_yaml(dumper: yaml.SafeDumper, data: PamConfiguration): + return dumper.represent_mapping("tag:yaml.org,2002:map", asdict(data)) + + +yaml.SafeDumper.add_representer(PamConfiguration, _to_yaml) diff --git a/compute_endpoint/globus_compute_endpoint/endpoint/endpoint_manager.py b/compute_endpoint/globus_compute_endpoint/endpoint/endpoint_manager.py index d87720c2c..1e4d52c43 100644 --- a/compute_endpoint/globus_compute_endpoint/endpoint/endpoint_manager.py +++ b/compute_endpoint/globus_compute_endpoint/endpoint/endpoint_manager.py @@ -14,22 +14,18 @@ import sys import threading import time +import types import typing as t import uuid from concurrent.futures import Future +from contextlib import contextmanager from datetime import datetime from http import HTTPStatus import globus_compute_sdk as GC -from cachetools import TTLCache -from globus_compute_endpoint.endpoint.identity_mapper import PosixIdentityMapper - -try: - import pyprctl -except AttributeError as e: - raise ImportError("pyprctl is not supported on this system") from e import setproctitle import yaml +from cachetools import TTLCache from globus_compute_common.messagepack import pack from globus_compute_common.messagepack.message_types import EPStatusReport from globus_compute_common.pydantic_v1 import BaseModel @@ -42,6 +38,7 @@ serialize_config, ) from globus_compute_endpoint.endpoint.endpoint import Endpoint +from globus_compute_endpoint.endpoint.identity_mapper import PosixIdentityMapper from globus_compute_endpoint.endpoint.rabbit_mq import ( CommandQueueSubscriber, ResultPublisher, @@ -69,6 +66,23 @@ class InvalidUserError(Exception): pass +def _import_pyprctl(): + # Enable conditional import, and create a hook-point for testing to mock + try: + import pyprctl + except AttributeError as e: + raise ImportError("pyprctl is not supported on this system") from e + + return pyprctl + + +def _import_pam() -> types.ModuleType: + # Enable conditional import, and create a hook-point for testing to mock + from globus_compute_endpoint import pam + + return pam + + class UserEndpointRecord(BaseModel): ep_name: str local_user_info: t.Optional[pwd.struct_passwd] @@ -104,6 +118,14 @@ def __init__( self.conf_dir = conf_dir self._config = config + + # UX - test conditional imports *now*, rather than when a request comes in; + # this gives immediate feedback to an implementing admin if something is awry + if config.pam.enable: + _import_pam() + else: + _import_pyprctl() + self._reload_requested = False self._time_to_stop = False self._kill_event = threading.Event() @@ -761,6 +783,83 @@ def send_failure_notice( finally: sys.exit() + @contextmanager + def do_host_auth(self, username: str): + def _affix_logd(prefix: str = "", suffix: str = ""): + def _wrap(msg, *a, **k): + log.debug(f"{prefix}{msg}{suffix}", *a, **k) + + return _wrap + + if not self._config.pam.enable: + try: + logd = _affix_logd(f"PRCTL ({username}): ") + logd("Importing module") + pyprctl = _import_pyprctl() + except Exception: + log.exception(f"({username}) Failed to import PRCTL library") + raise PermissionError("see your system administrator") from None + + yield + + try: + # If the administrator has *not* enabled PAM, then assume the + # intention is for a paranoid safe process and drop all + # privileges now ... + logd("Dropping all process capabilities") + pyprctl.CapState().set_current() + + # ... and stating that even if exec'ing might return some + # privileges, "no." In particular after this, SETUID executables + # invoked from this process root will not get privileges + logd("Allowing no new process privileges (no setuid executables!)") + + pyprctl.set_no_new_privs() + except Exception: + log.exception(f"({username}) Failed to import PRCTL library") + raise PermissionError("see your system administrator") from None + + return + + sname = self._config.pam.service_name + try: + logd = _affix_logd(f"PAM ({sname}, {username}): ") + logd("Importing module") + pam = _import_pam() + + logd("Creating handle") + with pam.PamHandle(sname, username=username) as pamh: + logd("Invoking account stage") + pamh.pam_acct_mgmt() + logd("Creating credentials") + pamh.credentials_establish() + logd("Opening session") + pamh.pam_open_session() + + yield + + # wiped by initgroups, so reinitialize + logd("Recreating credentials") + pamh.credentials_establish() + logd("Closing session") + pamh.pam_close_session() + logd("Removing credentials") + pamh.credentials_delete() + + logd("Closing handle") + + except pam.PamError as e: + log.error(str(e)) # Share pamlib error with admin ... + + # ... but be opaque with user. + raise PermissionError("see your system administrator") from None + + except Exception: + log.exception(f"Unhandled error during PAM session for {username}") + + # Regardless, be opaque with user. + raise PermissionError("see your system administrator") from None + def cmd_start_endpoint( self, user_record: pwd.struct_passwd, @@ -898,25 +997,25 @@ def cmd_start_endpoint( # who run the multi-user setup as a non-privileged user, there is # no need to change the user: they're already executing _as that # uid_! - log.debug("Initializing groups for %s, %s", uname, gid) - os.initgroups(uname, gid) # raises (good!) on error - exit_code += 1 - # But actually becoming the correct UID is _not_ fungible. If we - # can't -- for whatever reason -- that's a problem. So do NOT - # ignore the potential error. - log.debug("Setting process group for %s to %s", pid, gid) - os.setresgid(gid, gid, gid) # raises (good!) on error - exit_code += 1 - log.debug("Setting process uid for %s to %s (%s)", pid, uid, uname) - os.setresuid(uid, uid, uid) # raises (good!) on error - exit_code += 1 + with self.do_host_auth(uname): + log.debug("Setting process group for %s to %s", pid, gid) + os.setresgid(gid, gid, gid) # raises (good!) on error + exit_code += 1 + + log.debug("Initializing groups for %s, %s", uname, gid) + os.initgroups(uname, gid) # raises (good!) on error + exit_code += 1 + + log.debug("Setting process uid for %s to %s (%s)", pid, uid, uname) + os.setresuid(uid, uid, uid) # raises (good!) on error + exit_code += 1 try: # Be paranoid by testing that we *can't* get back to orig_uid os.setuid(orig_uid) except PermissionError: - pass # good; the kernel has our backs now + pass # good; the kernel has our back now else: log.critical( "Unexpectedly regained original privileges! (Should not have" @@ -926,17 +1025,11 @@ def cmd_start_endpoint( # This message is potentially (likely) sent back to the SDK; no # sense in sharing the specifics (i.e., `msg`) beyond the # administrator. - raise PermissionError("PermissionError: failed to start endpoint") + raise PermissionError("failed to start endpoint") del orig_uid, orig_gid exit_code += 1 - # If we had any capabilities, we drop them now. - pyprctl.CapState().set_current() - - # Even if exec'ing might return some privileges, "no." - pyprctl.set_no_new_privs() - # some Q&D verification for admin debugging purposes if not shutil.which(proc_args[0], path=env["PATH"]): log.warning( @@ -980,10 +1073,11 @@ def cmd_start_endpoint( self._config, template_str, user_config_schema, user_opts, user_runtime ) stdin_data_dict = { + "allowed_functions": self._config.allowed_functions, "amqp_creds": kwargs.get("amqp_creds"), "config": user_config, } - stdin_data = json.dumps(stdin_data_dict) + stdin_data = json.dumps(stdin_data_dict, separators=(",", ":")) exit_code += 1 # Reminder: this is *os*.open, not *open*. Descriptors will not be closed diff --git a/compute_endpoint/globus_compute_endpoint/pam/__init__.py b/compute_endpoint/globus_compute_endpoint/pam/__init__.py new file mode 100644 index 000000000..2ca419371 --- /dev/null +++ b/compute_endpoint/globus_compute_endpoint/pam/__init__.py @@ -0,0 +1,338 @@ +from __future__ import annotations + +import typing as t +from ctypes import CDLL, CFUNCTYPE, POINTER, Structure, byref, c_char_p, c_int, c_void_p +from ctypes.util import find_library +from enum import Enum + +pam_lib_name = find_library("pam") +if pam_lib_name is None: + msg = ( + "Failed to find `pam` shared library\n" + "\n Hints:" + "\n - install libpam.so? (via yum, apt, etc.)" + "\n - ldconfig -p" + "\n - LD_PRELOAD" + ) + raise ImportError(msg) + +libpam = CDLL(pam_lib_name) + + +# Constants (interpreted here as Enums) as defined in libpam; see _pam_types.h +PAM_SUCCESS = 0 + + +class PamReturnEnum(Enum): + PAM_SUCCESS = PAM_SUCCESS + + PAM_OPEN_ERR = 1 # dlopen() failure when dynamically loading a service module + PAM_SYMBOL_ERR = 2 + PAM_SERVICE_ERR = 3 + PAM_SYSTEM_ERR = 4 + PAM_BUF_ERR = 5 + PAM_PERM_DENIED = 6 + PAM_AUTH_ERR = 7 + PAM_CRED_INSUFFICIENT = 8 + PAM_AUTHINFO_UNAVAIL = 9 + PAM_USER_UNKNOWN = 10 + PAM_MAXTRIES = 11 + PAM_NEW_AUTHTOK_REQD = 12 + PAM_ACCT_EXPIRED = 13 + PAM_SESSION_ERR = 14 + PAM_CRED_UNAVAIL = 15 + PAM_CRED_EXPIRED = 16 + PAM_CRED_ERR = 17 + PAM_NO_MODULE_DATA = 18 + PAM_CONV_ERR = 19 + PAM_AUTHTOK_ERR = 20 + PAM_AUTHTOK_RECOVERY_ERR = 21 + PAM_AUTHTOK_LOCK_BUSY = 22 + PAM_AUTHTOK_DISABLE_AGING = 23 + PAM_TRY_AGAIN = 24 + PAM_IGNORE = 25 + PAM_ABORT = 26 + PAM_AUTHTOK_EXPIRED = 27 + PAM_MODULE_UNKNOWN = 28 + PAM_BAD_ITEM = 29 + PAM_CONV_AGAIN = 30 + PAM_INCOMPLETE = 31 + + +class PamPrompt(Enum): + # message styles + PAM_PROMPT_ECHO_OFF = 1 + PAM_PROMPT_ECHO_ON = 2 + PAM_ERROR_MSG = 3 + PAM_TEXT_INFO = 4 + + +class PamAuthenticate(Enum): + PAM_DISALLOW_NULL_AUTHTOK = 0x1 + PAM_SILENT = 0x8000 + + +class PamCred(Enum): + PAM_ESTABLISH_CRED = 0x2 + PAM_DELETE_CRED = 0x4 + PAM_REINITIALIZE_CRED = 0x8 + PAM_REFRESH_CRED = 0x10 + PAM_SILENT = 0x8000 + + +class PamAcctMgmt(Enum): + PAM_DISALLOW_NULL_AUTHTOK = 0x1 + PAM_SILENT = 0x8000 + + +class PamSession(Enum): + PAM_SILENT = 0x8000 + + +class PamError(RuntimeError): + def __init__( + self, + pam_handle: PamHandle, + pam_error_code: int, + ): + msg = _pam_strerror(pam_handle, pam_error_code).decode() + try: + const_name = f" [{PamReturnEnum(pam_error_code).name}]" + except ValueError: + const_name = "" + super().__init__(f"(error code: {pam_error_code}{const_name}) {msg}") + + +class PamMessage(Structure): + """Wrapper for `pam_message` struct; see _pam_types.h""" + + _fields_ = (("msg_style", c_int), ("msg", c_char_p)) + + def __repr__(self) -> str: + c_name = type(self).__name__ + return f"{c_name}({self.msg_style}, {self.msg!r})" + + +class PamResponse(Structure): + """Wrapper for `pam_response` struct; see _pam_types.h""" + + _fields_ = (("resp", c_char_p), ("resp_retcode", c_int)) + + def __repr__(self) -> str: + c_name = type(self).__name__ + # emit the `resp_retcode` for completeness, but as of this implementation + # note that the C library does not use `resp_retcode` + return f"{c_name}({self.resp!r}, {self.resp_retcode})" + + +_conv_fn = CFUNCTYPE( + c_int, c_int, POINTER(POINTER(PamMessage)), POINTER(POINTER(PamResponse)), c_void_p +) + + +@_conv_fn +def _noop(*a): + return 0 + + +class PamConversation(Structure): + """Wrapper for `pam_conv` struct; see _pam_types.h""" + + _fields_ = (("conv", _conv_fn), ("appdata_ptr", c_void_p)) + + def __repr__(self) -> str: + # Use angle brackets as this output can't be used to create another instance + c_name = type(self).__name__ + return f"{c_name}<{self.conv}, {self.appdata_ptr}>" + + +PamConversationFunctionType = t.Callable[[int, PamMessage, PamResponse, c_void_p], int] + + +class PamHandle(Structure): + """Object-oriented wrapper of pam_handle_t""" + + _fields_ = (("handle", c_void_p),) + + def __init__( + self, + service_name: str = "", + username: str = "", + conversation_fn: PamConversationFunctionType | None = None, + *, + max_username_len: int = 512, + ): + super().__init__() + + self._started = False + self.service_name = service_name + self.username = username + self.conversation_fn = conversation_fn + self.last_rc: int = 0 + self.max_username_len = max_username_len + + @property + def service_name(self) -> str: + return self._service_name + + @service_name.setter + def service_name(self, name: str): + if name is None: + raise ValueError("service_name must be a string") + self._service_name = name + + @property + def max_username_len(self) -> int: + return self._max_username_len + + @max_username_len.setter + def max_username_len(self, val: int): + self._max_username_len = max(1, val) + + @property + def username(self) -> str: + # Protect buggy PAM implementations from themselves + return self._username[: self.max_username_len] + + @username.setter + def username(self, name: str): + if name is None: + raise ValueError("username must be a string") + self._username = name + + def __enter__(self): + sname = self.service_name + uname = self.username + if not sname: + raise ValueError("Missing service name; use `.service_name` or __init__ ") + if not uname: + raise ValueError("Missing user name; use `.username` or __init__") + self.pam_start(sname, uname, self.conversation_fn) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.pam_end() + + def pam_start( + self, + service_name: str, + username: str, + conversation_fn: ( + t.Callable[[int, PamMessage, PamResponse, c_void_p], int] | None + ), + ): + if self._started: + raise RuntimeError("Existing session not ended.") + conversation_fn = conversation_fn or _noop + pamc = PamConversation(_conv_fn(conversation_fn), 0) + sname_bytes = service_name.encode() + username_bytes = username.encode() + self.last_rc = _pam_start(sname_bytes, username_bytes, byref(pamc), byref(self)) + if self.last_rc != PAM_SUCCESS: + raise PamError(self, self.last_rc) + self._started = True + + def pam_end(self): + if not self._started: + return + self.last_rc = _pam_end(self, self.last_rc) + self._started = False + if self.last_rc != PAM_SUCCESS: + raise PamError(self, self.last_rc) + + def _flags_only_method(self, pam_fn: t.Callable, flags: Enum | int = 0): + """If flags sets more than one bit, pass an int, not an Enum""" + _fl: int = flags if isinstance(flags, int) else flags.value + self.last_rc = pam_fn(self, _fl) + if self.last_rc != PAM_SUCCESS: + raise PamError(self, self.last_rc) + + def pam_setcred(self, flags: PamCred | int = 0): + """If flags sets more than one bit, pass an int, not a PamCred""" + return self._flags_only_method(_pam_setcred, flags) + + def pam_acct_mgmt(self, flags: PamAcctMgmt | int = 0): + """If flags sets more than one bit, pass an int, not a PamAcctMgmt""" + return self._flags_only_method(_pam_acct_mgmt, flags) + + def pam_open_session(self, flags: PamSession | int = 0): + """If flags sets more than one bit, pass an int, not a PamSession""" + return self._flags_only_method(_pam_open_session, flags) + + def pam_close_session(self, flags: PamSession | int = 0): + """If flags sets more than one bit, pass an int, not a PamSession""" + return self._flags_only_method(_pam_close_session, flags) + + def pam_authenticate(self, flags: PamAuthenticate | int = 0): + """If flags sets more than one bit, pass an int, not a PamSession""" + return self._flags_only_method(_pam_authenticate, flags) + + def credentials_establish(self): + return self.pam_setcred(PamCred.PAM_ESTABLISH_CRED) + + def credentials_delete(self): + return self.pam_setcred(PamCred.PAM_DELETE_CRED) + + def credentials_refresh(self): + return self.pam_setcred(PamCred.PAM_REFRESH_CRED) + + def credentials_reinitialize(self): + return self.pam_setcred(PamCred.PAM_REINITIALIZE_CRED) + + +def _pam_fn_unavailable(fn_name: str, exc: Exception): + def wrapped(*_a, **_k): + raise AttributeError(f"{fn_name} not available in loaded PAM library") from exc + + return wrapped + + +# Not try-except wrapped: required PAM-accessible functions or "just go home." +_pam_start = libpam.pam_start +_pam_start.restype = c_int +_pam_start.argtypes = (c_char_p, c_char_p, POINTER(PamConversation), POINTER(PamHandle)) + +_pam_end = libpam.pam_end +_pam_end.restype = c_int +_pam_end.argtypes = (PamHandle, c_int) + +_pam_strerror = libpam.pam_strerror +_pam_strerror.restype = c_char_p +_pam_strerror.argtypes = (PamHandle, c_int) + + +# Other PAM-accessible functions may or may not be required for individual applications, +# so only error out *when they're invoked.* If a function is not exported by the found +# PAM C-library but the calling application never uses it, it won't be a problem. +try: + _pam_authenticate = libpam.pam_authenticate + _pam_authenticate.restype = c_int + _pam_authenticate.argtypes = (PamHandle, c_int) +except Exception as e: + _pam_authenticate = _pam_fn_unavailable("pam_authenticate", e) + +try: + _pam_setcred = libpam.pam_setcred + _pam_setcred.restype = c_int + _pam_setcred.argtypes = (PamHandle, c_int) +except Exception as e: + _pam_setcred = _pam_fn_unavailable("pam_setcred", e) + +try: + _pam_acct_mgmt = libpam.pam_acct_mgmt + _pam_acct_mgmt.restype = c_int + _pam_acct_mgmt.argtypes = (PamHandle, c_int) +except Exception as e: + _pam_acct_mgmt = _pam_fn_unavailable("pam_acct_mgmt", e) + +try: + _pam_open_session = libpam.pam_open_session + _pam_open_session.restype = c_int + _pam_open_session.argtypes = (PamHandle, c_int) + + _pam_close_session = libpam.pam_close_session + _pam_close_session.restype = c_int + _pam_close_session.argtypes = (PamHandle, c_int) +except Exception as e: + _pam_open_session = _pam_fn_unavailable("pam_open_session", e) + _pam_close_session = _pam_fn_unavailable("pam_close_session", e) diff --git a/compute_endpoint/tests/unit/conftest.py b/compute_endpoint/tests/unit/conftest.py index e1623e104..cb5ff38e4 100644 --- a/compute_endpoint/tests/unit/conftest.py +++ b/compute_endpoint/tests/unit/conftest.py @@ -7,6 +7,7 @@ import uuid import pytest +from globus_compute_endpoint.endpoint.config import PamConfiguration from globus_compute_endpoint.engines.helper import execute_task from tests.conftest import randomstring_impl @@ -50,6 +51,7 @@ def pytest_configure(config): "debug": True, "public": True, "identity_mapping_config_path": os.PathLike, + "pam": PamConfiguration, "force_mu_allow_same_user": True, "mu_child_ep_grace_period_s": float, "local_compute_services": True, @@ -80,6 +82,8 @@ def get_random_of_datatype_impl(cls): return random.randint(10_000, 1_000_000) elif issubclass(cls, float): return random.random() * 1_000_000 + elif issubclass(cls, PamConfiguration): + return PamConfiguration(True, "some-service-name") raise NotImplementedError(f"Missing test branch for type: {repr(cls)}") diff --git a/compute_endpoint/tests/unit/test_cli_behavior.py b/compute_endpoint/tests/unit/test_cli_behavior.py index 76510a6de..d041e71b2 100644 --- a/compute_endpoint/tests/unit/test_cli_behavior.py +++ b/compute_endpoint/tests/unit/test_cli_behavior.py @@ -313,6 +313,34 @@ def test_start_ep_reads_stdin( assert reg_info_found == {} +@pytest.mark.parametrize("fn_count", range(-1, 5)) +def test_start_ep_stdin_allowed_fns_overrides_conf( + mocker, run_line, mock_cli_state, make_endpoint_dir, ep_name, fn_count +): + if fn_count == -1: + allowed_fns = None + else: + allowed_fns = tuple(str(uuid.uuid4()) for _ in range(fn_count)) + + conf = UserEndpointConfig(executors=[ThreadPoolEngine]) + conf.allowed_functions = [uuid.uuid4() for _ in range(5)] # to be overridden + mock_get_config = mocker.patch(f"{_MOCK_BASE}get_config") + mock_get_config.return_value = conf + + mock_sys = mocker.patch(f"{_MOCK_BASE}sys") + mock_sys.stdin.closed = False + mock_sys.stdin.isatty.return_value = False + mock_sys.stdin.read.return_value = json.dumps({"allowed_functions": allowed_fns}) + + make_endpoint_dir() + + run_line(f"start {ep_name}") + mock_ep, _ = mock_cli_state + assert mock_ep.start_endpoint.called + (_, _, found_conf, *_), _k = mock_ep.start_endpoint.call_args + assert found_conf.allowed_functions == allowed_fns, "allowed field not overridden!" + + @pytest.mark.parametrize("use_uuid", (True, False)) @mock.patch(f"{_MOCK_BASE}get_config") def test_stop_endpoint( diff --git a/compute_endpoint/tests/unit/test_endpoint_config.py b/compute_endpoint/tests/unit/test_endpoint_config.py index 61c6d73e5..c3b1e5631 100644 --- a/compute_endpoint/tests/unit/test_endpoint_config.py +++ b/compute_endpoint/tests/unit/test_endpoint_config.py @@ -7,6 +7,7 @@ from globus_compute_endpoint.endpoint.config import ( ManagerEndpointConfig, ManagerEndpointConfigModel, + PamConfiguration, UserEndpointConfig, UserEndpointConfigModel, ) @@ -190,8 +191,9 @@ def test_configs_repr_default_kwargs(): assert ( repr(UserEndpointConfig()) == f"UserEndpointConfig(executors=({gce_repr},))" ), "adds default" + defs = f"multi_user=True, pam={PamConfiguration(enable=False)!r}" assert ( - repr(ManagerEndpointConfig()) == "ManagerEndpointConfig(multi_user=True)" + repr(ManagerEndpointConfig()) == f"ManagerEndpointConfig({defs})" ), "mu is on base" diff --git a/compute_endpoint/tests/unit/test_endpointmanager_unit.py b/compute_endpoint/tests/unit/test_endpointmanager_unit.py index db91f6e9d..4d3e5b392 100644 --- a/compute_endpoint/tests/unit/test_endpointmanager_unit.py +++ b/compute_endpoint/tests/unit/test_endpointmanager_unit.py @@ -68,6 +68,11 @@ ) +class MockPamError(Exception): + def __init__(self, *a, **k): + pass + + def mock_ensure_compute_dir(): return pathlib.Path(_mock_localuser_rec.pw_dir) / ".globus_compute" @@ -332,6 +337,55 @@ def create_response( return create_response +def _create_pam_handle_mock(): + try: + # attempt to play nice with systems that do not have PAM installed, and + # rely on those that do to test with spec=PamHandle + from globus_compute_endpoint.pam import PamHandle + + _has_pam = True + except ImportError: + _has_pam = False + + def _create_mock(): + # work with other fixtures (namely fs), that don't like multiple attempts to + # import; do the work once and cache it via closure + while True: + if _has_pam: + yield mock.MagicMock(spec=PamHandle) + else: + yield mock.MagicMock() + + return _create_mock() + + +create_pam_handle_mock = _create_pam_handle_mock() + + +@pytest.fixture +def mock_pamh(): + m = next(create_pam_handle_mock) + m.return_value = m + m.__enter__.return_value = m + yield m + + +@pytest.fixture +def mock_pam(mock_pamh): + with mock.patch(f"{_MOCK_BASE}_import_pam") as m: + m.return_value = m + m.PamHandle = mock_pamh + m.PamError = MockPamError + yield m + + +@pytest.fixture +def mock_ctl(): + with mock.patch(f"{_MOCK_BASE}_import_pyprctl") as m: + m.return_value = m + yield m + + @pytest.mark.parametrize("env", [None, "blar", "local", "production"]) def test_sets_process_title( randomstring, conf_dir, mock_conf, mock_client, mock_setproctitle, env @@ -947,7 +1001,7 @@ def test_iterates_even_if_no_commands(mocker, epmanager_as_root): @pytest.mark.parametrize("hb", (-100, -5, 0, 0.1, 4, 7, 11, None)) def test_heartbeat_period_minimum(conf_dir, mock_conf, hb, ep_uuid, mock_reg_info): if hb is not None: - mock_conf._heartbeat_period = hb # + mock_conf._heartbeat_period = hb assert mock_conf.heartbeat_period == hb, "Avoid config setter" em = EndpointManager(conf_dir, ep_uuid, mock_conf, mock_reg_info) exp_hb = 30.0 if hb is None else max(MINIMUM_HEARTBEAT, hb) @@ -1710,8 +1764,8 @@ def test_start_endpoint_privileges_dropped(successful_exec_from_mocked_root): "priv_func,ec", ( # manual accounting for exit code at failure points - ("initgroups", 71), - ("setresgid", 72), + ("setresgid", 71), + ("initgroups", 72), ("setresuid", 73), ), ) @@ -1754,7 +1808,7 @@ def test_start_endpoint_paranoid_reassumption_check( assert em.send_failure_notice.called, "Expect privilege failure to send notice" _a, k = em.send_failure_notice.call_args m = k["msg"] - exp_msg = "PermissionError: failed to start endpoint]" + exp_msg = "failed to start endpoint]" assert m.endswith(exp_msg), "Expect terse user msg, that doesn't include details" a, _k = mock_log.critical.call_args @@ -1781,7 +1835,8 @@ def test_run_as_same_user_disabled_if_admin( ep_uuid, mock_gcc = mock_client mock_pwd = mocker.patch(f"{_MOCK_BASE}pwd") - mock_prctl = mocker.patch(f"{_MOCK_BASE}pyprctl") + mock_prctl = mocker.patch(f"{_MOCK_BASE}_import_pyprctl") + mock_prctl.return_value = mock_prctl mock_prctl.CapState.get_current.return_value.effective = set() mock_pwd.getpwuid.return_value = namedtuple("getent", "pw_name,pw_uid")("asdf", 0) @@ -1951,8 +2006,7 @@ def test_pipe_size_limit(mocker, mock_log, successful_exec_from_mocked_root, con conf_str = "v: " + "$" * (conf_size - 3) - # Add 34 bytes for dict keys, etc. - stdin_data_size = conf_size + 34 + stdin_data_size = conf_size + 56 # overhead for JSON dict keys, etc. pipe_buffer_size = 512 # Subtract 256 for hard-coded buffer in-code is_valid = pipe_buffer_size - 256 - stdin_data_size >= 0 @@ -1984,6 +2038,30 @@ def _remove_user_config_template(*args, **kwargs): assert pyexc.value.code == _GOOD_EC, "Q&D: verify we exec'ed, based on '+= 1'" +@pytest.mark.parametrize("fn_count", (0, 1, 2, 3, random.randint(4, 100))) +def test_set_uep_allowed_functions( + successful_exec_from_mocked_root, mock_conf_root, fn_count +): + mock_os, *_, em = successful_exec_from_mocked_root + + m = mock.Mock() + mock_os.fdopen.return_value.__enter__.return_value = m + + fns = [str(uuid.uuid4()) for _ in range(fn_count)] + mock_conf_root.allowed_functions = fns + with mock.patch.object(fcntl, "fcntl", return_value=2**20): + # 2**20 == plenty for test + with pytest.raises(SystemExit) as pyexc: + em._event_loop() + + assert pyexc.value.code == _GOOD_EC, "Q&D: verify we exec'ed, based on '+= 1'" + + (received_stdin,), _k = m.write.call_args + parsed_stdin = json.loads(received_stdin) + assert "allowed_functions" in parsed_stdin, "Even empty list should be stated" + assert parsed_stdin["allowed_functions"] == fns + + def test_redirect_stdstreams_to_user_log( successful_exec_from_mocked_root, conf_dir, command_payload ): @@ -2054,3 +2132,148 @@ def test_port_is_respected(mocker, mock_client, mock_conf, conf_dir, port): EndpointManager(conf_dir, ep_uuid, mock_conf) assert mock_update_url_port.call_args[0][1] == port + + +@pytest.mark.parametrize( + "fn_name,pam_enable", + ( + ("_import_pam", True), + ("_import_pyprctl", False), + ), +) +def test_conditional_imports_verified_at_init_for_ux( + conf_dir, mock_conf, ep_uuid, mock_reg_info, mock_ctl, fn_name, pam_enable +): + mock_conf.pam.enable = pam_enable + with mock.patch(f"{_MOCK_BASE}{fn_name}") as m: + m.side_effect = MemoryError("test induced") + with pytest.raises(MemoryError): + EndpointManager(conf_dir, ep_uuid, mock_conf, mock_reg_info) + + +def test_pam_disabled(conf_dir, mock_conf, ep_uuid, mock_reg_info, mock_ctl, mock_pam): + em = EndpointManager(conf_dir, ep_uuid, mock_conf, mock_reg_info) + + mock_conf.pam.enable = False + with em.do_host_auth("some user name"): + pass + assert not mock_pam.called, "PAM was disable; should *not* attempt PAM" + assert mock_ctl.CapState.called, "No PAM? No privileges." + assert mock_ctl.set_no_new_privs.called, "No PAM? No privileges." + + +def test_pam_enabled(conf_dir, mock_conf, ep_uuid, mock_reg_info, mock_ctl, mock_pam): + def install_next_pamf(): + # ensure PAM functions called in appropriate order + fns = [ # reversed because we pop() to get each fn + "credentials_delete", + "pam_close_session", + "pam_open_session", + "credentials_establish", + ] + + def _install_next_test_func(): + if not fns: + return + fn = fns.pop() + getattr(pamh, fn).side_effect = _install_next_test_func + + return _install_next_test_func + + mock_conf.pam.enable = True + pamh = mock_pam.PamHandle + pamh.pam_acct_mgmt.side_effect = install_next_pamf() + pamh.credentials_establish.side_effect = AssertionError("Out of order") + pamh.pam_open_session.side_effect = AssertionError("Out of order") + pamh.pam_close_session.side_effect = AssertionError("Out of order") + pamh.credentials_delete.side_effect = AssertionError("Out of order") + + em = EndpointManager(conf_dir, ep_uuid, mock_conf, mock_reg_info) + with em.do_host_auth("some user name"): + assert pamh.pam_open_session.called, "Complete authentication" + assert not pamh.credentials_delete.called, "PAM session *not* over yet" + assert pamh.credentials_delete.called, "PAM session completes" + + assert not mock_ctl.CapState.called, "Using PAM; admin manages privs" + assert not mock_ctl.set_no_new_privs.called, "Using PAM; admin manages privs" + + +@pytest.mark.parametrize( + "fn_name", + ( + "pam_acct_mgmt", + "credentials_establish", + "pam_open_session", + "pam_close_session", + "credentials_delete", + ), +) +@pytest.mark.parametrize("exc", (MockPamError("test err"), MemoryError("test err"))) +def test_pam_error( + mock_log, conf_dir, mock_conf, ep_uuid, mock_reg_info, fn_name, mock_pam, exc +): + em = EndpointManager(conf_dir, ep_uuid, mock_conf, mock_reg_info) + + mock_conf.pam.enable = True + pamh = mock_pam.PamHandle + username = "some username" + getattr(pamh, fn_name).side_effect = exc + with pytest.raises(PermissionError) as pyt_e: + with em.do_host_auth(username): + pass + + e_str = str(pyt_e.value) + assert "PAM" not in e_str, "User-visible exception should be opaque" + assert "see your system administrator" in e_str, "User-visible should have action" + + if not isinstance(exc, MockPamError): + assert mock_log.exception.called, "Admin log should contain entire exception" + a, _k = mock_log.exception.call_args + + assert username in a[0], "Admin log should contain related username" + + +def test_do_auth_change_uid_then_close( + mock_conf_root, successful_exec_from_mocked_root, mock_pam +): + mock_os, *_, em = successful_exec_from_mocked_root + + def this_func(fn_opener, fn_name: str): + def _mark_called(*_a, **_k): + fn_opener(fn_name) + + return _mark_called + + def set_called(): + fn_calls = {"setresuid", "setresgid", "initgroups"} + + def _called(fn_name): + if fn_name == "pam_open_session": + # these are now allowed + mock_os.setresuid.side_effect = this_func(fn_opener, "setresuid") + mock_os.setresgid.side_effect = this_func(fn_opener, "setresgid") + mock_os.initgroups.side_effect = this_func(fn_opener, "initgroups") + return + + fn_calls.discard(fn_name) + if not fn_calls: + # once we've become the new user, pam session may close + pamh.pam_close_session.side_effect = None + + return _called + + mock_conf_root.pam.enable = True + fn_opener = set_called() + + pamh = mock_pam.PamHandle + pamh.pam_open_session.side_effect = this_func(fn_opener, "pam_open_session") + pamh.pam_close_session.side_effect = AssertionError("Out of order") + mock_os.setresuid.side_effect = AssertionError("Out of order") + mock_os.setresgid.side_effect = AssertionError("Out of order") + mock_os.initgroups.side_effect = AssertionError("Out of order") + + with pytest.raises(SystemExit) as pyexc: + em._event_loop() + + assert pyexc.value.code == _GOOD_EC, "Q&D: verify we exec'ed, based on '+= 1'" + assert pamh.pam_close_session.called diff --git a/compute_endpoint/tests/unit/test_pam.py b/compute_endpoint/tests/unit/test_pam.py new file mode 100644 index 000000000..f28541f68 --- /dev/null +++ b/compute_endpoint/tests/unit/test_pam.py @@ -0,0 +1,154 @@ +import random +from unittest import mock + +import pytest +from globus_compute_endpoint import pam +from globus_compute_endpoint.pam import PamCred, PamError, PamHandle, PamReturnEnum + +_MOCK_BASE = "globus_compute_endpoint.pam." + + +@pytest.fixture +def pamh(): + return PamHandle("some service", "some username") + + +def test_pamerror_includes_enum_name(pamh): + with pamh: + for errenum in PamReturnEnum: + e = PamError(pamh, errenum.value) + assert f"[{errenum.name}]" in str(e), (errenum, "Expect enum name in error") + + +@pytest.mark.parametrize("name_len", (None, -5, 0, 11, 513)) +def test_pamh_username_limits_length(randomstring, name_len): + if name_len is None: + pamh = PamHandle("some service") + assert pamh.max_username_len == 512, "Expect sensible default; match sshd" + else: + pamh = PamHandle("some service", max_username_len=name_len) + + assert pamh.max_username_len > 0 + + pamh.username = randomstring(pamh.max_username_len + 1) + assert ( + len(pamh.username) == pamh.max_username_len + ), "Like ssh, protect buggy PAM implementations from themselves" + + +def test_pamh_start_does_not_clobber_existing_session(pamh): + pamh._started = True + with pytest.raises(RuntimeError): + pamh.pam_start(pamh.service_name, pamh.username, None) + + +def test_pamh_start_raises_on_non_success(pamh): + errenum: PamReturnEnum = random.choice(tuple(PamReturnEnum)[1:]) # no SUCCESS! + with mock.patch(f"{_MOCK_BASE}_pam_start") as mock_start: + mock_start.return_value = errenum.value + with pytest.raises(PamError): + pamh.pam_start(pamh.service_name, pamh.username, None) + assert not pamh._started + + +def test_pamh_start_success(pamh): + with mock.patch(f"{_MOCK_BASE}_pam_start") as mock_start: + mock_start.return_value = 0 + pamh.pam_start(pamh.service_name, pamh.username, None) + + a, _k = mock_start.call_args + assert isinstance(a[0], bytes), "Expect Python str encoded to *bytes*" + assert isinstance(a[1], bytes), "Expect Python str encoded to *bytes*" + assert pamh._started, "Expected to mark session is open" + + +def test_pamh_context_verifies_service_name(pamh): + pamh.service_name = "" + with pytest.raises(ValueError) as pyt_e: + with pamh: + pass + e_str = str(pyt_e.value) + assert "Missing service name" in e_str, "Expect human description of problem" + assert "use `.service_name`" in e_str, "Expect suggestion in message" + assert "__init__" in e_str, "Expect suggestion in message" + + +def test_pamh_context_verifies_username(pamh): + pamh.username = "" + with pytest.raises(ValueError) as pyt_e: + with pamh: + pass + e_str = str(pyt_e.value) + assert "Missing user name" in e_str, "Expect human description of problem" + assert "use `.username`" in e_str, "Expect suggestion in message" + assert "__init__" in e_str, "Expect suggestion in message" + + +def test_pamh_context_starts_session(pamh): + with mock.patch.object(pamh, "pam_start") as mock_start: + with pamh as test_pamh: + assert pamh is test_pamh + + assert mock_start.called + a, _k = mock_start.call_args + assert (a[0], a[1]) == (pamh.service_name, pamh.username), "Expect useful defaults" + + +def test_pamh_context_ends_session(pamh): + with mock.patch.object(pamh, "pam_end") as mock_end: + with pamh: + assert pamh._started + assert mock_end.called + + +def test_pamh_context(pamh): + assert pamh._started is False + with pamh: + assert pamh._started + assert pamh._started is False + + +def test_pamh_flag_only_methods_raises(pamh): + errenum: PamReturnEnum = random.choice(tuple(PamReturnEnum)[1:]) # no SUCCESS! + + def return_nonzero(*a, **k): + return errenum.value + + with pytest.raises(PamError) as pyt_e: + pamh._flags_only_method(return_nonzero) + + assert pamh.last_rc == errenum.value + assert errenum.name in str(pyt_e.value) + + +@pytest.mark.parametrize( + "fn_name", ("setcred", "acct_mgmt", "open_session", "close_session", "authenticate") +) +def test_pamh_flags_only_passthrough(pamh, fn_name): + pamh_fn_name = f"pam_{fn_name}" + pam_lib_fn = getattr(pam, f"_{pamh_fn_name}") + flags = random.randint(0, 0xFFFF) + with mock.patch.object(pamh, "_flags_only_method") as mock_fl: + getattr(pamh, pamh_fn_name)(flags) + + assert mock_fl.called + a, _k = mock_fl.call_args + assert a[0] is pam_lib_fn, "Correct PAM callable supplied" + assert a[1] == flags + + +@pytest.mark.parametrize( + "fn_name,flag", + ( + ("credentials_establish", PamCred.PAM_ESTABLISH_CRED), + ("credentials_delete", PamCred.PAM_DELETE_CRED), + ("credentials_refresh", PamCred.PAM_REFRESH_CRED), + ("credentials_reinitialize", PamCred.PAM_REINITIALIZE_CRED), + ), +) +def test_pamh_creds(pamh, fn_name, flag): + with mock.patch.object(pamh, "pam_setcred") as mock_set: + getattr(pamh, fn_name)() + assert mock_set.called, "Convenience method wrapper calls pam_setcred" + a, _k = mock_set.call_args + assert a[0] is flag diff --git a/docs/endpoints/config_reference.rst b/docs/endpoints/config_reference.rst index 51f1e7817..2be44f2bf 100644 --- a/docs/endpoints/config_reference.rst +++ b/docs/endpoints/config_reference.rst @@ -89,10 +89,10 @@ But both engines will only run tasks *on the endpoint host machine*. If the end strictly limited to a single host (e.g., a home desktop, an idle workstation), then these engines may be the simplest option. -For running in a multi-node setup (e.g., clusters, with scheduling software like PBS or -Slurm), the ``GlobusComputeEngine`` enables much more concurrency. This engine has more -options and is similarly more complicated to configure. A rough equivalent to the -``ProcessPoolEngine`` example would be: +For running in a multi-node setup (e.g., clusters, with scheduling software like `PBS`_ +or `Slurm`_), the ``GlobusComputeEngine`` enables much more concurrency. This engine +has more options and is similarly more complicated to configure. A rough equivalent to +the ``ProcessPoolEngine`` example would be: .. code-block:: yaml :caption: ``~/.globus_compute/my_first_cluster_setup/config.yaml`` @@ -223,6 +223,7 @@ the |HighThroughputExecutor|_ and the `available providers`_. .. |SlurmProvider| replace:: ``SlurmProvider`` .. _SlurmProvider: https://parsl.readthedocs.io/en/stable/stubs/parsl.providers.SlurmProvider.html +.. _PBS: https://openpbs.org/ .. _Slurm: https://slurm.schedmd.com/overview.html .. _Parsl implements a number of providers: https://parsl.readthedocs.io/en/stable/reference.html#providers .. _available providers: https://parsl.readthedocs.io/en/stable/reference.html#providers @@ -244,17 +245,105 @@ This flag tells the Compute endpoint logic to instantiate a |ManagerEndpointConf instance and thereby to start an MEP and not a UEP. The other configuration items of note are: -- ``identity_mapping_config_path`` |nbsp| --- |nbsp| necessary if the MEP will be run - with ``root`` privileges -- ``display_name`` |nbsp| --- |nbsp| for a more approachable name in the - `Web UI `_ -- ``allowed_functions`` |nbsp| --- |nbsp| to restrict what functions may be run by - UEPs -- ``authentication_policy`` |nbsp| --- |nbsp| for restricting who can use the MEP at - the web service +- ``identity_mapping_config_path`` + + A path to an identity mapping configuration, per the Globus Connect Server `Identity + Mapping Guide`_. The configuration file must be a JSON-list of identity mapping + configurations. The MEP documentation :ref:`discusses the + content` of this file in detail. + + This field is required for MEPs run by the ``root`` user. For MEPs run by + non-``root`` users (or those without ``setuid`` capabilities), this field is + ignored. + + .. code-block:: yaml + :caption: Example MEP ``config.yaml`` with an identity mapping path + + multi_user: true + identity_mapping_config_path: /path/to/idmap_config.json + +- ``public`` + + A boolean value, dictating whether other users can discover this MEP in the Globus + Compute web API and Globus `Web UI`_. It defaults to ``false``. + + .. warning:: + + This field does **not** prevent access to the endpoint. It determines only + whether this MEP is easily discoverable |nbsp| --- |nbsp| do not use this field as + a security control. + + .. code-block:: yaml + :caption: ``config.yaml`` -- example public MEP + + multi_user: true + public: true + +- ``display_name`` + + If not specified, the endpoint will show up in the `Web UI`_ as the given local name. + (In other words, the same name as used to create the endpoint with the ``configure`` + subcommand, and as used for the directory name inside of ``~/.globus_compute/``.) + This field is free-form (accepting space characters, for example). + + .. code-block:: yaml + :caption: ``config.yaml`` -- naming a public MEP + + display_name: Debug queue, 10m max job time (RCC, Midway, UChicago) + multi_user: true + public: true + +- ``allowed_functions`` + + This field specifies an allow-list of functions that may be run by child UEPs. As + this list is available at MEP registration time, not only do the UEPs verify that + each task requests a valid function, but the web-service enforces the allowed + functions list at task submission as well. For more information, see :ref:`MEP § + Function Allow Listing `. + + .. code-block:: yaml + :caption: ``config.yaml`` -- allowing UEPs to only run certain functions + + multi_user: true + allowed_functions: + - 00911703-e76b-4d0b-7b98-6f2e25ab9943 + - e552e7f2-c007-4671-6ca4-3a4fd84f3805 + +- ``authentication_policy`` + + Use a Globus `Authentication Policy`_ to restrict who can use the MEP at the web + service. (Note that authentication policies require a subscription.) See + :ref:`MEP § Authentication Policies ` for more information. + + .. code-block:: yaml + :caption: ``config.yaml`` -- allowing only valid identities to use the MEP + + multi_user: true + authentication_policy: 498c7327-9c6a-4847-c954-1eafa923da8e + subscription_id: 600ba9ac-ef16-4387-30ad-60c6cc3a6853 + +- ``pam`` + + Use `Pluggable Authentication Modules`_ (PAM) for site-specific authorization + requirements. A structure with ``enable`` and ``service_name`` options. Defaults to + disabled and ``globus-compute-endpoint``. See :ref:`MEP § PAM ` for more + information. + + .. code-block:: yaml + :caption: ``config.yaml`` -- enabling PAM + + multi_user: true + pam: + enable: true These options are all described in detail in :doc:`multi_user` + +.. _Identity Mapping Guide: https://docs.globus.org/globus-connect-server/v5.4/identity-mapping-guide/ +.. _Web UI: https://app.globus.org/compute +.. _Authentication Policy: https://docs.globus.org/api/auth/developer-guide/#authentication-policies +.. _Pluggable Authentication Modules: https://en.wikipedia.org/wiki/Linux_PAM + ---- .. _config-class-doc: @@ -283,6 +372,10 @@ available options. :members: :member-order: bysource +.. autoclass:: globus_compute_endpoint.endpoint.config.pam.PamConfiguration + :members: + :member-order: bysource + .. |nbsp| unicode:: 0xA0 :trim: diff --git a/docs/endpoints/multi_user.rst b/docs/endpoints/multi_user.rst index 91f70f229..e8e6f015a 100644 --- a/docs/endpoints/multi_user.rst +++ b/docs/endpoints/multi_user.rst @@ -173,7 +173,7 @@ regex before searching, so the actual regular expression used would be ``^(.*)@example.com$``. Finally, if a match is found, the first saved group is the output (i.e., ``{0}``). If the ``username`` field contained ``mickey97@example.com``, then this configuration would return ``mickey97``, and the MEP would then use -`getpwnam(3)`_ to look up ``mickey97``. But if the username field(s) did not end with +|getpwnam(3)|_ to look up ``mickey97``. But if the username field(s) did not end with ``@example.com``, then it would not match and the start-UEP request would fail. .. code-block:: json @@ -661,6 +661,150 @@ Installing the MEP as a service is the same :ref:`procedure as with a regular en install a systemd unit file. +.. _pam: + +Pluggable Authentication Modules (PAM) +====================================== + +`Pluggable Authentication Modules`_ (PAM) allows administrators to configure +site-specific authentication schemes with arbitrary requirements. For example, where +one site might require users to use `MFA`_, another site could disallow use of the +system for some users at certain times of the day. Rather than rewrite or modify +software to accommodate each site's needs, administrators can simply change their site +configuration. + +As a brief intro to PAM, the architecture is designed with four phases: + +- authentication +- account management +- session management +- password management + +The MEP implements *account* and *session management*. If enabled, then the child +process will create a PAM session, check the account (|pam_acct_mgmt(3)|_), and then +open a session (|pam_open_session(3)|_). If these two steps succeed, then the MEP will +continue to drop privileges and become the UEP. But in these two steps is where the +administrator can implement custom configuration. + +PAM is configured in two parts. For the MEP, use the ``pam`` field: + +.. code-block:: yaml + :caption: ``config.yaml`` to show PAM + :emphasize-lines: 3,4 + + multi_user: true + identity_mapping_config_path: .../some/idmap.json + pam: + enable: true + +This configuration will choose the default PAM service name, +``globus-compute-endpoint`` (see |PamConfiguration|). The service name is the name of +the PAM configuration file in ``/etc/pam.d/``. Use ``service_name`` to tell the MEP +to authorize users against a different PAM configuration: + +.. code-block:: yaml + :caption: ``config.yaml`` with a custom PAM service name + :emphasize-lines: 7 + + multi_user: true + identity_mapping_config_path: .../some/idmap.json + pam: + enable: true + + # the PAM routines will look for `/etc/pam.d/gce-mep123-specific-requirements` + service_name: gce-mep123-specific-requirements + +For clarity, note that the service name is simply passed to |pam_start(3)|_, to tell +PAM which service configuration to apply. + +.. important:: + + If PAM is not enabled, then before starting user endpoints, the child process drops + all capabilities and sets the no-new-privileges flag with the kernel. (See + |prctl(2)|_ and reference ``PR_SET_NO_NEW_PRIVS``). In particular, this will + preclude use of SETUID executables, which can break some schedulers. If your site + requires use of SETUID executables, then PAM must be enabled. + +Though configuring PAM itself is outside the scope of this document (e.g., see +|PAM_SAG|_), we briefly discuss a couple of modules to share a taste of what PAM can +do. For example, if the administrator were to implement a configuration of: + +.. code-block:: text + :caption: ``/etc/pam.d/globus-compute-endpoint`` + + account requisite pam_shells.so + session required pam_limits.so + +then, per |pam_shells(8)|_, any UEP for a user whose shell is not listed in +``/etc/shells`` will not start and the logs will have a line like: + +.. code-block:: text + + ... (error code: 7 [PAM_AUTH_ERR]) Authentication failure + +On the other end, the user's SDK would receive a message like: + +.. code-block:: text + + Request payload failed validation: Unable to start user endpoint process for jessica [exit code: 71; (PermissionError) see your system administrator] + +Similarly, for users who are administratively allowed (i.e., have a valid shell), the +|pam_limits(8)|_ module will install the admin-configured process limits. + +.. hint:: + + The Globus Compute Endpoint software implements the account management and session + phases of PAM. As authentication is enacted via Globua Auth and + :ref:`Identity Mapping `, it does not use PAM's authentication + (|pam_authenticate(3)|_) phase, nor does it attempt to manage the user's password. + Functionally, this means that only PAM configuration lines that begin with + ``account`` and ``session`` will be utilized. + +Look to PAM for a number of tasks (which we tease here, but are similarly out of scope +of this documentation): + +- Setting UEP process capabilities (|pam_cap(8)|_) +- Setting UEP process limits (|pam_limits(8)|_) +- Setting environment variables (|pam_env(8)|_) +- Enforcing ``/var/run/nologin`` (|pam_nologin(8)|_) +- Updating ``/var/log/lastlog`` (|pam_lastlog(8)|_) +- Create user home directory on demand (|pam_mkhomedir(8)|_) + +(If the available PAM modules do not fit the bill, it is also possible to write a +custom module! But sadly, that is also out of scope of this documentation; please see +|PAM_MWG|_.) + +.. _MFA: https://en.wikipedia.org/wiki/Multi-factor_authentication +.. |PAM_SAG| replace:: The Linux-PAM System Administrators' Guide +.. _PAM_SAG: https://www.chiark.greenend.org.uk/doc/libpam-doc/html/Linux-PAM_SAG.html +.. |PAM_MWG| replace:: The Linux-PAM Module Writers' Guide +.. _PAM_MWG: https://www.chiark.greenend.org.uk/doc/libpam-doc/html/Linux-PAM_MWG.html +.. |pam_acct_mgmt(3)| replace:: ``pam_acct_mgmt(3)`` +.. _pam_acct_mgmt(3): https://www.man7.org/linux/man-pages/man3/pam_acct_mgmt.3.html +.. |pam_open_session(3)| replace:: ``pam_open_session(3)`` +.. _pam_open_session(3): https://www.man7.org/linux/man-pages/man3/pam_open_session.3.html +.. |pam_authenticate(3)| replace:: ``pam_authenticate(3)`` +.. _pam_authenticate(3): https://www.man7.org/linux/man-pages/man3/pam_authenticate.3.html +.. |pam_start(3)| replace:: ``pam_start(3)`` +.. _pam_start(3): https://www.man7.org/linux/man-pages/man3/pam_start.3.html +.. |pam_shells(8)| replace:: ``pam_shells(8)`` +.. _pam_shells(8): https://www.man7.org/linux/man-pages/man8/pam_shells.8.html +.. |pam_limits(8)| replace:: ``pam_limits(8)`` +.. _pam_limits(8): https://www.man7.org/linux/man-pages/man8/pam_limits.8.html +.. |pam_cap(8)| replace:: ``pam_cap(8)`` +.. _pam_cap(8): https://www.man7.org/linux/man-pages/man8/pam_cap.8.html +.. |pam_env(8)| replace:: ``pam_env(8)`` +.. _pam_env(8): https://www.man7.org/linux/man-pages/man8/pam_env.8.html +.. |pam_nologin(8)| replace:: ``pam_nologin(8)`` +.. _pam_nologin(8): https://www.man7.org/linux/man-pages/man8/pam_nologin.8.html +.. |pam_lastlog(8)| replace:: ``pam_lastlog(8)`` +.. _pam_lastlog(8): https://www.man7.org/linux/man-pages/man8/pam_lastlog.8.html +.. |pam_mkhomedir(8)| replace:: ``pam_mkhomedir(8)`` +.. _pam_mkhomedir(8): https://www.man7.org/linux/man-pages/man8/pam_mkhomedir.8.html + +.. |prctl(2)| replace:: ``prctl(2)`` +.. _prctl(2): https://www.man7.org/linux/man-pages/man2/prctl.2.html + .. _auth-policies: Authentication Policies @@ -711,6 +855,13 @@ Administrators can create new authentication policies via the `Globus Auth API occurred to satisfy the policy. Setting this will also set ``high_assurance`` to ``true``. + .. attention:: + + For performance reasons, the web-service caches lookups for 60s. Pragmatically, + this means that smallest timeout that Compute supports is 1 minute, even though it + is possible to set required authorizations for high assurance policies to smaller + time intervals. + Apply an Existing Authentication Policy --------------------------------------- @@ -730,8 +881,10 @@ make the necessary changes to ``config.yaml``: $ globus-compute-endpoint configure my-mep --multi-user --auth-policy 2340174a-1a0e-46d8-a958-7c3ddf2c834a -Function Whitelisting -===================== +.. _function-allowlist: + +Function Allow Listing +====================== To require that UEPs only invoke certain functions, specify the ``allowed_functions`` top-level configuration item: @@ -837,7 +990,7 @@ The workflow for a task sent to a MEP roughly follows these steps: #. The MEP maps the Globus Auth identity in the start-UEP-request to a local (POSIX) username. -#. The MEP ascertains the host-specific UID based on a `getpwnam(3)`_ call with the +#. The MEP ascertains the host-specific UID based on a |getpwnam(3)|_ call with the local username from the previous step. #. The MEP starts a UEP as the UID from the previous step. @@ -1028,7 +1181,7 @@ Administrator Quickstart #. Setup the identity mapping configuration |nbsp| --- |nbsp| this depends on your site's specific requirements and may take some trial and error. The key point is to be able to take a Globus Auth Identity set, and map it to a local username *on this - resource* |nbsp| --- |nbsp| this resulting username will be passed to `getpwnam(3)`_ + resource* |nbsp| --- |nbsp| this resulting username will be passed to |getpwnam(3)|_ to ascertain a UID for the user. This file is linked in ``config.yaml`` (from the previous step's output), and, per initial configuration, is set to ``example_identity_mapping_config.json``. While the configuration is syntactically @@ -1119,6 +1272,7 @@ Administrator Quickstart .. _Authentication Policies documentation: https://docs.globus.org/api/auth/developer-guide/#authentication_policy_fields .. |globus-identity-mapping| replace:: ``globus-identity-mapping`` .. _globus-identity-mapping: https://pypi.org/project/globus-identity-mapping/ +.. |getpwnam(3)| replace:: ``getpwnam(3)`` .. _getpwnam(3): https://www.man7.org/linux/man-pages/man3/getpwnam.3.html .. _Jinja template: https://jinja.palletsprojects.com/en/3.1.x/ .. _Globus Connect Server Identity Mapping Guide: https://docs.globus.org/globus-connect-server/v5.4/identity-mapping-guide/#mapping_recipes @@ -1126,7 +1280,10 @@ Administrator Quickstart .. |UserRuntime| replace:: :class:`UserRuntime ` .. _JSON schema: https://json-schema.org/ +.. |PamConfiguration| replace:: :class:`PamConfiguration ` + .. _virtualenv: https://pypi.org/project/virtualenv/ .. _pipx: https://pypa.github.io/pipx/ .. _conda: https://docs.conda.io/en/latest/ .. _dill: https://pypi.org/project/dill/ +.. _Pluggable Authentication Modules: https://en.wikipedia.org/wiki/Linux_PAM