Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement PAM for MEPs #1724

Merged
merged 2 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions changelog.d/20241115_095433_kevin_implement_pam_for_meps.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
New Functionality
^^^^^^^^^^^^^^^^^

- Implement optional PAM capabilities for ensuring user accounts meet
site-specific criteria before starting user endpoints. Within the multi user
endpoint, PAM defaults to off, but is enabled via the ``pam`` field:

.. code-block:: yaml
:caption: ``config.yaml`` -- Example MEP configuration opting-in to PAM

multi_user: true
pam:
enable: true

As authentication is implemented via Globus Auth and identity mapping, the
Globus Compute Endpoint does not implement the authorization or password
managment phases of PAM. It implements account
(|pam_acct_mgmt(3)|_) and session (|pam_open_session(3)|) management.

For more information, consult :ref:`the PAM section <pam>` of the
documentation.

.. |pam_acct_mgmt(3)| replace:: ``pam_acct_mgmt(3)``
.. _pam_acct_mgmt(3): https://www.man7.org/linux/man-pages/man3/pam_acct_mgmt.3.html
.. |pam_open_session(3)| replace:: ``pam_open_session(3)``
.. _pam_open_session(3): https://www.man7.org/linux/man-pages/man3/pam_open_session.3.html

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import textwrap

from click import ClickException
from globus_compute_endpoint.endpoint.config import UserEndpointConfig
from globus_compute_endpoint.endpoint.config.config import UserEndpointConfig
from globus_compute_endpoint.endpoint.config.utils import get_config
from globus_compute_endpoint.endpoint.endpoint import Endpoint
from globus_sdk import GlobusApp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
ManagerEndpointConfigModel,
UserEndpointConfigModel,
)
from .pam import PamConfiguration # noqa: F401
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)

from ..utils import is_privileged
from .pam import PamConfiguration

MINIMUM_HEARTBEAT: float = 5.0
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -327,6 +328,10 @@ class ManagerEndpointConfig(BaseConfig):
configuration item is required, and a ``ValueError`` will be raised if the path
does not exist.

:param pam: Whether to enable authorization of user-endpoints via PAM routines, and
optionally specify the PAM service name. See |PamConfiguration|. If not
specified, PAM authorization defaults to disabled.

:param mu_child_ep_grace_period_s: The web-services send a start-user-endpoint to
the endpoint manager ahead of tasks for the target user endpoint. If the
user-endpoint is already running, these requests are ignored. To account for
Expand All @@ -347,6 +352,7 @@ class ManagerEndpointConfig(BaseConfig):
.. |BaseConfig| replace:: :class:`BaseConfig <globus_compute_endpoint.endpoint.config.config.BaseConfig>`
.. |ManagerEndpointConfig| replace:: :class:`ManagerEndpointConfig <globus_compute_endpoint.endpoint.config.config.ManagerEndpointConfig>`
.. |UserEndpointConfig| replace:: :class:`UserEndpointConfig <globus_compute_endpoint.endpoint.config.config.UserEndpointConfig>`
.. |PamConfiguration| replace:: :class:`PamConfiguration <globus_compute_endpoint.endpoint.config.pam.PamConfiguration>`

.. |setuid(2)| replace:: ``setuid(2)``
.. _setuid(2): https://www.man7.org/linux/man-pages/man2/setuid.2.html
Expand All @@ -357,6 +363,7 @@ def __init__(
*,
public: bool = False,
identity_mapping_config_path: os.PathLike | str | None = None,
pam: PamConfiguration | None = None,
force_mu_allow_same_user: bool = False,
mu_child_ep_grace_period_s: float = 30.0,
**kwargs,
Expand All @@ -372,6 +379,8 @@ def __init__(
_tmp = identity_mapping_config_path # work with both mypy and flake8
self.identity_mapping_config_path = _tmp # type: ignore[assignment]

self.pam = pam or PamConfiguration(enable=False)

@property
def identity_mapping_config_path(self) -> pathlib.Path | None:
return self._identity_mapping_config_path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
validator,
)
from globus_compute_endpoint import engines, strategies
from globus_compute_endpoint.endpoint.config.pam import PamConfiguration
from parsl import addresses as parsl_addresses
from parsl import channels as parsl_channels
from parsl import launchers as parsl_launchers
Expand Down Expand Up @@ -185,6 +186,7 @@ class ManagerEndpointConfigModel(BaseConfigModel):
identity_mapping_config_path: t.Optional[FilePath]
force_mu_allow_same_user: t.Optional[bool]
mu_child_ep_grace_period_s: t.Optional[float]
pam: t.Optional[PamConfiguration]

class Config:
extra = "forbid"
26 changes: 26 additions & 0 deletions compute_endpoint/globus_compute_endpoint/endpoint/config/pam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from dataclasses import asdict, dataclass

import yaml


@dataclass
class PamConfiguration:
"""
:param enable: Whether to initiate a PAM session for each UEP start request.

:param service_name: What PAM service name with which to initialize the PAM
session. If a particular MEP has different requirements, define those PAM
requirements in ``/etc/pam.d/``, and specify the service name with this field.

See :ref:`MEP § PAM <pam>` for more information
"""

enable: bool = True
service_name: str = "globus-compute-endpoint"


def _to_yaml(dumper: yaml.SafeDumper, data: PamConfiguration):
return dumper.represent_mapping("tag:yaml.org,2002:map", asdict(data))


yaml.SafeDumper.add_representer(PamConfiguration, _to_yaml)
147 changes: 120 additions & 27 deletions compute_endpoint/globus_compute_endpoint/endpoint/endpoint_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,18 @@
import sys
import threading
import time
import types
import typing as t
import uuid
from concurrent.futures import Future
from contextlib import contextmanager
from datetime import datetime
from http import HTTPStatus

import globus_compute_sdk as GC
from cachetools import TTLCache
from globus_compute_endpoint.endpoint.identity_mapper import PosixIdentityMapper

try:
import pyprctl
except AttributeError as e:
raise ImportError("pyprctl is not supported on this system") from e
import setproctitle
import yaml
from cachetools import TTLCache
from globus_compute_common.messagepack import pack
from globus_compute_common.messagepack.message_types import EPStatusReport
from globus_compute_common.pydantic_v1 import BaseModel
Expand All @@ -42,6 +38,7 @@
serialize_config,
)
from globus_compute_endpoint.endpoint.endpoint import Endpoint
from globus_compute_endpoint.endpoint.identity_mapper import PosixIdentityMapper
from globus_compute_endpoint.endpoint.rabbit_mq import (
CommandQueueSubscriber,
ResultPublisher,
Expand Down Expand Up @@ -69,6 +66,23 @@ class InvalidUserError(Exception):
pass


def _import_pyprctl():
# Enable conditional import, and create a hook-point for testing to mock
try:
import pyprctl
except AttributeError as e:
raise ImportError("pyprctl is not supported on this system") from e

return pyprctl


def _import_pam() -> types.ModuleType:
# Enable conditional import, and create a hook-point for testing to mock
from globus_compute_endpoint import pam

return pam


class UserEndpointRecord(BaseModel):
ep_name: str
local_user_info: t.Optional[pwd.struct_passwd]
Expand Down Expand Up @@ -104,6 +118,14 @@ def __init__(

self.conf_dir = conf_dir
self._config = config

# UX - test conditional imports *now*, rather than when a request comes in;
# this gives immediate feedback to an implementing admin if something is awry
if config.pam.enable:
_import_pam()
else:
_import_pyprctl()

self._reload_requested = False
self._time_to_stop = False
self._kill_event = threading.Event()
Expand Down Expand Up @@ -761,6 +783,83 @@ def send_failure_notice(
finally:
sys.exit()

@contextmanager
def do_host_auth(self, username: str):
def _affix_logd(prefix: str = "", suffix: str = ""):
def _wrap(msg, *a, **k):
log.debug(f"{prefix}{msg}{suffix}", *a, **k)

return _wrap

if not self._config.pam.enable:
try:
logd = _affix_logd(f"PRCTL ({username}): ")
logd("Importing module")
pyprctl = _import_pyprctl()
except Exception:
log.exception(f"({username}) Failed to import PRCTL library")
raise PermissionError("see your system administrator") from None

yield

try:
# If the administrator has *not* enabled PAM, then assume the
# intention is for a paranoid safe process and drop all
# privileges now ...
logd("Dropping all process capabilities")
pyprctl.CapState().set_current()

# ... and stating that even if exec'ing might return some
# privileges, "no." In particular after this, SETUID executables
# invoked from this process root will not get privileges
logd("Allowing no new process privileges (no setuid executables!)")

pyprctl.set_no_new_privs()
except Exception:
log.exception(f"({username}) Failed to import PRCTL library")
raise PermissionError("see your system administrator") from None

return

sname = self._config.pam.service_name
try:
logd = _affix_logd(f"PAM ({sname}, {username}): ")
logd("Importing module")
pam = _import_pam()

logd("Creating handle")
with pam.PamHandle(sname, username=username) as pamh:
logd("Invoking account stage")
pamh.pam_acct_mgmt()
logd("Creating credentials")
pamh.credentials_establish()
logd("Opening session")
pamh.pam_open_session()

yield

# wiped by initgroups, so reinitialize
logd("Recreating credentials")
pamh.credentials_establish()
logd("Closing session")
pamh.pam_close_session()
logd("Removing credentials")
pamh.credentials_delete()

logd("Closing handle")

except pam.PamError as e:
log.error(str(e)) # Share pamlib error with admin ...

# ... but be opaque with user.
raise PermissionError("see your system administrator") from None
khk-globus marked this conversation as resolved.
Show resolved Hide resolved

except Exception:
log.exception(f"Unhandled error during PAM session for {username}")

# Regardless, be opaque with user.
raise PermissionError("see your system administrator") from None

def cmd_start_endpoint(
self,
user_record: pwd.struct_passwd,
Expand Down Expand Up @@ -898,25 +997,25 @@ def cmd_start_endpoint(
# who run the multi-user setup as a non-privileged user, there is
# no need to change the user: they're already executing _as that
# uid_!
log.debug("Initializing groups for %s, %s", uname, gid)
os.initgroups(uname, gid) # raises (good!) on error
exit_code += 1

# But actually becoming the correct UID is _not_ fungible. If we
# can't -- for whatever reason -- that's a problem. So do NOT
# ignore the potential error.
log.debug("Setting process group for %s to %s", pid, gid)
os.setresgid(gid, gid, gid) # raises (good!) on error
exit_code += 1
log.debug("Setting process uid for %s to %s (%s)", pid, uid, uname)
os.setresuid(uid, uid, uid) # raises (good!) on error
exit_code += 1
with self.do_host_auth(uname):
log.debug("Setting process group for %s to %s", pid, gid)
os.setresgid(gid, gid, gid) # raises (good!) on error
exit_code += 1

log.debug("Initializing groups for %s, %s", uname, gid)
os.initgroups(uname, gid) # raises (good!) on error
exit_code += 1

log.debug("Setting process uid for %s to %s (%s)", pid, uid, uname)
os.setresuid(uid, uid, uid) # raises (good!) on error
exit_code += 1

try:
# Be paranoid by testing that we *can't* get back to orig_uid
os.setuid(orig_uid)
except PermissionError:
pass # good; the kernel has our backs now
pass # good; the kernel has our back now
else:
log.critical(
"Unexpectedly regained original privileges! (Should not have"
Expand All @@ -926,17 +1025,11 @@ def cmd_start_endpoint(
# This message is potentially (likely) sent back to the SDK; no
# sense in sharing the specifics (i.e., `msg`) beyond the
# administrator.
raise PermissionError("PermissionError: failed to start endpoint")
raise PermissionError("failed to start endpoint")
del orig_uid, orig_gid

exit_code += 1

# If we had any capabilities, we drop them now.
pyprctl.CapState().set_current()

# Even if exec'ing might return some privileges, "no."
pyprctl.set_no_new_privs()

# some Q&D verification for admin debugging purposes
if not shutil.which(proc_args[0], path=env["PATH"]):
log.warning(
Expand Down
Loading
Loading