Skip to content

Commit

Permalink
get_cgroup_api should check if mountpoints are correct (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
maddieford authored Apr 4, 2024
1 parent 530ed56 commit 3cc51db
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 17 deletions.
45 changes: 41 additions & 4 deletions azurelinuxagent/ga/cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+

import json
import os
import re
import shutil
Expand Down Expand Up @@ -139,6 +139,15 @@ def __init__(self, msg=None):
super(SystemdRunError, self).__init__(msg)


class InvalidCgroupMountpointException(CGroupsException):
"""
Raised when the cgroup mountpoint is invalid.
"""

def __init__(self, msg=None):
super(InvalidCgroupMountpointException, self).__init__(msg)


def get_cgroup_api():
"""
Determines which version of Cgroup should be used for resource enforcement and monitoring by the Agent and returns
Expand All @@ -157,7 +166,7 @@ def get_cgroup_api():
if not os.path.exists(CGROUP_FILE_SYSTEM_ROOT):
v1_mount_point = shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings'])
v2_mount_point = shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings'])
raise CGroupsException("Expected cgroup filesystem to be mounted at '/sys/fs/cgroup', but it is not.\n v1 mount point: {0}\n v2 mount point: {1}".format(v1_mount_point, v2_mount_point))
raise InvalidCgroupMountpointException("Expected cgroup filesystem to be mounted at '{0}', but it is not.\n v1 mount point: \n{1}\n v2 mount point: \n{2}".format(CGROUP_FILE_SYSTEM_ROOT, v1_mount_point, v2_mount_point))

root_hierarchy_mode = shellutil.run_command(["stat", "-f", "--format=%T", CGROUP_FILE_SYSTEM_ROOT]).rstrip()

Expand All @@ -176,8 +185,14 @@ def get_cgroup_api():
if available_unified_controllers != "":
raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers))

cgroup_api = SystemdCgroupApiv1()
# Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd
# default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If either the cpu or memory
# controller is mounted in a location other than the systemd default, raise Exception.
if not cgroup_api.are_mountpoints_systemd_created():
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api.get_controller_root_paths())))
log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring")
return SystemdCgroupApiv1()
return cgroup_api

raise CGroupsException("Detected unknown cgroup mode: {0}".format(root_hierarchy_mode))

Expand Down Expand Up @@ -293,6 +308,9 @@ def _get_controller_mountpoints(self):
"""
mount_points = {}
for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines():
# In v2, we match only the systemd default mountpoint ('/sys/fs/cgroup'). In v1, we match any path. This
# is because the agent previously supported users mounting controllers at locations other than the systemd
# default in v1.
match = re.search(r'(?P<path>\S+\/(?P<controller>\S+))\s+cgroup', line)
if match is not None:
path = match.group('path')
Expand All @@ -301,6 +319,23 @@ def _get_controller_mountpoints(self):
mount_points[controller] = path
return mount_points

def are_mountpoints_systemd_created(self):
"""
Systemd mounts each controller at '/sys/fs/cgroup/<controller>'. Returns True if both cpu and memory
mountpoints match this pattern, False otherwise.
The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used.
This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in
files will be cleaned up in cgroupconfigurator.
"""
cpu_mountpoint = self._cgroup_mountpoints.get('cpu,cpuacct')
memory_mountpoint = self._cgroup_mountpoints.get('memory')
if cpu_mountpoint is not None and cpu_mountpoint != '/sys/fs/cgroup/cpu,cpuacct':
return False
if memory_mountpoint is not None and memory_mountpoint != '/sys/fs/cgroup/memory':
return False
return True

def get_controller_root_paths(self):
# Return a tuple representing the mountpoints for cpu and memory. Either should be None if the corresponding
# controller is not mounted.
Expand Down Expand Up @@ -428,7 +463,9 @@ def _get_root_cgroup_path():
"""
#
for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines():
match = re.search(r'(?P<path>/\S+)\s+cgroup2', line)
# Systemd mounts the cgroup filesystem at '/sys/fs/cgroup'. The agent does not support cgroups if the
# filesystem is mounted elsewhere, so search specifically for '/sys/fs/cgroup' in the findmnt output.
match = re.search(r'(?P<path>\/sys\/fs\/cgroup)\s+cgroup2', line)
if match is not None:
root_cgroup_path = match.group('path')
if root_cgroup_path is not None:
Expand Down
22 changes: 17 additions & 5 deletions azurelinuxagent/ga/cgroupconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup
from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, log_cgroup_info, log_cgroup_warning, get_cgroup_api
from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \
log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException, AgentMemoryExceededException
from azurelinuxagent.common.future import ustr
Expand Down Expand Up @@ -142,6 +143,11 @@ def initialize(self):
self._cgroups_supported = CGroupUtil.cgroups_supported()
if not self._cgroups_supported:
log_cgroup_info("Cgroup monitoring is not supported on {0}".format(get_distro()), send_event=True)
# If a distro is not supported, attempt to clean up any existing drop in files in case it was
# previously supported. It is necessary to cleanup in this scenario in case the OS hits any bugs on
# the kernel related to cgroups.
log_cgroup_info("Agent will reset the quotas in case distro: {0} went from supported to unsupported".format(get_distro()), send_event=False)
self._cleanup_agent_cgroup_drop_in_files()
return

# check that systemd is detected correctly
Expand All @@ -155,6 +161,15 @@ def initialize(self):
# do not enable resource monitoring/enforcement.
try:
self._cgroups_api = get_cgroup_api()
except InvalidCgroupMountpointException as e:
# Systemd mounts the cgroup file system at '/sys/fs/cgroup'. Previously, the agent supported cgroup
# usage if a user mounted the cgroup filesystem elsewhere. The agent no longer supports that
# scenario. Cleanup any existing drop in files in case the agent previously supported cgroups on
# this machine.
log_cgroup_warning("The agent does not support cgroups if the default systemd mountpoint is not being used: {0}".format(ustr(e)), send_event=True)
log_cgroup_info("Agent will reset the quotas in case cgroup usage went from enabled to disabled")
self._cleanup_agent_cgroup_drop_in_files()
return
except CGroupsException as e:
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
return
Expand Down Expand Up @@ -196,9 +211,6 @@ def initialize(self):
log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception)))
finally:
log_cgroup_info('Agent cgroups enabled: {0}'.format(self._agent_cgroups_enabled))
if not self._agent_cgroups_enabled:
log_cgroup_info("Agent will reset the quotas in case cgroups went from enabled to disabled")
self._reset_agent_cgroup_setup()
self._initialized = True

def __check_no_legacy_cgroups(self):
Expand Down Expand Up @@ -311,7 +323,7 @@ def __setup_azure_slice():

CGroupConfigurator._Impl.__reload_systemd_config()

def _reset_agent_cgroup_setup(self):
def _cleanup_agent_cgroup_drop_in_files(self):
try:
agent_drop_in_path = systemd.get_agent_drop_in_path()
if os.path.exists(agent_drop_in_path) and os.path.isdir(agent_drop_in_path):
Expand Down
69 changes: 61 additions & 8 deletions tests/ga/test_cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import tempfile

from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api
from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \
InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import fileutil
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \
mock_cgroup_hybrid_environment
from tests.lib.mock_environment import MockCommand
from tests.lib.tools import AgentTestCase, patch, mock_sleep
from tests.lib.cgroups_tools import CGroupsTools

Expand Down Expand Up @@ -83,19 +85,18 @@ def test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_an


class SystemdCgroupsApiTestCase(AgentTestCase):
def test_get_cgroup_api_is_v1_when_v1_in_use(self):
def test_get_cgroup_api_raises_exception_when_systemd_mount_point_does_not_exist(self):
with mock_cgroup_v1_environment(self.tmp_dir):
self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv1)
# Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check
with patch("os.path.exists", return_value=False):
with self.assertRaises(InvalidCgroupMountpointException) as context:
get_cgroup_api()
self.assertTrue("Expected cgroup filesystem to be mounted at '/sys/fs/cgroup', but it is not" in str(context.exception))

def test_get_cgroup_api_is_v2_when_v2_in_use(self):
with mock_cgroup_v2_environment(self.tmp_dir):
self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv2)

def test_get_cgroup_api_is_v1_when_hybrid_in_use(self):
with mock_cgroup_hybrid_environment(self.tmp_dir):
with patch("os.path.exists", return_value=True):
self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv1)

def test_get_cgroup_api_raises_exception_when_hybrid_in_use_and_controllers_available_in_unified_hierarchy(self):
with mock_cgroup_hybrid_environment(self.tmp_dir):
# Mock /sys/fs/cgroup/unified/cgroup.controllers file to have available controllers
Expand All @@ -105,6 +106,24 @@ def test_get_cgroup_api_raises_exception_when_hybrid_in_use_and_controllers_avai
get_cgroup_api()
self.assertTrue("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: cpu memory" in str(context.exception))

def test_get_cgroup_api_raises_exception_when_v1_in_use_and_controllers_have_non_sytemd_mountpoints(self):
with mock_cgroup_v1_environment(self.tmp_dir):
# Mock /sys/fs/cgroup/unified/cgroup.controllers file to have available controllers
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.are_mountpoints_systemd_created', return_value=False):
with self.assertRaises(InvalidCgroupMountpointException) as context:
get_cgroup_api()
self.assertTrue("Expected cgroup controllers to be mounted at '/sys/fs/cgroup', but at least one is not." in str(context.exception))

def test_get_cgroup_api_is_v1_when_v1_in_use(self):
with mock_cgroup_v1_environment(self.tmp_dir):
self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv1)

def test_get_cgroup_api_is_v1_when_hybrid_in_use(self):
with mock_cgroup_hybrid_environment(self.tmp_dir):
# Mock os.path.exists to return True for the os.path.exists('/sys/fs/cgroup/cgroup.controllers') check
with patch("os.path.exists", return_value=True):
self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv1)

def test_get_cgroup_api_raises_exception_when_cgroup_mode_cannot_be_determined(self):
unknown_cgroup_type = "unknown_cgroup_type"
with patch('azurelinuxagent.common.utils.shellutil.run_command', return_value=unknown_cgroup_type):
Expand Down Expand Up @@ -231,6 +250,29 @@ def test_get_controller_mountpoints_should_return_all_controller_mount_points(se
'pids': '/sys/fs/cgroup/pids',
}, "The controller mountpoints are not correct")

def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_are_not_systemd_mountpoints(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

def test_are_mountpoints_systemd_created_should_return_True_if_cpu_and_memory_are_systemd_mountpoints(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup', 'memory': '/sys/fs/cgroup'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

# are_mountpoints_systemd_created should only check controllers which are mounted
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self):
with mock_cgroup_v1_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self')
Expand Down Expand Up @@ -328,6 +370,17 @@ def test_get_root_cgroup_path_should_return_v2_cgroup_root(self):
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup')

def test_get_root_cgroup_path_should_only_match_systemd_mountpoint(self):
with mock_cgroup_v2_environment(self.tmp_dir) as env:
# Mock an environment which has multiple v2 mountpoints
env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$",
'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime
/custom/mountpoint/path2 none cgroup2 rw,relatime
'''))
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup')

def test_get_unit_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
Expand Down

0 comments on commit 3cc51db

Please sign in to comment.