Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cgroups api refactor for v2 #3096

Merged
merged 15 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
import subprocess
import sys
import threading

from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.event as event
Expand Down Expand Up @@ -206,18 +208,29 @@ def collect_logs(self, is_full_mode):

# Check the cgroups unit
log_collector_monitor = None
cgroups_api = SystemdCgroupsApi()
cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self")
cpu_cgroup_path = None
memory_cgroup_path = None
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)
try:
cgroup_api = get_cgroup_api()
except CGroupsException as e:
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self")
cpu_slice_matches = False
memory_slice_matches = False
if cpu_cgroup_path is not None:
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
if memory_cgroup_path is not None:
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)

if not cpu_slice_matches or not memory_slice_matches:
logger.info("The Log Collector process is not in the proper cgroups:")
log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False)
if not cpu_slice_matches:
logger.info("\tunexpected cpu slice")
log_cgroup_warning("\tunexpected cpu slice", send_event=False)
if not memory_slice_matches:
logger.info("\tunexpected memory slice")
log_cgroup_warning("\tunexpected memory slice", send_event=False)

sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

Expand Down
392 changes: 257 additions & 135 deletions azurelinuxagent/ga/cgroupapi.py

Large diffs are not rendered by default.

197 changes: 90 additions & 107 deletions azurelinuxagent/ga/cgroupconfigurator.py

Large diffs are not rendered by default.

Empty file.
7 changes: 0 additions & 7 deletions tests/data/cgroups/sys_fs_cgroup_unified_cgroup.controllers
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cgroup.controllers file is no longer read by the agent, so this test data file is removed.

This file was deleted.

1 change: 1 addition & 0 deletions tests/data/cgroups/v2/proc_pid_cgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0::/system.slice/Microsoft.A.Sample.Extension_1.0.1_aeac05dc-8c24-4542-95f2-a0d6be1c5ba7.scope
1 change: 1 addition & 0 deletions tests/data/cgroups/v2/proc_self_cgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0::/system.slice/walinuxagent.service
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cpuset cpu io memory pids
Empty file.
307 changes: 231 additions & 76 deletions tests/ga/test_cgroupapi.py

Large diffs are not rendered by default.

152 changes: 111 additions & 41 deletions tests/ga/test_cgroupconfigurator.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions tests/ga/test_cgroupconfigurator_sudo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes
from azurelinuxagent.common.future import ustr
from tests.lib.mock_cgroup_environment import mock_cgroup_environment
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment
from tests.lib.tools import AgentTestCase, patch, mock_sleep, i_am_root, is_python_version_26_or_34, skip_if_predicate_true


Expand All @@ -40,7 +40,7 @@ def _get_cgroup_configurator(self, initialize=True, enable=True, mock_commands=N
CGroupConfigurator._instance = None
configurator = CGroupConfigurator.get_instance()
CGroupsTelemetry.reset()
with mock_cgroup_environment(self.tmp_dir) as mock_environment:
with mock_cgroup_v1_environment(self.tmp_dir) as mock_environment:
if mock_commands is not None:
for command in mock_commands:
mock_environment.add_command(command)
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_tim
with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr:
with patch("azurelinuxagent.ga.extensionprocessutil.wait_for_process_completion_or_timeout",
return_value=[True, None, 0]):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupsApi._is_systemd_failure",
with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi._is_systemd_failure",
return_value=False):
with self.assertRaises(ExtensionError) as context_manager:
configurator.start_extension_command(
Expand Down
2 changes: 1 addition & 1 deletion tests/ga/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -1955,7 +1955,7 @@ def iterator(*_, **__):
with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'):
with patch('azurelinuxagent.ga.agent_update_handler.get_agent_update_handler'):
with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'):
with patch('azurelinuxagent.ga.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff
with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=False): # skip all cgroup stuff
with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True):
with patch('time.sleep'):
with patch('sys.exit'):
Expand Down
14 changes: 0 additions & 14 deletions tests/lib/cgroups_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,3 @@ def create_legacy_agent_cgroup(cgroups_file_system_root, controller, daemon_pid)
fileutil.append_file(os.path.join(legacy_cgroup, "cgroup.procs"), daemon_pid + "\n")
return legacy_cgroup

@staticmethod
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helper (create_agent_cgroup) wasn't being used anywhere, so it is removed.

def create_agent_cgroup(cgroups_file_system_root, controller, extension_handler_pid):
"""
Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent;
starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent.

This method creates a mock cgroup using the newer path and adds the given PID to it.

:param cgroups_file_system_root: directory under which the mock cgroup tree is created
:param controller: name of the controller subdirectory (joined directly under the root)
:param extension_handler_pid: PID to record, as a string (it is concatenated with a newline)
:return: path of the mock cgroup directory, <cgroups_file_system_root>/<controller>/walinuxagent.service
"""
new_cgroup = os.path.join(cgroups_file_system_root, controller, "walinuxagent.service")
# Create the cgroup directory only on first use so repeated calls can add more PIDs to the same cgroup
if not os.path.exists(new_cgroup):
os.makedirs(new_cgroup)
# One PID per line is appended to the cgroup.procs file inside the mock cgroup directory
fileutil.append_file(os.path.join(new_cgroup, "cgroup.procs"), extension_handler_pid + "\n")
return new_cgroup

154 changes: 124 additions & 30 deletions tests/lib/mock_cgroup_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,11 @@
from tests.lib.tools import patch, data_dir
from tests.lib.mock_environment import MockEnvironment, MockCommand

_MOCKED_COMMANDS = [
# Mocked commands which are common between v1, v2, and hybrid cgroup environments
_MOCKED_COMMANDS_COMMON = [
MockCommand(r"^systemctl --version$",
'''systemd 237
+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid
'''),

MockCommand(r"^mount -t cgroup$",
'''cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd)
cgroup on /sys/fs/cgroup/rdma type cgroup (rw,nosuid,nodev,noexec,relatime,rdma)
cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset)
cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio)
cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event)
cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb)
cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer)
cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory)
cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids)
cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices)
cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct)
cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio)
'''),

MockCommand(r"^mount -t cgroup2$",
'''cgroup on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime)
'''),

MockCommand(r"^systemctl show walinuxagent\.service --property Slice",
Expand Down Expand Up @@ -77,10 +59,84 @@

]

_MOCKED_FILES = [
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'proc_self_cgroup')),
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'proc_pid_cgroup')),
("/sys/fs/cgroup/unified/cgroup.controllers", os.path.join(data_dir, 'cgroups', 'sys_fs_cgroup_unified_cgroup.controllers'))
# Mocked commands specific to a cgroup v1 environment. Each MockCommand pairs a command-line regex with the
# output to return for it (presumably; see tests.lib.mock_environment for the exact matching rules).
_MOCKED_COMMANDS_V1 = [
# 'findmnt -t cgroup' lists one v1 mount per controller (or controller group) under /sys/fs/cgroup
MockCommand(r"^findmnt -t cgroup --noheadings$",
'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd
/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices
/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma
/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event
/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio
/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio
/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset
/sys/fs/cgroup/misc cgroup cgroup rw,nosuid,nodev,noexec,relatime,misc
/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct
/sys/fs/cgroup/memory cgroup cgroup rw,nosuid,nodev,noexec,relatime,memory
/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer
/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids
'''),

# No cgroup2 mounts are reported in the v1 environment
MockCommand(r"^findmnt -t cgroup2 --noheadings$", ''),

# The filesystem type of /sys/fs/cgroup is reported as tmpfs (not cgroup2fs)
MockCommand(r"^stat -f --format=%T /sys/fs/cgroup$", 'tmpfs'),

]

# Mocked commands specific to a cgroup v2 environment. Each MockCommand pairs a command-line regex with the
# output to return for it (presumably; see tests.lib.mock_environment for the exact matching rules).
_MOCKED_COMMANDS_V2 = [
# 'findmnt -t cgroup2' reports a single cgroup2 mount at /sys/fs/cgroup
MockCommand(r"^findmnt -t cgroup2 --noheadings$",
'''/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot
'''),

# No v1 (cgroup) mounts are reported in the v2 environment
MockCommand(r"^findmnt -t cgroup --noheadings$", ''),

# The filesystem type of /sys/fs/cgroup is reported as cgroup2fs
MockCommand(r"^stat -f --format=%T /sys/fs/cgroup$", 'cgroup2fs'),

]

# Mocked commands specific to a hybrid cgroup environment: the v1 per-controller mounts are present and a
# cgroup2 hierarchy is additionally mounted at /sys/fs/cgroup/unified. Each MockCommand pairs a command-line
# regex with the output to return for it (presumably; see tests.lib.mock_environment for the matching rules).
_MOCKED_COMMANDS_HYBRID = [
# Same v1 mount listing as used for the v1 environment
MockCommand(r"^findmnt -t cgroup --noheadings$",
'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd
/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices
/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma
/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event
/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio
/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio
/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset
/sys/fs/cgroup/misc cgroup cgroup rw,nosuid,nodev,noexec,relatime,misc
/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct
/sys/fs/cgroup/memory cgroup cgroup rw,nosuid,nodev,noexec,relatime,memory
/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer
/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids
'''),

# The cgroup2 hierarchy is mounted under the 'unified' subdirectory rather than at /sys/fs/cgroup itself
MockCommand(r"^findmnt -t cgroup2 --noheadings$",
'''/sys/fs/cgroup/unified cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate
'''),

# /sys/fs/cgroup is reported as tmpfs, while /sys/fs/cgroup/unified is reported as cgroup2fs
MockCommand(r"^stat -f --format=%T /sys/fs/cgroup$", 'tmpfs'),

MockCommand(r"^stat -f --format=%T /sys/fs/cgroup/unified$", 'cgroup2fs'),

]

# Files mocked in the cgroup v1 environment, as (path or path regex, test data file) pairs; the list is passed
# to MockEnvironment as 'files' (presumably reads of the first path are redirected to the second - confirm
# against tests.lib.mock_environment).
_MOCKED_FILES_V1 = [
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')),
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup'))
]

# Files mocked in the cgroup v2 environment, as (path or path regex, test data file) pairs; the list is passed
# to MockEnvironment as 'files' (presumably reads of the first path are redirected to the second - confirm
# against tests.lib.mock_environment).
# The root and azure.slice cgroup.subtree_control files share one data file, while the walinuxagent.service
# one maps to the separate '..._empty' data file.
_MOCKED_FILES_V2 = [
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_self_cgroup')),
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')),
("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty'))
]

# Files mocked in the hybrid cgroup environment, as (path or path regex, test data file) pairs; the list is
# passed to MockEnvironment as 'files' (presumably reads of the first path are redirected to the second -
# confirm against tests.lib.mock_environment).
# The /proc cgroup files reuse the v1 test data; only the unified cgroup.controllers file has hybrid-specific data.
_MOCKED_FILES_HYBRID = [
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')),
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')),
("/sys/fs/cgroup/unified/cgroup.controllers", os.path.join(data_dir, 'cgroups', 'hybrid', 'sys_fs_cgroup_cgroup.controllers'))
]

_MOCKED_PATHS = [
Expand All @@ -106,18 +162,56 @@ class UnitFilePaths:


@contextlib.contextmanager
def mock_cgroup_environment(tmp_dir):
def mock_cgroup_v1_environment(tmp_dir):
"""
Creates a mock environment for cgroup v1 hierarchy used by the tests related to cgroups (currently it only
provides support for systemd platforms).

While the environment is active, CGroupUtil.cgroups_supported and systemd.is_systemd are patched to return True,
and a MockEnvironment is created from _MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_V1, _MOCKED_PATHS,
_MOCKED_FILES_V1 and the unit files listed in data_files. The MockEnvironment instance is yielded to the caller.

tmp_dir is forwarded to MockEnvironment as its first argument.

The v1-specific command output is stated to come from an Ubuntu 20 system.
NOTE(review): the previous wording referred to "__MOCKED_COMMANDS", a name that is not defined in the visible
code; presumably _MOCKED_COMMANDS_V1 was meant - confirm.
"""
# Pairs of (file under data_dir/init, UnitFilePaths entry) handed to MockEnvironment as data_files
data_files = [
(os.path.join(data_dir, 'init', 'walinuxagent.service'), UnitFilePaths.walinuxagent),
(os.path.join(data_dir, 'init', 'azure.slice'), UnitFilePaths.azure),
(os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions)
]

# Force the code under test to see a systemd platform with cgroups supported, regardless of the real host
with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=True):
with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True):
with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_V1, paths=_MOCKED_PATHS, files=_MOCKED_FILES_V1, data_files=data_files) as mock:
yield mock


@contextlib.contextmanager
def mock_cgroup_v2_environment(tmp_dir):
"""
Creates a mock environment for cgroup v2 hierarchy used by the tests related to cgroups (currently it only
provides support for systemd platforms).

While the context is active, CGroupUtil.cgroups_supported and systemd.is_systemd are patched to return True,
and a MockEnvironment is created from _MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_V2, _MOCKED_PATHS,
_MOCKED_FILES_V2 and the unit files listed in data_files. The MockEnvironment instance is yielded to the caller.

tmp_dir is forwarded to MockEnvironment as its first argument.

The v2-specific command output is stated to come from an Ubuntu 22 system.
NOTE(review): the previous wording referred to "__MOCKED_COMMANDS", a name that is not defined in the visible
code; presumably _MOCKED_COMMANDS_V2 was meant - confirm.
"""
# Pairs of (file under data_dir/init, UnitFilePaths entry) handed to MockEnvironment as data_files
data_files = [
(os.path.join(data_dir, 'init', 'walinuxagent.service'), UnitFilePaths.walinuxagent),
(os.path.join(data_dir, 'init', 'azure.slice'), UnitFilePaths.azure),
(os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions)
]

# Force the code under test to see a systemd platform with cgroups supported, regardless of the real host
with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=True):
with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True):
with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_V2, paths=_MOCKED_PATHS, files=_MOCKED_FILES_V2, data_files=data_files) as mock:
yield mock


@contextlib.contextmanager
def mock_cgroup_hybrid_environment(tmp_dir):
"""
Creates a mock environment for cgroup hybrid hierarchy used by the tests related to cgroups (currently it only
provides support for systemd platforms).
"""
Creates a mocks environment used by the tests related to cgroups (currently it only provides support for systemd platforms).
The command output used in __MOCKED_COMMANDS comes from an Ubuntu 18 system.
"""
data_files = [
(os.path.join(data_dir, 'init', 'walinuxagent.service'), UnitFilePaths.walinuxagent),
(os.path.join(data_dir, 'init', 'azure.slice'), UnitFilePaths.azure),
(os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions)
]

with patch('azurelinuxagent.ga.cgroupapi.CGroupsApi.cgroups_supported', return_value=True):
with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=True):
with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True):
with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS, paths=_MOCKED_PATHS, files=_MOCKED_FILES, data_files=data_files) as mock:
with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_HYBRID, paths=_MOCKED_PATHS, files=_MOCKED_FILES_HYBRID, data_files=data_files) as mock:
yield mock
Loading
Loading