From 334dbd6e75b6dc631e8bed7025196f77dfb5e674 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Sat, 19 Oct 2024 16:52:52 +0200
Subject: [PATCH 1/7] tests: set root log level to DEBUG during tests

Ease diagnosing test failures.
---
 qubes/tests/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/qubes/tests/__init__.py b/qubes/tests/__init__.py
index c20d0c073..305733701 100644
--- a/qubes/tests/__init__.py
+++ b/qubes/tests/__init__.py
@@ -750,6 +750,7 @@ def setUp(self):
         ha_syslog.setFormatter(
             logging.Formatter('%(name)s[%(process)d]: %(message)s'))
         logging.root.addHandler(ha_syslog)
+        logging.root.setLevel("DEBUG")
 
         self.log.critical('starting')
 

From f42f5b6315088221ea5f563efd4cef9ecf818f7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Sun, 13 Oct 2024 15:11:12 +0200
Subject: [PATCH 2/7] DEBUG: log list of VMs when startup failed

Also log basic Xen info (including free memory).
---
 qubes/vm/qubesvm.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/qubes/vm/qubesvm.py b/qubes/vm/qubesvm.py
index dd76c4a00..c40100a31 100644
--- a/qubes/vm/qubesvm.py
+++ b/qubes/vm/qubesvm.py
@@ -1222,8 +1222,13 @@ async def start(self, start_guid=True, notify_function=None,
 
             self._update_libvirt_domain()
 
+            free_mem = subprocess.check_output(["xl", "info", "free_memory"]).decode()
+            self.log.debug("free mem before start: %s", free_mem)
+            subprocess.run(["xl", "list"])
             self.libvirt_domain.createWithFlags(
                 libvirt.VIR_DOMAIN_START_PAUSED)
+            free_mem = subprocess.check_output(["xl", "info", "free_memory"]).decode()
+            self.log.debug("free mem after start: %s", free_mem)
 
             # the above allocates xid, lets announce that
             self.fire_event('property-reset:xid', name='xid')
@@ -1239,6 +1244,10 @@ async def start(self, start_guid=True, notify_function=None,
                 exc = qubes.exc.QubesException(
                     'Failed to start an HVM qube with PCI devices assigned '
                     '- hardware does not support IOMMU/VT-d/AMD-Vi')
+            free_mem = subprocess.check_output(["xl", "info", "free_memory"]).decode()
+            self.log.debug("free mem after failed start: %s", free_mem)
+            subprocess.run(["xl", "list"])
+            subprocess.run(["xl", "info"])
             self.log.error('Start failed: %s', str(exc))
             await self.fire_event_async('domain-start-failed',
                 reason=str(exc))
@@ -1757,6 +1766,7 @@ def request_memory(self, mem_required=None):
             initial_memory = self.memory
             mem_required = int(initial_memory + stubdom_mem) * 1024 * 1024
+            self.log.debug(f"mem required: {mem_required}")
         qmemman_client = qubes.qmemman.client.QMemmanClient()
         try:
             mem_required_with_overhead = mem_required + MEM_OVERHEAD_BASE \
                 + self.vcpus * MEM_OVERHEAD_PER_VCPU
@@ -1767,6 +1777,7 @@ def request_memory(self, mem_required=None):
                 mem_required_with_overhead += self.maxmem * 4096
             got_memory = qmemman_client.request_memory(
                 mem_required_with_overhead)
+            self.log.debug(f"mem required with overhead: {mem_required_with_overhead}")
         except IOError as e:
             raise IOError('Failed to connect to qmemman: {!s}'.format(e))
 

From 7eea85e76b843dfa17e03a5f2fed0c7fc6b9be5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Sun, 20 Oct 2024 05:29:09 +0200
Subject: [PATCH 3/7] Update Xen's memory overhead formula

Experiments show that using memory hotplug or populate-on-demand makes
no difference in the memory required at startup. But PV vs PVH/HVM does
make a difference - PV doesn't need the extra per-MB overhead at all.

On top of that, experimentally find the correct factor. Do it by
starting a VM (paused) with different parameters and comparing
`xl info free_memory` before and after.

memory / maxmem: difference
   400 /  4000: 434
   600 /  4000: 634
   400 /   400: 405
   400 /   600: 407
   400 /  2000: 418
  2000 /  2000: 2018
   600 /   600: 607

All of the above are with 2 vcpus. Testing with other vcpu counts shows
that the 1.5MB-per-vcpu term is quite accurate.

As seen above, the initial memory doesn't affect the overhead - maxmem
is what counts. Applying linear regression to the data gives about
0.008MB of overhead per MB of maxmem, so round it to 8192 bytes
(2 pages). The base overhead of 4MB doesn't match exactly, but since
the calculated value is smaller, leave it at 4MB as a safety margin.

Fixes QubesOS/qubes-issues#9431
---
 qubes/vm/qubesvm.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/qubes/vm/qubesvm.py b/qubes/vm/qubesvm.py
index c40100a31..5001838e7 100644
--- a/qubes/vm/qubesvm.py
+++ b/qubes/vm/qubesvm.py
@@ -1771,10 +1771,11 @@ def request_memory(self, mem_required=None):
         try:
             mem_required_with_overhead = mem_required + MEM_OVERHEAD_BASE \
                 + self.vcpus * MEM_OVERHEAD_PER_VCPU
-            if self.use_memory_hotplug:
-                # extra overhead to account future hotplug memory
-                # 1 page per 1MB of RAM, see libxl__get_required_paging_memory()
-                mem_required_with_overhead += self.maxmem * 4096
+            maxmem = self.maxmem if self.maxmem else self.memory
+            if self.virt_mode != "pv":
+                # extra overhead to account (possibly future hotplug) memory
+                # 2 pages per 1MB of RAM, see libxl__get_required_paging_memory()
+                mem_required_with_overhead += maxmem * 8192
             got_memory = qmemman_client.request_memory(
                 mem_required_with_overhead)
             self.log.debug(f"mem required with overhead: {mem_required_with_overhead}")

From b6421d0f78293d401226c05c09cfcdbd3099b526 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Tue, 22 Oct 2024 13:41:46 +0200
Subject: [PATCH 4/7] qmemman: refresh domain list directly instead of
 scheduling

... for the next watcher loop iteration.

If two VMs are started in parallel, there may be no watcher loop
iteration between handling their requests. This means the memory
request for the second VM will operate on an outdated list of VMs and
may not account for some allocations (assuming memory is free while in
fact it's already allocated to another VM). If that happens, the second
VM may fail to start with an out-of-memory error.

This is a very similar problem to the one described in
QubesOS/qubes-issues#1389, but it affects the actual VM startup, not
its auxiliary processes.

Fixes QubesOS/qubes-issues#9431
---
 qubes/tools/qmemmand.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/qubes/tools/qmemmand.py b/qubes/tools/qmemmand.py
index df61f172f..c348231ac 100644
--- a/qubes/tools/qmemmand.py
+++ b/qubes/tools/qmemmand.py
@@ -20,6 +20,7 @@
 # pylint: disable=global-statement
 
 import configparser
+import functools
 import socketserver
 import logging
 import logging.handlers
@@ -41,15 +42,6 @@
 system_state = qubes.qmemman.systemstate.SystemState()
 global_lock = threading.Lock()
-# If XSWatcher will
-# handle meminfo event before @introduceDomain, it will use
-# incomplete domain list for that and may redistribute memory
-# allocated to some VM, but not yet used (see #1389).
-# To fix that, system_state should be updated (refresh domain
-# list) before processing other changes, every time some process requested
-# memory for a new VM, before releasing the lock. Then XS_Watcher will check
-# this flag before processing other event.
-force_refresh_domain_list = False
 
 
 def only_in_first_list(list1, list2):
     ret = []
@@ -153,10 +145,6 @@ def meminfo_changed(self, domain_id):
         with global_lock:
             self.log.debug('global_lock acquired')
             try:
-                global force_refresh_domain_list
-                if force_refresh_domain_list:
-                    self.domain_list_changed(refresh_only=True)
-                    force_refresh_domain_list = False
                 if domain_id not in self.watch_token_dict:
                     # domain just destroyed
                     return
@@ -184,6 +172,10 @@ class QMemmanReqHandler(socketserver.BaseRequestHandler):
     client.
     """
 
+    def __init__(self, watcher: XSWatcher, request, client_address, server):
+        self.watcher = watcher
+        super().__init__(request, client_address, server)
+
     def handle(self):
         self.log = logging.getLogger('qmemman.daemon.reqhandler')
 
@@ -196,8 +188,10 @@ def handle(self):
             if len(self.data) == 0:
                 self.log.info('client disconnected, resuming membalance')
                 if got_lock:
-                    global force_refresh_domain_list
-                    force_refresh_domain_list = True
+                    # got_lock = True means some VM may have been just created
+                    # ensure next watch or client request handler will use
+                    # updated domain list
+                    self.watcher.domain_list_changed(refresh_only=True)
                 return
 
             # XXX something is wrong here: return without release?
@@ -290,7 +284,11 @@ def main():
     # Initialize the connection to Xen and to XenStore
     system_state.init()
 
-    server = socketserver.UnixStreamServer(SOCK_PATH, QMemmanReqHandler)
+    watcher = XSWatcher()
+    server = socketserver.UnixStreamServer(
+        SOCK_PATH,
+        functools.partial(QMemmanReqHandler, watcher)
+    )
     os.umask(0o077)
 
     # notify systemd
@@ -305,4 +303,4 @@ def main():
     sock.close()
 
     threading.Thread(target=server.serve_forever).start()
-    XSWatcher().watch_loop()
+    watcher.watch_loop()

From 820e8c75aebd96b4f2922264b272cd6c2b0afa32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 23 Oct 2024 12:54:16 +0200
Subject: [PATCH 5/7] DEBUG: raise default qmemman log level

---
 etc/qmemman.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etc/qmemman.conf b/etc/qmemman.conf
index e86c158f5..0175ad97a 100644
--- a/etc/qmemman.conf
+++ b/etc/qmemman.conf
@@ -14,4 +14,4 @@ cache-margin-factor = 1.3
 
 # log-level - Warning 30, Info 20, Debug 10
 # Default: 30
-log-level = 30
+log-level = 20

From 4e3818eed1ec40ddd3e99c380cfcd438c246282a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 23 Oct 2024 23:53:36 +0200
Subject: [PATCH 6/7] DEBUG: more qmemman debugging

---
 qubes/tools/qmemmand.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/qubes/tools/qmemmand.py b/qubes/tools/qmemmand.py
index c348231ac..e3b70b5ce 100644
--- a/qubes/tools/qmemmand.py
+++ b/qubes/tools/qmemmand.py
@@ -86,11 +86,11 @@ def domain_list_changed(self, refresh_only=False):
 
         got_lock = False
         if not refresh_only:
-            self.log.debug('acquiring global_lock')
+            self.log.info('acquiring global_lock - list changed')
             # pylint: disable=consider-using-with
             global_lock.acquire()
             got_lock = True
-            self.log.debug('global_lock acquired')
+            self.log.info('global_lock acquired - list changed')
         try:
             curr = self.handle.ls('', '/local/domain')
             if curr is None:
@@ -125,7 +125,7 @@ def domain_list_changed(self, refresh_only=False):
         finally:
             if got_lock:
                 global_lock.release()
-                self.log.debug('global_lock released')
+                self.log.info('global_lock released - list changed')
 
         if not refresh_only:
             try:
@@ -141,9 +141,9 @@ def meminfo_changed(self, domain_id):
         if untrusted_meminfo_key is None or untrusted_meminfo_key == b'':
             return
 
-        self.log.debug('acquiring global_lock')
+        self.log.info('acquiring global_lock - meminfo changed')
         with global_lock:
-            self.log.debug('global_lock acquired')
+            self.log.info('global_lock acquired - meminfo changed')
             try:
                 if domain_id not in self.watch_token_dict:
                     # domain just destroyed
@@ -152,7 +152,7 @@ def meminfo_changed(self, domain_id):
                 system_state.refresh_meminfo(domain_id, untrusted_meminfo_key)
             except:  # pylint: disable=bare-except
                 self.log.exception('Updating meminfo for %s failed', domain_id)
-        self.log.debug('global_lock released')
+        self.log.info('global_lock released - meminfo changed')
 
     def watch_loop(self):
         self.log.debug('watch_loop()')
@@ -184,7 +184,7 @@ def handle(self):
         # self.request is the TCP socket connected to the client
         while True:
             self.data = self.request.recv(1024).strip()
-            self.log.debug('data=%r', self.data)
+            self.log.info('data=%r', self.data)
             if len(self.data) == 0:
                 self.log.info('client disconnected, resuming membalance')
                 if got_lock:
@@ -199,10 +199,10 @@ def handle(self):
                 self.log.warning('Second request over qmemman.sock?')
                 return
 
-            self.log.debug('acquiring global_lock')
+            self.log.info('acquiring global_lock - handle')
             # pylint: disable=consider-using-with
             global_lock.acquire()
-            self.log.debug('global_lock acquired')
+            self.log.info('global_lock acquired - handle')
             got_lock = True
 
             if (self.data.isdigit() and
@@ -218,7 +218,7 @@ def handle(self):
         finally:
             if got_lock:
                 global_lock.release()
-                self.log.debug('global_lock released')
+                self.log.info('global_lock released - handle')
 
 
 parser = qubes.tools.QubesArgumentParser(want_app=False)

From efb85e2e32a0d4e6eabbed35177a258db2d2790d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Thu, 24 Oct 2024 04:34:56 +0200
Subject: [PATCH 7/7] qmemman: always execute do_balance() under a lock

Any memory adjustment must be done while holding the lock, to avoid
interfering with client request handling. This is critical to prevent
memory just freed for a new VM from being re-allocated elsewhere.

The domain_list_changed() function failed to do that - the do_balance()
call was made after releasing the lock. It wasn't a problem for a long
time because of Python's global interpreter lock, but Python 3.13 is
finally starting to support proper parallel thread execution, and that
revealed this bug.

Fixes QubesOS/qubes-issues#9431
---
 qubes/tools/qmemmand.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/qubes/tools/qmemmand.py b/qubes/tools/qmemmand.py
index e3b70b5ce..2c901c791 100644
--- a/qubes/tools/qmemmand.py
+++ b/qubes/tools/qmemmand.py
@@ -120,6 +120,12 @@ def domain_list_changed(self, refresh_only=False):
                                     self.watch_token_dict[i])
                 self.watch_token_dict.pop(i)
                 system_state.del_domain(i)
+
+            if not refresh_only:
+                try:
+                    system_state.do_balance()
+                except:  # pylint: disable=bare-except
+                    self.log.exception('do_balance() failed')
         except:  # pylint: disable=bare-except
             self.log.exception('Updating domain list failed')
         finally:
@@ -127,13 +133,6 @@ def domain_list_changed(self, refresh_only=False):
             if got_lock:
                 global_lock.release()
                 self.log.info('global_lock released - list changed')
-        if not refresh_only:
-            try:
-                system_state.do_balance()
-            except:  # pylint: disable=bare-except
-                self.log.exception('do_balance() failed')
-
-
     def meminfo_changed(self, domain_id):
         self.log.debug('meminfo_changed(domain_id={!r})'.format(domain_id))
         untrusted_meminfo_key = self.handle.read(
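
As a cross-check of patch 3, the regression mentioned in its commit message can
be reproduced from the quoted measurements. The sketch below is illustrative
only; it assumes numpy is available, and the 4MB / 1.5MB figures correspond to
the MEM_OVERHEAD_BASE / MEM_OVERHEAD_PER_VCPU constants referenced in the
message.

# Sketch: fit the per-MB-of-maxmem overhead from the measurements quoted in
# patch 3's commit message (all taken with 2 vcpus). The triples are
# (memory MB, maxmem MB, observed `xl info free_memory` drop in MB).
import numpy as np

measurements = [
    (400, 4000, 434),
    (600, 4000, 634),
    (400, 400, 405),
    (400, 600, 407),
    (400, 2000, 418),
    (2000, 2000, 2018),
    (600, 600, 607),
]

maxmem = np.array([m for _, m, _ in measurements], dtype=float)
# overhead = observed drop minus the VM's initial memory, which
# request_memory() already accounts for separately
overhead = np.array([d - mem for mem, _, d in measurements], dtype=float)

slope_mb, base_mb = np.polyfit(maxmem, overhead, 1)
print(f"per MB of maxmem: {slope_mb:.4f} MB (~{slope_mb * 1024 * 1024:.0f} bytes)")
print(f"base:             {base_mb:.1f} MB")
# Prints roughly 0.008 MB per MB of maxmem (the patch uses 8192 bytes, i.e.
# 2 pages) and a base of about 2 MB, below the existing 4 MB
# MEM_OVERHEAD_BASE that the patch keeps as a safety margin.

Plugging the largest measured case into the new formula: a 400/4000 non-PV qube
with 2 vcpus needs 400MB + 4MB + 2 * 1.5MB + 4000 * 8192 bytes, roughly 438MB,
comfortably above the measured 434MB difference.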
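
A note on the functools.partial change in patch 4: socketserver constructs a
new handler object per connection by calling
RequestHandlerClass(request, client_address, server), so the shared XSWatcher
has to be bound in front of those three arguments. A minimal, self-contained
sketch of the same pattern; EchoHandler and the socket path are made-up names
for illustration, not part of the series.

# socketserver instantiates the handler class itself for every connection,
# so shared state is pre-bound with functools.partial ahead of the three
# arguments socketserver supplies.
import functools
import socketserver

class EchoHandler(socketserver.BaseRequestHandler):
    def __init__(self, shared_state, request, client_address, server):
        # shared_state is bound by functools.partial below
        self.shared_state = shared_state
        super().__init__(request, client_address, server)

    def handle(self):
        # count connections in the shared object, then echo the data back
        self.shared_state["connections"] += 1
        data = self.request.recv(1024)
        self.request.sendall(data)

if __name__ == "__main__":
    state = {"connections": 0}
    server = socketserver.UnixStreamServer(
        "/tmp/echo.sock",  # hypothetical path; remove it between runs
        functools.partial(EchoHandler, state),
    )
    server.serve_forever()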
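
The race patch 7 closes comes down to where do_balance() sits relative to
global_lock. A structural sketch of the before/after control flow, using
stand-in stubs rather than the real qmemman code:

# Only the position of do_balance() relative to the lock matters here;
# SystemState and the method bodies are stubs.
import threading

global_lock = threading.Lock()

class SystemState:
    def refresh_domain_list(self):
        ...  # stand-in for the xenstore-driven bookkeeping

    def do_balance(self):
        ...  # stand-in for redistributing memory between domains

system_state = SystemState()

def domain_list_changed_before(refresh_only=False):
    with global_lock:
        system_state.refresh_domain_list()
    # Lock released here: a request-handler thread can now acquire it, free
    # memory from other domains and earmark it for a new VM ...
    if not refresh_only:
        # ... and this unserialized balance run can hand that memory back
        # out, so the new VM later fails with an out-of-memory error.
        system_state.do_balance()

def domain_list_changed_after(refresh_only=False):
    with global_lock:
        system_state.refresh_domain_list()
        if not refresh_only:
            # balance while still holding the lock, as patch 7 does
            system_state.do_balance()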