From 67b8d37090ddde8511be05f5c525bb3e3d147056 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Date: Tue, 1 Oct 2024 22:19:54 +0500 Subject: [PATCH 1/4] Try to gracefully stop docker on failover event --- .../middlewared/plugins/failover_/event.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/middlewared/middlewared/plugins/failover_/event.py b/src/middlewared/middlewared/plugins/failover_/event.py index e790d78550487..e2388232dbc24 100644 --- a/src/middlewared/middlewared/plugins/failover_/event.py +++ b/src/middlewared/middlewared/plugins/failover_/event.py @@ -796,6 +796,14 @@ def vrrp_backup(self, job, fobj, ifname, event): logger.warning('Entering BACKUP on "%s".', ifname) + # We will try to give some time to docker to gracefully stop before zpools will be forcefully + # exported. This is to avoid any potential data corruption. + stop_docker_thread = threading.Thread( + target=self.stop_apps, + name='failover_stop_docker', + ) + stop_docker_thread.start() + # We stop netdata before exporting pools because otherwise we might have erroneous stuff # getting logged and causing spam logger.info('Stopping reporting metrics') @@ -930,6 +938,18 @@ def vrrp_backup(self, job, fobj, ifname, event): return self.FAILOVER_RESULT + def stop_apps(self): + if not self.middleware.call_sync('docker.config')['dataset']: + return + + logger.info('Trying to gracefully stop docker service') + try: + self.run_call('service.stop', 'docker') + except Exception: + logger.error('Failed to stop docker service gracefully', exc_info=True) + else: + logger.info('Docker service stopped gracefully') + async def vrrp_fifo_hook(middleware, data): ifname = data['ifname'] From ebdff305d35b9796191e4575fe82f4c563e74c06 Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Date: Tue, 1 Oct 2024 22:32:35 +0500 Subject: [PATCH 2/4] Gracefully start apps when passive becomes active --- .../middlewared/plugins/failover_/event.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff 
--git a/src/middlewared/middlewared/plugins/failover_/event.py b/src/middlewared/middlewared/plugins/failover_/event.py index e2388232dbc24..25816870e89ad 100644 --- a/src/middlewared/middlewared/plugins/failover_/event.py +++ b/src/middlewared/middlewared/plugins/failover_/event.py @@ -737,6 +737,8 @@ def vrrp_master(self, job, fobj, ifname, event): self.run_call('kmip.initialize_keys') logger.info('Done syncing encryption keys with KMIP server') + self.start_apps() + logger.info('Migrating interface information (if required)') self.run_call('interface.persist_link_addresses') logger.info('Done migrating interface information (if required)') @@ -938,6 +940,30 @@ def vrrp_backup(self, job, fobj, ifname, event): return self.FAILOVER_RESULT + def start_apps(self): + pool = self.run_call('docker.config')['pool'] + if not pool: + logger.info('Skipping starting apps as they are not configured') + return + + logger.info('Going to initialize apps plugin as %r pool is configured for apps', pool) + logger.info('Mounting relevant docker datasets') + try: + self.run_call('docker.fs_manage.mount') + except Exception: + logger.error('Failed to mount docker datasets', exc_info=True) + return + else: + logger.info('Mounted docker datasets successfully') + + logger.info('Starting docker service') + try: + self.run_call('docker.state.start_service') + except Exception: + logger.error('Failed to start docker service', exc_info=True) + else: + logger.info('Docker service started successfully') + def stop_apps(self): if not self.middleware.call_sync('docker.config')['dataset']: return From bf1bda284672741e339144767d97eb0adae2f0ab Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Date: Wed, 9 Oct 2024 16:17:19 +0500 Subject: [PATCH 3/4] If docker is not set, make sure we update its status properly --- src/middlewared/middlewared/plugins/failover_/event.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/middlewared/middlewared/plugins/failover_/event.py 
b/src/middlewared/middlewared/plugins/failover_/event.py index 25816870e89ad..1407d7776043d 100644 --- a/src/middlewared/middlewared/plugins/failover_/event.py +++ b/src/middlewared/middlewared/plugins/failover_/event.py @@ -16,6 +16,7 @@ from middlewared.service import Service, job, accepts from middlewared.service_exception import CallError from middlewared.schema import Dict, Bool, Int +from middlewared.plugins.docker.state_utils import Status # from middlewared.plugins.failover_.zpool_cachefile import ZPOOL_CACHE_FILE from middlewared.plugins.failover_.event_exceptions import AllZpoolsFailedToImport, IgnoreFailoverEvent, FencedError from middlewared.plugins.failover_.scheduled_reboot_alert import WATCHDOG_ALERT_FILE @@ -943,6 +944,7 @@ def vrrp_backup(self, job, fobj, ifname, event): def start_apps(self): pool = self.run_call('docker.config')['pool'] if not pool: + self.middleware.call_sync('docker.state.set_status', Status.UNCONFIGURED.value) logger.info('Skipping starting apps as they are not configured') return From 60a38b9b9049e653f9bab54276d15ef2dac94dde Mon Sep 17 00:00:00 2001 From: Waqar Ahmed Date: Wed, 9 Oct 2024 16:19:30 +0500 Subject: [PATCH 4/4] Make sure status is updated if docker datasets fail to mount --- src/middlewared/middlewared/plugins/failover_/event.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/middlewared/middlewared/plugins/failover_/event.py b/src/middlewared/middlewared/plugins/failover_/event.py index 1407d7776043d..b9f30e6b392ee 100644 --- a/src/middlewared/middlewared/plugins/failover_/event.py +++ b/src/middlewared/middlewared/plugins/failover_/event.py @@ -953,6 +953,7 @@ def start_apps(self): try: self.run_call('docker.fs_manage.mount') except Exception: + self.middleware.call_sync('docker.state.set_status', Status.FAILED.value, 'Failed to mount docker datasets') logger.error('Failed to mount docker datasets', exc_info=True) return else: