From 2aa1ef3405afb343120151f5d7d347a78ff76a75 Mon Sep 17 00:00:00 2001 From: sonicaj Date: Thu, 10 Oct 2024 01:40:52 +0500 Subject: [PATCH] NAS-131158 / 25.04 / Get apps to work in HA (#14605) * Try to gracefully stop docker on failover event * Gracefully start apps when passive becomes active * If docker is not set, make sure we update its status properly * Make sure status is updated if docker datasets fail to mount --- .../middlewared/plugins/failover_/event.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/middlewared/middlewared/plugins/failover_/event.py b/src/middlewared/middlewared/plugins/failover_/event.py index e790d78550487..b9f30e6b392ee 100644 --- a/src/middlewared/middlewared/plugins/failover_/event.py +++ b/src/middlewared/middlewared/plugins/failover_/event.py @@ -16,6 +16,7 @@ from middlewared.service import Service, job, accepts from middlewared.service_exception import CallError from middlewared.schema import Dict, Bool, Int +from middlewared.plugins.docker.state_utils import Status # from middlewared.plugins.failover_.zpool_cachefile import ZPOOL_CACHE_FILE from middlewared.plugins.failover_.event_exceptions import AllZpoolsFailedToImport, IgnoreFailoverEvent, FencedError from middlewared.plugins.failover_.scheduled_reboot_alert import WATCHDOG_ALERT_FILE @@ -737,6 +738,8 @@ def vrrp_master(self, job, fobj, ifname, event): self.run_call('kmip.initialize_keys') logger.info('Done syncing encryption keys with KMIP server') + self.start_apps() + logger.info('Migrating interface information (if required)') self.run_call('interface.persist_link_addresses') logger.info('Done migrating interface information (if required)') @@ -796,6 +799,14 @@ def vrrp_backup(self, job, fobj, ifname, event): logger.warning('Entering BACKUP on "%s".', ifname) + # We will try to give some time to docker to gracefully stop before zpools will be forcefully + # exported. This is to avoid any potential data corruption. 
+        stop_docker_thread = threading.Thread(
+            target=self.stop_apps,
+            name='failover_stop_docker',
+        )
+        stop_docker_thread.start()
+
         # We stop netdata before exporting pools because otherwise we might have erroneous stuff
         # getting logged and causing spam
         logger.info('Stopping reporting metrics')
@@ -930,6 +941,78 @@ def vrrp_backup(self, job, fobj, ifname, event):
 
         return self.FAILOVER_RESULT
 
+    def start_apps(self):
+        """
+        Bring up apps (docker) once this node has become MASTER (called from `vrrp_master`).
+
+        When no pool is configured for apps the docker status is set to unconfigured. Otherwise
+        the docker datasets are mounted and the docker service is started. This is best effort:
+        failures are logged (and reflected in the docker status where possible) rather than
+        raised, so an apps problem does not propagate into the failover event handling.
+        """
+        try:
+            pool = self.run_call('docker.config')['pool']
+        except Exception:
+            # Covers the call failing as well as it not handing back a usable config
+            logger.error('Failed to retrieve docker configuration, skipping starting apps', exc_info=True)
+            return
+
+        if not pool:
+            self._set_docker_status(Status.UNCONFIGURED.value)
+            logger.info('Skipping starting apps as they are not configured')
+            return
+
+        logger.info('Going to initialize apps plugin as %r pool is configured for apps', pool)
+        logger.info('Mounting relevant docker datasets')
+        try:
+            self.run_call('docker.fs_manage.mount')
+        except Exception:
+            # Log first so the mount traceback is recorded even if updating the status fails
+            logger.error('Failed to mount docker datasets', exc_info=True)
+            self._set_docker_status(Status.FAILED.value, 'Failed to mount docker datasets')
+            return
+        else:
+            logger.info('Mounted docker datasets successfully')
+
+        logger.info('Starting docker service')
+        try:
+            self.run_call('docker.state.start_service')
+        except Exception:
+            logger.error('Failed to start docker service', exc_info=True)
+        else:
+            logger.info('Docker service started successfully')
+
+    def _set_docker_status(self, *args):
+        """Update the docker status via `docker.state.set_status`, logging rather than raising on failure."""
+        try:
+            self.middleware.call_sync('docker.state.set_status', *args)
+        except Exception:
+            logger.error('Failed to update docker status', exc_info=True)
+
+    def stop_apps(self):
+        """
+        Try to gracefully stop the docker service; `vrrp_backup` runs this in a separate thread.
+
+        Nothing is done when no docker dataset is configured. Failures are only logged because
+        no caller is in a position to handle an exception raised inside that thread.
+        """
+        try:
+            dataset = self.middleware.call_sync('docker.config')['dataset']
+        except Exception:
+            logger.error('Failed to retrieve docker configuration, skipping stopping docker', exc_info=True)
+            return
+
+        if not dataset:
+            return
+
+        logger.info('Trying to gracefully stop docker service')
+        try:
+            self.run_call('service.stop', 'docker')
+        except Exception:
+            logger.error('Failed to stop docker service gracefully', exc_info=True)
+        else:
+            logger.info('Docker service stopped gracefully')
+
 
 async def vrrp_fifo_hook(middleware, data):
     ifname = data['ifname']