From 6009c650a9db42797102228312188745f2f39ead Mon Sep 17 00:00:00 2001 From: Maria Dyuldina Date: Wed, 27 Sep 2023 15:16:51 +0200 Subject: [PATCH 1/5] Add configurable timeout for zk_lock retry --- ch_backup/config.py | 1 + ch_backup/logic/lock_manager.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ch_backup/config.py b/ch_backup/config.py index bc071203..c7f1153d 100644 --- a/ch_backup/config.py +++ b/ch_backup/config.py @@ -209,6 +209,7 @@ def _as_seconds(t: str) -> int: "flock_path": "/tmp/flock.lock", "zk_flock_path": "/tmp/zk_flock.lock", "exitcode": 0, + "lock_timeout": _as_seconds("1 min"), }, } diff --git a/ch_backup/logic/lock_manager.py b/ch_backup/logic/lock_manager.py index c07f584e..712c2463 100644 --- a/ch_backup/logic/lock_manager.py +++ b/ch_backup/logic/lock_manager.py @@ -41,6 +41,7 @@ def __init__(self, lock_conf: dict, zk_ctl: Optional[ZookeeperCTL]) -> None: self._exitcode = lock_conf.get("exitcode") self._distributed = zk_ctl is None self._disabled = False + self._lock_timeout = lock_conf.get("lock_timeout") def __call__(self, distributed: bool = True, disabled: bool = False): # type: ignore """ @@ -106,7 +107,7 @@ def _zk_flock(self) -> None: if not client.exists(self._process_zk_lockfile_path): client.create(self._process_zk_lockfile_path, makepath=True) self._zk_lock = client.Lock(self._process_zk_lockfile_path) - if not self._zk_lock.acquire(blocking=False): + if not self._zk_lock.acquire(blocking=True, timeout=self._lock_timeout): sys.exit(self._exitcode) else: raise RuntimeError("ZK flock enabled, but zookeeper is not configured") From ef0322b5b5d88a2aa71d0b30db9dc48c2d9ff80f Mon Sep 17 00:00:00 2001 From: Maria Dyuldina Date: Wed, 27 Sep 2023 15:45:53 +0200 Subject: [PATCH 2/5] Fix linter --- ch_backup/logic/lock_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ch_backup/logic/lock_manager.py b/ch_backup/logic/lock_manager.py index 712c2463..1c085591 100644 --- a/ch_backup/logic/lock_manager.py +++ b/ch_backup/logic/lock_manager.py @@ -18,7 +18,7 @@ class LockManager: Lock manager class """ - # pylint: disable=consider-using-with + # pylint: disable=consider-using-with,too-many-instance-attributes _lock_conf: dict _process_lockfile_path: str From 9cc5e9449246801b614b81c9358e9d00032a6263 Mon Sep 17 00:00:00 2001 From: Filippo Monari Date: Thu, 2 Nov 2023 15:21:44 +0100 Subject: [PATCH 3/5] Added exception handling in _zk_flock --- ch_backup/logic/lock_manager.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ch_backup/logic/lock_manager.py b/ch_backup/logic/lock_manager.py index 1c085591..c828b580 100644 --- a/ch_backup/logic/lock_manager.py +++ b/ch_backup/logic/lock_manager.py @@ -7,6 +7,7 @@ from types import TracebackType from typing import IO, Optional, Type +from kazoo.exceptions import LockTimeout from kazoo.recipe.lock import Lock from ch_backup import logging @@ -107,7 +108,23 @@ def _zk_flock(self) -> None: if not client.exists(self._process_zk_lockfile_path): client.create(self._process_zk_lockfile_path, makepath=True) self._zk_lock = client.Lock(self._process_zk_lockfile_path) - if not self._zk_lock.acquire(blocking=True, timeout=self._lock_timeout): - sys.exit(self._exitcode) + force_exit = self._exitcode == 0 + try: + if not self._zk_lock.acquire(blocking=True, timeout=self._lock_timeout): + fail_on_lock_acquiring(force_exit) + except LockTimeout as e: + fail_on_lock_acquiring(force_exit, e) else: raise RuntimeError("ZK flock enabled, but zookeeper is not configured") + + +def fail_on_lock_acquiring(force_exit: bool, e: Optional[Exception] = None): + """ + Determines how to exit from the lock acquisition process + """ + if force_exit: + sys.exit(0) + err = RuntimeError("Lock was not acquired") + if e is not None: + raise err from e + raise err From 53959999274f3229013dcb9980f1c2d3743cf4e8 Mon Sep 17 00:00:00 2001 From: Filippo Monari Date: Fri, 3 Nov 2023 12:31:25 +0100 Subject: [PATCH 4/5] Rebased onto lastest main and fixed linting --- ch_backup/logic/lock_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ch_backup/logic/lock_manager.py b/ch_backup/logic/lock_manager.py index c828b580..089f1c8e 100644 --- a/ch_backup/logic/lock_manager.py +++ b/ch_backup/logic/lock_manager.py @@ -118,7 +118,7 @@ def _zk_flock(self) -> None: raise RuntimeError("ZK flock enabled, but zookeeper is not configured") -def fail_on_lock_acquiring(force_exit: bool, e: Optional[Exception] = None): +def fail_on_lock_acquiring(force_exit: bool, e: Optional[Exception] = None) -> None: """ Determines how to exit from the lock acquisition process """ From 72b3c8242a222188356edf782b33f11025659d04 Mon Sep 17 00:00:00 2001 From: Filippo Monari Date: Wed, 8 Nov 2023 17:42:50 +0100 Subject: [PATCH 5/5] Restoring _exit_code functioning. Fixing if statement in case of timeout --- ch_backup/logic/lock_manager.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/ch_backup/logic/lock_manager.py b/ch_backup/logic/lock_manager.py index 089f1c8e..f570df0b 100644 --- a/ch_backup/logic/lock_manager.py +++ b/ch_backup/logic/lock_manager.py @@ -108,23 +108,13 @@ def _zk_flock(self) -> None: if not client.exists(self._process_zk_lockfile_path): client.create(self._process_zk_lockfile_path, makepath=True) self._zk_lock = client.Lock(self._process_zk_lockfile_path) - force_exit = self._exitcode == 0 try: - if not self._zk_lock.acquire(blocking=True, timeout=self._lock_timeout): - fail_on_lock_acquiring(force_exit) - except LockTimeout as e: - fail_on_lock_acquiring(force_exit, e) + _ = self._zk_lock.acquire(blocking=True, timeout=self._lock_timeout) + logging.debug("Lock was acquired") + except LockTimeout: + logging.error( + "Lock was not acquired due to timeout error.", exc_info=True + ) + sys.exit(self._exitcode) else: raise RuntimeError("ZK flock enabled, but zookeeper is not configured") - - -def fail_on_lock_acquiring(force_exit: bool, e: Optional[Exception] = None) -> None: - """ - Determines how to exit from the lock acquisition process - """ - if force_exit: - sys.exit(0) - err = RuntimeError("Lock was not acquired") - if e is not None: - raise err from e - raise err