Skip to content

Commit

Permalink
Fix delete memory usage (#79)
Browse files Browse the repository at this point in the history
* Disable diagnose flag for exceptions and fix time format.

* Reduce memory usage for delete and purge commands

* Review changes
  • Loading branch information
MikhailBurdukov authored Oct 19, 2023
1 parent 4aca14f commit 997bbd7
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
12 changes: 8 additions & 4 deletions ch_backup/backup/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,17 +274,21 @@ def deduplicate_part(


def collect_dedup_references_for_batch_backup_deletion(
retained_backups: List[BackupMetadata],
deleting_backups: List[BackupMetadata],
layout: BackupLayout,
retained_backups_light_meta: List[BackupMetadata],
deleting_backups_light_meta: List[BackupMetadata],
) -> Dict[str, DedupReferences]:
"""
Collect deduplication information for deleting multiple backups. It contains names of data parts that should
be preserved during deletion.
"""
dedup_references: Dict[str, DedupReferences] = defaultdict(dict)

deleting_backup_name_resolver = {b.path: b.name for b in deleting_backups}
for backup in retained_backups:
deleting_backup_name_resolver = {
b.path: b.name for b in deleting_backups_light_meta
}
for backup in retained_backups_light_meta:
backup = layout.reload_backup(backup, use_light_meta=False)
for db_name in backup.get_databases():
for table in backup.get_tables(db_name):
for part in table.get_parts():
Expand Down
25 changes: 16 additions & 9 deletions ch_backup/ch_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,9 @@ def delete(
deleting_backups = []
retained_backups = []
with self._context.locker():
# Use light metadata in backups iteration to avoid high memory usage.
for i, backup in enumerate(
self._context.backup_layout.get_backups(use_light_meta=False)
self._context.backup_layout.get_backups(use_light_meta=True)
):
if backup.name == backup_name:
deleting_backups.append(backup)
Expand All @@ -329,8 +330,9 @@ def delete(
raise BackupNotFound(backup_name)

dedup_references = collect_dedup_references_for_batch_backup_deletion(
retained_backups=retained_backups,
deleting_backups=deleting_backups,
layout=self._context.backup_layout,
retained_backups_light_meta=retained_backups,
deleting_backups_light_meta=deleting_backups,
)

result: Tuple[Optional[str], Optional[str]] = (None, None)
Expand Down Expand Up @@ -362,7 +364,8 @@ def purge(self) -> Tuple[Sequence[str], Optional[str]]:
deleting_backups: List[BackupMetadata] = []
backup_names = self._context.backup_layout.get_backup_names()
with self._context.locker():
for backup in self._context.backup_layout.get_backups(use_light_meta=False):
# Use light metadata in backups iteration to avoid high memory usage.
for backup in self._context.backup_layout.get_backups(use_light_meta=True):
if backup.name not in backup_names:
logging.info("Deleting backup without metadata: {}", backup.name)
self._context.backup_layout.delete_backup(backup.name)
Expand Down Expand Up @@ -391,8 +394,9 @@ def purge(self) -> Tuple[Sequence[str], Optional[str]]:
deleting_backups.append(backup)

dedup_references = collect_dedup_references_for_batch_backup_deletion(
retained_backups=retained_backups,
deleting_backups=deleting_backups,
layout=self._context.backup_layout,
retained_backups_light_meta=retained_backups,
deleting_backups_light_meta=deleting_backups,
)

for backup in deleting_backups:
Expand All @@ -414,12 +418,15 @@ def fix_admin_user(self, dry_run: bool = True) -> None:
self._access_backup_manager.fix_admin_user(self._context, dry_run)

def _delete(
self, backup: BackupMetadata, dedup_references: DedupReferences
self, backup_light_meta: BackupMetadata, dedup_references: DedupReferences
) -> Tuple[Optional[str], Optional[str]]:
logging.info(
"Deleting backup {}, state: {}",
backup.name,
backup.state,
backup_light_meta.name,
backup_light_meta.state,
)
backup = self._context.backup_layout.reload_backup(
backup_light_meta, use_light_meta=False
)
backup.state = BackupState.DELETING
self._context.backup_layout.upload_backup_metadata(backup)
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/test_deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,9 @@ def test_collect_dedup_references_for_batch_backup_deletion(

assert (
collect_dedup_references_for_batch_backup_deletion(
retained_backups=retained_backups,
deleting_backups=deleting_backups,
layout=layout_mock(),
retained_backups_light_meta=retained_backups,
deleting_backups_light_meta=deleting_backups,
)
== result
)
Expand Down

0 comments on commit 997bbd7

Please sign in to comment.