Skip to content

Commit

Permalink
borg extract: add --continue flag
Browse files Browse the repository at this point in the history
  • Loading branch information
enkore committed Sep 30, 2016
1 parent f276e30 commit 6da395b
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 14 deletions.
33 changes: 23 additions & 10 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import errno
import io
import os
import socket
import stat
Expand All @@ -22,7 +23,7 @@
from .compress import Compressor
from .constants import * # NOQA
from .hashindex import ChunkIndex, ChunkIndexEntry
from .helpers import Manifest
from .helpers import Manifest, slice_chunks
from .helpers import Chunk, ChunkIteratorFileWrapper, open_item
from .helpers import Error, IntegrityError
from .helpers import uid2user, user2uid, gid2group, group2gid
Expand Down Expand Up @@ -428,7 +429,7 @@ def add_file_chunks(chunks):
return stats

def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
hardlink_masters=None, stripped_components=0, original_path=None, pi=None):
complete_partial=False, stripped_components=0, hardlink_masters=None, original_path=None, pi=None):
"""
Extract archive item.
Expand All @@ -437,6 +438,7 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
:param dry_run: do not write any data
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param complete_partial: False: replace files, True: existing files are completed
:param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
:param stripped_components: stripped leading path components to correct hard link extraction
:param original_path: 'path' key as stored in archive
Expand Down Expand Up @@ -466,10 +468,11 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
# Attempt to remove existing files, ignore errors on failure
try:
st = os.lstat(path)
if stat.S_ISDIR(st.st_mode):
os.rmdir(path)
else:
os.unlink(path)
if not complete_partial:
if stat.S_ISDIR(st.st_mode):
os.rmdir(path)
else:
os.unlink(path)
except UnicodeEncodeError:
raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
except OSError:
Expand All @@ -496,9 +499,21 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
return
# Extract chunks, since the item which had the chunks was not extracted
with backup_io():
fd = open(path, 'wb')
fd = open(path, 'ab')
with fd:
ids = [c.id for c in item.chunks]
chunks = item.chunks
if complete_partial:
with backup_io():
fd.seek(0, io.SEEK_END)
existing_length = fd.tell()
# Slice chunks by current length of the existing file.
chunks, prefix_length = slice_chunks(chunks, maximum_length=existing_length)
# We don't bother extracting fractional chunks. Just seek to a chunk boundary.
fd.seek(prefix_length)
fd.truncate()
if pi:
pi.show(increase=prefix_length)
ids = [c.id for c in chunks]
for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
if pi:
pi.show(increase=len(data))
Expand All @@ -509,8 +524,6 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
else:
fd.write(data)
with backup_io():
pos = fd.tell()
fd.truncate(pos)
fd.flush()
self.restore_attrs(path, item, fd=fd.fileno())
if has_damaged_chunks:
Expand Down
13 changes: 11 additions & 2 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,11 +487,16 @@ def do_extract(self, args, repository, manifest, key, archive):
dry_run = args.dry_run
stdout = args.stdout
sparse = args.sparse
continue_ = args.continue_
strip_components = args.strip_components
dirs = []
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None

if stdout and continue_:
self.print_error('Cannot combine --stdout and --continue.')
return self.exit_code

def peek_and_store_hardlink_masters(item, matched):
if (partial_extract and not matched and stat.S_ISREG(item.mode) and
item.get('hardlink_master', True) and 'source' not in item):
Expand Down Expand Up @@ -527,8 +532,9 @@ def peek_and_store_hardlink_masters(item, matched):
dirs.append(item)
archive.extract_item(item, restore_attrs=False)
else:
archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
stripped_components=strip_components, original_path=orig_path, pi=pi)
archive.extract_item(item, stdout=stdout, complete_partial=continue_, sparse=sparse,
hardlink_masters=hardlink_masters, original_path=orig_path, pi=pi,
stripped_components=strip_components)
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(orig_path), e)

Expand Down Expand Up @@ -1867,6 +1873,9 @@ def build_parser(self, prog=None):
subparser.add_argument('--sparse', dest='sparse',
action='store_true', default=False,
help='create holes in output sparse file from all-zero chunks')
subparser.add_argument('--continue', dest='continue_',
action='store_true', default=False,
help='continue interrupted extraction')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to extract')
Expand Down
20 changes: 19 additions & 1 deletion src/borg/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from datetime import datetime, timezone, timedelta
from fnmatch import translate
from functools import wraps, partial, lru_cache
from itertools import islice
from itertools import islice, dropwhile
from operator import attrgetter
from string import Formatter

Expand Down Expand Up @@ -1769,3 +1769,21 @@ def swidth_slice(string, max_width):
if reverse:
result.reverse()
return ''.join(result)


def slice_chunks(chunks, maximum_length):
    """
    Slice *chunks* (list of ChunkListEntry) at the last chunk boundary
    that does not exceed *maximum_length* bytes.

    Only whole chunks are dropped; a chunk straddling *maximum_length*
    is kept (callers seek back to the boundary rather than extract a
    fractional chunk).

    :param chunks: sequence of chunk entries; each must have a ``.size`` attribute
    :param maximum_length: byte offset up to which leading chunks may be dropped
    :return: (remaining_chunks, prefix_length) where *remaining_chunks* is a new
             list of the chunks after the boundary and *prefix_length* is the
             total size of the dropped chunks (always <= maximum_length)
    """
    prefix_length = 0
    for index, chunk in enumerate(chunks):
        # Stop at the first chunk that would push the prefix past the limit.
        if prefix_length + chunk.size > maximum_length:
            return list(chunks[index:]), prefix_length
        prefix_length += chunk.size
    # Every chunk fits within maximum_length: nothing remains.
    return [], prefix_length
16 changes: 16 additions & 0 deletions src/borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,22 @@ def test_extract_with_pattern(self):
self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2")
self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"])

def test_extract_continue(self):
    """--continue must complete truncated files and trim files with trailing garbage."""
    self.cmd('init', self.repository_location)
    self.create_regular_file('file1', size=1024 * 80)
    self.create_regular_file('file2', size=1024 * 100)
    self.cmd('create', self.repository_location + '::test', 'input')

    with changedir('output'):
        self.cmd('extract', self.repository_location + '::test')
        # Simulate an interrupted extraction: file1 was cut short ...
        with open('input/file1', 'ab') as file:
            file.truncate(1234)
        # ... and file2 has trailing bytes not matching any archived chunk.
        with open('input/file2', 'ab') as file:
            file.write(b'extra bytes')
        self.cmd('extract', self.repository_location + '::test', '--continue')
        assert os.path.getsize('input/file1') == 1024 * 80
        assert os.path.getsize('input/file2') == 1024 * 100

def test_extract_list_output(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file', size=1024 * 80)
Expand Down
29 changes: 28 additions & 1 deletion src/borg/testsuite/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import msgpack.fallback

from .. import platform
from ..helpers import Location
from ..cache import ChunkListEntry
from ..helpers import Location, slice_chunks
from ..helpers import Buffer
from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError, replace_placeholders
from ..helpers import make_path_safe, clean_lines
Expand Down Expand Up @@ -1081,3 +1082,29 @@ def test_swidth_slice_mixed_characters():
string = '나윤a선나윤선나윤선나윤선나윤선'
assert swidth_slice(string, 5) == '나윤a'
assert swidth_slice(string, 6) == '나윤a'


class TestSliceChunks:
    """Tests for helpers.slice_chunks chunk-boundary arithmetic."""

    @pytest.mark.parametrize('chunks, offset, expected_chunks, expected_prefix_length', (
        # offset exactly on a chunk boundary: first chunk is dropped
        ([(1, 1000), (2, 500)], 1000, [(2, 500)], 1000),
        # offset just below the first boundary: nothing is dropped
        ([(1, 1000), (2, 500)], 999, [(1, 1000), (2, 500)], 0),
        # offset just past a boundary: only whole chunks are dropped
        ([(1, 1000), (2, 500)], 1001, [(2, 500)], 1000),
        # offset consumes all chunks exactly
        ([(1, 1000), (2, 500)], 1500, [], 1500),
        ([(1, 1000), (2, 500)], 1499, [(2, 500)], 1000),
        # offset beyond the total length: prefix is capped at the total
        ([(1, 1000), (2, 500)], 1501, [], 1500),
    ))
    def test_basic(self, chunks, offset, expected_chunks, expected_prefix_length):
        chunks = [ChunkListEntry(chunk_id, size, 0) for chunk_id, size in chunks]
        expected_chunks = [ChunkListEntry(chunk_id, size, 0) for chunk_id, size in expected_chunks]
        sliced_chunks, prefix_length = slice_chunks(chunks, offset)
        assert sliced_chunks == expected_chunks
        assert prefix_length == expected_prefix_length

0 comments on commit 6da395b

Please sign in to comment.