Skip to content

Commit

Permalink
Merge pull request #3300 from ThomasWaldmann/mount-options
Browse files Browse the repository at this point in the history
borg mount: support exclusion group options and paths, fixes #2138
  • Loading branch information
ThomasWaldmann authored Nov 23, 2017
2 parents 483d3e8 + caece37 commit afc84ca
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 38 deletions.
3 changes: 3 additions & 0 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2325,6 +2325,9 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
subparser.add_argument('-o', dest='options', type=str,
help='Extra mount options')
define_archive_filters_group(subparser)
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to extract; patterns are supported')
define_exclusion_group(subparser, strip_components=True)
if parser.prog == 'borgfs':
return parser

Expand Down
90 changes: 62 additions & 28 deletions src/borg/fuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
logger = create_logger()

from .crypto.low_level import blake2b_128
from .archiver import Archiver
from .archive import Archive
from .hashindex import FuseVersionsIndex
from .helpers import daemonize, hardlinkable, signal_handler, format_file_size
Expand Down Expand Up @@ -118,7 +119,7 @@ def get(self, inode):
else:
raise ValueError('Invalid entry type in self.meta')

def iter_archive_items(self, archive_item_ids):
def iter_archive_items(self, archive_item_ids, filter=None, consider_part_files=False):
unpacker = msgpack.Unpacker()

# Current offset in the metadata stream, which consists of all metadata chunks glued together
Expand Down Expand Up @@ -161,6 +162,11 @@ def write_bytes(append_msgpacked_bytes):
# Need more data, feed the next chunk
break

item = Item(internal_dict=item)
if filter and not filter(item) or not consider_part_files and 'part' in item:
msgpacked_bytes = b''
continue

current_item = msgpacked_bytes
current_item_length = len(current_item)
current_spans_chunks = stream_offset - current_item_length < chunk_begin
Expand Down Expand Up @@ -197,7 +203,7 @@ def write_bytes(append_msgpacked_bytes):
inode = write_offset + self.offset
write_offset += 9

yield inode, Item(internal_dict=item)
yield inode, item

self.write_offset = write_offset

Expand Down Expand Up @@ -289,7 +295,21 @@ def _process_archive(self, archive_name, prefix=[]):
t0 = time.perf_counter()
archive = Archive(self.repository_uncached, self.key, self._manifest, archive_name,
consider_part_files=self._args.consider_part_files)
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items):
strip_components = self._args.strip_components
matcher = Archiver.build_matcher(self._args.patterns, self._args.paths)
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None

def peek_and_store_hardlink_masters(item, matched):
if (partial_extract and not matched and hardlinkable(item.mode) and
item.get('hardlink_master', True) and 'source' not in item):
hardlink_masters[item.get('path')] = (item.get('chunks'), None)

filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter,
consider_part_files=self._args.consider_part_files):
if strip_components:
item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
path = os.fsencode(item.path)
is_dir = stat.S_ISDIR(item.mode)
if is_dir:
Expand All @@ -307,11 +327,16 @@ def _process_archive(self, archive_name, prefix=[]):
parent = 1
for segment in segments[:-1]:
parent = self._process_inner(segment, parent)
self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
hardlink_masters, strip_components)
duration = time.perf_counter() - t0
logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name)

def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
path = item.path
del item.path # save some space
hardlink_masters = hardlink_masters or {}

def file_version(item, path):
if 'chunks' in item:
file_id = blake2b_128(path)
Expand All @@ -336,35 +361,44 @@ def make_versioned_name(name, version, add_dir=False):
version_enc = os.fsencode('.%05d' % version)
return name + version_enc + ext

if 'source' in item and hardlinkable(item.mode):
source = os.path.join(*item.source.split(os.sep)[stripped_components:])
chunks, link_target = hardlink_masters.get(item.source, (None, source))
if link_target:
# Hard link was extracted previously, just link
link_target = os.fsencode(link_target)
if self.versions:
# adjust link target name with version
version = self.file_versions[link_target]
link_target = make_versioned_name(link_target, version, add_dir=True)
try:
inode = self.find_inode(link_target, prefix)
except KeyError:
logger.warning('Skipping broken hard link: %s -> %s', path, source)
return
item = self.get_item(inode)
item.nlink = item.get('nlink', 1) + 1
self._items[inode] = item
elif chunks is not None:
# assign chunks to this item, since the item which had the chunks was not extracted
item.chunks = chunks
inode = item_inode
self._items[inode] = item
if hardlink_masters:
# Update master entry with extracted item path, so that following hardlinks don't extract twice.
hardlink_masters[item.source] = (None, path)
else:
inode = item_inode

if self.versions and not is_dir:
parent = self._process_inner(name, parent)
path = os.fsencode(item.path)
version = file_version(item, path)
enc_path = os.fsencode(path)
version = file_version(item, enc_path)
if version is not None:
# regular file, with contents - maybe a hardlink master
name = make_versioned_name(name, version)
self.file_versions[path] = version
self.file_versions[enc_path] = version

path = item.path
del item.path # save some space
if 'source' in item and hardlinkable(item.mode):
# a hardlink, no contents, <source> is the hardlink master
source = os.fsencode(item.source)
if self.versions:
# adjust source name with version
version = self.file_versions[source]
source = make_versioned_name(source, version, add_dir=True)
name = make_versioned_name(name, version)
try:
inode = self.find_inode(source, prefix)
except KeyError:
logger.warning('Skipping broken hard link: %s -> %s', path, item.source)
return
item = self.cache.get(inode)
item.nlink = item.get('nlink', 1) + 1
self._items[inode] = item
else:
inode = item_inode
self.parent[inode] = parent
if name:
self.contents[parent][name] = inode
Expand Down
56 changes: 46 additions & 10 deletions src/borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ def _extract_hardlinks_setup(self):
os.mkdir(os.path.join(self.input_path, 'dir1'))
os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))

self.create_regular_file('source')
self.create_regular_file('source', contents=b'123456')
os.link(os.path.join(self.input_path, 'source'),
os.path.join(self.input_path, 'abba'))
os.link(os.path.join(self.input_path, 'source'),
Expand All @@ -778,30 +778,56 @@ def _extract_hardlinks_setup(self):
requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')

@requires_hardlinks
def test_strip_components_links(self):
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_mount_hardlinks(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \
changedir(mountpoint):
assert os.stat('hardlink').st_nlink == 2
assert os.stat('subdir/hardlink').st_nlink == 2
assert open('subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('aaaa').st_nlink == 2
assert os.stat('source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \
changedir(mountpoint):
assert os.stat('input/dir1/hardlink').st_nlink == 2
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
with self.fuse_mount(self.repository_location + '::test', mountpoint), \
changedir(mountpoint):
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'

@requires_hardlinks
def test_extract_hardlinks(self):
    """Check hard link preservation across three ``borg extract`` variants.

    Uses the archive built by ``_extract_hardlinks_setup()``, in which
    ``source`` has 6 bytes of content (``b'123456'``) and is hard-linked
    several times across ``input/`` and ``input/dir1/``.  Each extraction
    runs into a fresh view of ``output/`` and verifies that link counts
    and file contents survive.
    """
    self._extract_hardlinks_setup()
    # 1) Partial extract with --strip-components=2: only the links that
    # survive the stripping are extracted, so each extracted pair shares
    # nlink == 2 (the full 4-way group is asserted in case 3 below).
    with changedir('output'):
        self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
        assert os.stat('hardlink').st_nlink == 2
        assert os.stat('subdir/hardlink').st_nlink == 2
        # content must come from the hardlink master even though the
        # master itself was excluded by the stripping
        assert open('subdir/hardlink', 'rb').read() == b'123456'
        assert os.stat('aaaa').st_nlink == 2
        assert os.stat('source2').st_nlink == 2
    # 2) Partial extract by path ('input/dir1'): hardlink masters outside
    # the requested path are not extracted, but links inside it still pair up.
    with changedir('output'):
        self.cmd('extract', self.repository_location + '::test', 'input/dir1')
        assert os.stat('input/dir1/hardlink').st_nlink == 2
        assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
        assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
        assert os.stat('input/dir1/aaaa').st_nlink == 2
        assert os.stat('input/dir1/source2').st_nlink == 2
    # 3) Full extract: the complete 4-way hard link group is restored.
    with changedir('output'):
        self.cmd('extract', self.repository_location + '::test')
        assert os.stat('input/source').st_nlink == 4
        assert os.stat('input/abba').st_nlink == 4
        assert os.stat('input/dir1/hardlink').st_nlink == 4
        assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
        assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'

def test_extract_include_exclude(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
Expand Down Expand Up @@ -2182,8 +2208,9 @@ def test_fuse_versions_view(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
self.create_regular_file('test', contents=b'first')
if are_hardlinks_supported():
self.create_regular_file('hardlink1', contents=b'')
self.create_regular_file('hardlink1', contents=b'123456')
os.link('input/hardlink1', 'input/hardlink2')
os.link('input/hardlink1', 'input/hardlink3')
self.cmd('create', self.repository_location + '::archive1', 'input')
self.create_regular_file('test', contents=b'second')
self.cmd('create', self.repository_location + '::archive2', 'input')
Expand All @@ -2195,9 +2222,18 @@ def test_fuse_versions_view(self):
assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there
assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
if are_hardlinks_supported():
st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
assert st1.st_ino == st2.st_ino
hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')
hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, 'rb').read() == b'123456'
# similar again, but exclude the hardlink master:
with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
if are_hardlinks_supported():
hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
assert os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, 'rb').read() == b'123456'

@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_fuse_allow_damaged_files(self):
Expand Down

0 comments on commit afc84ca

Please sign in to comment.