Skip to content

Commit

Permalink
support creating zip SBTs
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Apr 27, 2020
1 parent 0e956ad commit 71449e2
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 26 deletions.
54 changes: 37 additions & 17 deletions sourmash/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,26 +484,36 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False):
"""
version = 5

if path.endswith('.sbt.json'):
path = path[:-9]
fn = os.path.abspath(path + '.sbt.json')
if path.endswith(".zip"):
storage = ZipStorage(path)
kind = "Zip"
backend = "FSStorage"
subdir = '.sbt.{}'.format(os.path.basename(path[:-4]))
storage_args = FSStorage("", subdir).init_args()
storage.save(subdir + "/", "")
else:
kind = "FS"
if path.endswith('.sbt.json'):
path = path[:-9]
fn = os.path.abspath(path + '.sbt.json')

if storage is None:
# default storage
location = os.path.dirname(fn)
subdir = '.sbt.{}'.format(os.path.basename(path))
if storage is None:
# default storage
location = os.path.dirname(fn)
subdir = '.sbt.{}'.format(os.path.basename(path))

storage = FSStorage(location, subdir)
fn = os.path.join(location, fn)
storage = FSStorage(location, subdir)
fn = os.path.join(location, fn)

backend = [k for (k, v) in STORAGES.items() if v == type(storage)][0]
backend = [k for (k, v) in STORAGES.items() if v == type(storage)][0]
storage_args = storage.init_args()

info = {}
info['d'] = self.d
info['version'] = version
info['storage'] = {
'backend': backend,
'args': storage.init_args()
'args': storage_args
}
info['factory'] = {
'class': GraphFactory.__name__,
Expand Down Expand Up @@ -540,10 +550,11 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False):

node.storage = storage

data['filename'] = node.save(data['filename'])
if kind == "Zip":
node.save(os.path.join(subdir, data['filename']))
elif kind == "FS":
data['filename'] = node.save(data['filename'])

node.storage = storage
data['filename'] = node.save(data['filename'])
if isinstance(node, Node):
nodes[i] = data
else:
Expand All @@ -555,10 +566,19 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False):
notify("\nFinished saving nodes, now saving SBT json file.")
info['nodes'] = nodes
info['signatures'] = leaves
with open(fn, 'w') as fp:
json.dump(info, fp)

return fn
if kind == "Zip":
tree_data = json.dumps(info)
save_path = os.path.basename(path)[:-4] + ".sbt.json"
storage.save(save_path, tree_data)
storage.close()

elif kind == "FS":
with open(fn, 'w') as fp:
json.dump(info, fp)

return path


@classmethod
def load(cls, location, leaf_loader=None, storage=None, print_version_warning=True):
Expand Down
13 changes: 6 additions & 7 deletions sourmash/sbt_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def __enter__(self):
return self

def __exit__(self, type, value, traceback):
self.close()

def close(self):
pass

def can_open(self, location):
Expand Down Expand Up @@ -100,13 +103,6 @@ def init_args(self):
def __exit__(self, type, value, traceback):
self.tarfile.close()

@staticmethod
def can_open(location):
try:
return tarfile.is_tarfile(location)
except IOError:
return False


class ZipStorage(Storage):

Expand Down Expand Up @@ -149,6 +145,9 @@ def init_args(self):
return {'path': self.path}

def __exit__(self, type, value, traceback):
self.close()

def close(self):
self.zipfile.close()

@staticmethod
Expand Down
32 changes: 30 additions & 2 deletions tests/test_sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,10 @@ def test_sbt_zipstorage(tmpdir):
to_search.data, 0.1)}
print(*old_result, sep='\n')

with ZipStorage(tmpdir.join("tree.zip")) as storage:
with ZipStorage(str(tmpdir.join("tree.zip"))) as storage:
tree.save(str(tmpdir.join("tree")), storage=storage)

with ZipStorage(tmpdir.join("tree.zip")) as storage:
with ZipStorage(str(tmpdir.join("tree.zip"))) as storage:
tree = SBT.load(str(tmpdir.join("tree")),
leaf_loader=SigLeaf.load,
storage=storage)
Expand Down Expand Up @@ -470,6 +470,34 @@ def test_sbt_redisstorage():
assert old_result == new_result


def test_save_zip(tmpdir):
    """Save a loaded SBT to a ``.zip`` path and check it round-trips.

    A copy of the ``v5.zip`` test tree is loaded, saved to a new ``.zip``
    path, and loaded again. The reloaded tree must be backed by a
    ``ZipStorage`` and return the same search results as the tree it was
    saved from.
    """
    source_zip = utils.get_test_data("v5.zip")
    copied_zip = tmpdir.join("v5.zip")
    saved_zip = tmpdir.join("new.zip")

    # Load from a copy inside tmpdir rather than from the shared test data.
    shutil.copyfile(source_zip, str(copied_zip))

    tree = SBT.load(str(copied_zip), leaf_loader=SigLeaf.load)
    tree.save(str(saved_zip))
    assert saved_zip.exists()

    new_tree = SBT.load(str(saved_zip), leaf_loader=SigLeaf.load)
    assert isinstance(new_tree.storage, ZipStorage)

    query_path = utils.get_test_data(utils.SIG_FILES[0])
    to_search = load_one_signature(query_path)

    print("*" * 60)
    print("{}:".format(to_search))

    # Run the same query against both trees and compare the match sets.
    old_result = set()
    for match in tree.find(search_minhashes, to_search, 0.1):
        old_result.add(str(match))

    new_result = set()
    for match in new_tree.find(search_minhashes, to_search, 0.1):
        new_result.add(str(match))

    print(*new_result, sep="\n")

    assert old_result == new_result
    assert len(new_result) == 2



def test_load_zip(tmpdir):
testdata = utils.get_test_data("v5.zip")
testsbt = tmpdir.join("v5.zip")
Expand Down

0 comments on commit 71449e2

Please sign in to comment.