Skip to content

Commit

Permalink
feat: Adds zip
Browse files Browse the repository at this point in the history
  • Loading branch information
ntlhui committed Dec 16, 2024
1 parent 7c920b1 commit 6cdc42b
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 22 deletions.
27 changes: 9 additions & 18 deletions e4e_data_management/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,24 +282,7 @@ def push(self, path: Path) -> None:
Args:
path (Path): Destination to push completed dataset to
"""
if any(len(mission.staged_files) != 0
for mission in self.active_dataset.missions.values()) or \
len(self.active_dataset.staged_files) != 0:
raise RuntimeError('Files still in staging')

# Check that the README is present
readmes = [file
for file in list(self.active_dataset.root.glob('*'))
if re.match(fnmatch.translate('readme.*'), file.name, re.IGNORECASE)]

if len(readmes) == 0:
raise RuntimeError('Readme not found')
acceptable_exts = ['.md', '.docx']
if not any(readme.suffix.lower() in acceptable_exts for readme in readmes):
raise RuntimeError('Illegal README format')

# validate self
self.active_dataset.validate()
self.active_dataset.check_complete()

# Duplicate to destination
destination = path.joinpath(self.active_dataset.name)
Expand All @@ -317,6 +300,14 @@ def zip(self, output_path: Path) -> None:
Args:
output_path (Path): Output path
"""
if output_path.suffix.lower() != '.zip':
output_path = output_path.joinpath(
self.active_dataset.name + '.zip')

output_path.parent.mkdir(parents=True, exist_ok=True)
self.active_dataset.check_complete()

self.active_dataset.create_zip(output_path)

def unzip(self, input_file: Path, output_path: Path) -> None:
"""This will unzip the archived dataset to the specified root
Expand Down
42 changes: 38 additions & 4 deletions e4e_data_management/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
'''Data classes
'''
from __future__ import annotations

import re
import fnmatch
import datetime as dt
import json
import logging
Expand All @@ -15,7 +16,7 @@
Union)

from e4e_data_management.metadata import Metadata

from e4e_data_management.exception import MissionFilesInStaging, ReadmeFilesInStaging, ReadmeNotFound, CorruptedDataset

@dataclass
class StagedFile:
Expand Down Expand Up @@ -526,5 +527,38 @@ def create_zip(self, zip_path: Path) -> None:
if zip_path.suffix.lower() != '.zip':
raise RuntimeError('Invalid suffix')

with zipfile.ZipFile(file=zip_path, mode='w') as _:
pass
with zipfile.ZipFile(file=zip_path, mode='w') as handle:
manifest = self.manifest.get_dict()
for file in manifest:
src_path = self.root.joinpath(file)
dest = Path(self.name) / file
handle.write(filename=src_path, arcname=dest)

def check_complete(self) -> None:
    """Checks if the dataset is complete

    The dataset is considered complete when no mission or dataset-level
    files remain in staging, at least one ``readme.*`` file with an
    acceptable extension exists directly under the dataset root, and
    ``self.validate()`` reports success.

    Raises:
        MissionFilesInStaging: Mission files remain in staging
        ReadmeFilesInStaging: Readme files remain in staging
        ReadmeNotFound: No readme file in the dataset root, or none with an
            acceptable extension
        CorruptedDataset: ``self.validate()`` reported failure
    """
    if any(mission.staged_files for mission in self.missions.values()):
        raise MissionFilesInStaging('Mission files still in staging')
    if self.staged_files:
        raise ReadmeFilesInStaging('Readme files still in staging')

    # Compile once rather than re-translating the glob for every entry.
    # Matching is case-insensitive so README.md, Readme.docx, ... all count.
    readme_pattern = re.compile(fnmatch.translate('readme.*'), re.IGNORECASE)
    readmes = [file for file in self.root.glob('*')
               if readme_pattern.match(file.name)]
    if not readmes:
        raise ReadmeNotFound('Readme not found')

    acceptable_exts = ('.md', '.docx')
    if not any(readme.suffix.lower() in acceptable_exts for readme in readmes):
        raise ReadmeNotFound('Acceptable extension not found')

    if not self.validate():
        raise CorruptedDataset('Dataset validation failed')
28 changes: 28 additions & 0 deletions e4e_data_management/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
'''E4E Data Management Exceptions
'''
from abc import ABC


class Incomplete(RuntimeError, ABC):
    """Dataset not complete

    Common base for every "dataset is not ready" condition. It derives from
    RuntimeError because the completeness checks historically raised
    RuntimeError, so callers that catch RuntimeError keep working.
    """


class MissionFilesInStaging(Incomplete):
    """Mission files still in staging area
    """


class ReadmeFilesInStaging(Incomplete):
    """Readme files still in staging area
    """


class ReadmeNotFound(Incomplete):
    """Readme files not found
    """


class CorruptedDataset(Exception):
    """Corrupted Dataset
    """
47 changes: 47 additions & 0 deletions tests/test_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'''Tests zipping
'''
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Tuple
from unittest.mock import Mock
import zipfile
from e4e_data_management.core import DataManager

# Type of the `single_mission_data` fixture value: a pair of tuples whose
# first element carries the DataManager under test. The meaning of the
# remaining elements is defined by the fixture, which is not shown here.
SingleMissionFixture = Tuple[Tuple[Mock,
                                   DataManager, Path], Tuple[Path, int, int]]


def test_zip_to_dir(single_mission_data: SingleMissionFixture,
                    test_readme: Path) -> None:
    """Tests zipping data when the target is a directory

    Adds and commits a readme, zips the active dataset into a temporary
    directory, then checks the archive's integrity and that every archive
    member corresponds to a manifest entry.

    Args:
        single_mission_data (SingleMissionFixture): Single Mission test fixture
        test_readme (Path): Test Readme
    """
    test_app, _ = single_mission_data
    _, app, _ = test_app

    app.add([test_readme], readme=True)
    app.commit(readme=True)
    with TemporaryDirectory() as target_dir:
        # A path without a '.zip' suffix is passed on purpose; the archive is
        # expected to appear inside it as '<dataset name>.zip'.
        zip_path = Path(target_dir)
        app.zip(zip_path)

        final_path = zip_path.joinpath(app.active_dataset.name + '.zip')
        assert final_path.is_file()

        with zipfile.ZipFile(file=final_path, mode='r') as handle:
            # testzip() returns None when no member is corrupt
            assert handle.testzip() is None
            manifest = app.active_dataset.manifest.get_dict()
            for name in handle.filelist:
                # Members are stored under a top-level '<dataset name>/'
                # folder; strip it to obtain the manifest key.
                ar_name = Path(name.filename).relative_to(
                    app.active_dataset.name)
                assert ar_name.as_posix() in manifest

            handle.extractall(target_dir)

        # NOTE(review): the result of validate() is discarded, so this call
        # can only fail the test by raising - confirm whether an assert is
        # intended.
        # NOTE(review): `files` is globbed relative to the current working
        # directory, not target_dir where the archive was extracted - confirm.
        app.active_dataset.manifest.validate(
            manifest=manifest,
            files=Path(app.active_dataset.name).rglob('*')
        )

0 comments on commit 6cdc42b

Please sign in to comment.