37 zip command #92

Merged · 8 commits · Jan 11, 2025
29 changes: 9 additions & 20 deletions e4e_data_management/core.py
@@ -3,10 +3,8 @@
from __future__ import annotations

import datetime as dt
import fnmatch
import logging
import pickle
import re
from pathlib import Path
from shutil import copy2, rmtree
from typing import Dict, Iterable, List, Optional, Set
@@ -282,24 +280,7 @@ def push(self, path: Path) -> None:
Args:
path (Path): Destination to push completed dataset to
"""
if any(len(mission.staged_files) != 0
for mission in self.active_dataset.missions.values()) or \
len(self.active_dataset.staged_files) != 0:
raise RuntimeError('Files still in staging')

# Check that the README is present
readmes = [file
for file in list(self.active_dataset.root.glob('*'))
if re.match(fnmatch.translate('readme.*'), file.name, re.IGNORECASE)]

if len(readmes) == 0:
raise RuntimeError('Readme not found')
acceptable_exts = ['.md', '.docx']
if not any(readme.suffix.lower() in acceptable_exts for readme in readmes):
raise RuntimeError('Illegal README format')

# validate self
self.active_dataset.validate()
self.active_dataset.check_complete()

# Duplicate to destination
destination = path.joinpath(self.active_dataset.name)
@@ -317,6 +298,14 @@ def zip(self, output_path: Path) -> None:
Args:
output_path (Path): Output path
"""
if output_path.suffix.lower() != '.zip':
output_path = output_path.joinpath(
self.active_dataset.name + '.zip')

output_path.parent.mkdir(parents=True, exist_ok=True)
self.active_dataset.check_complete()

self.active_dataset.create_zip(output_path)

def unzip(self, input_file: Path, output_path: Path) -> None:
"""This will unzip the archived dataset to the specified root
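The new zip entry point accepts either a directory or an explicit .zip path: when the given path does not already end in .zip, the archive is named after the active dataset inside that directory. A minimal sketch of just that suffix handling, using a standalone helper and a hypothetical dataset name rather than the real DataManager setup (which is not part of this diff):

```python
from pathlib import Path


def resolve_zip_target(output_path: Path, dataset_name: str) -> Path:
    """Mirrors the suffix handling in DataManager.zip (sketch, not the real API):
    anything that is not already *.zip is treated as a directory, and the
    archive is named after the dataset inside it."""
    if output_path.suffix.lower() != '.zip':
        output_path = output_path.joinpath(dataset_name + '.zip')
    return output_path


# Both call forms resolve to the same archive path
assert resolve_zip_target(Path('/tmp/exports'), 'example_dataset') == \
    Path('/tmp/exports/example_dataset.zip')
assert resolve_zip_target(Path('/tmp/exports/example_dataset.zip'), 'example_dataset') == \
    Path('/tmp/exports/example_dataset.zip')
```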
52 changes: 52 additions & 0 deletions e4e_data_management/data.py
@@ -3,16 +3,23 @@
from __future__ import annotations

import datetime as dt
import fnmatch
import json
import logging
import pickle
import re
import zipfile
from dataclasses import dataclass
from hashlib import sha256
from pathlib import Path
from shutil import copy2
from typing import (Callable, Dict, Generator, Iterable, List, Optional, Set,
Union)

from e4e_data_management.exception import (CorruptedDataset,
MissionFilesInStaging,
ReadmeFilesInStaging,
ReadmeNotFound)
from e4e_data_management.metadata import Metadata


@@ -515,3 +522,48 @@ def commit(self) -> List[Path]:
committed_files.extend(new_files)
self.staged_files = []
return committed_files

def create_zip(self, zip_path: Path) -> None:
"""Creates a .zip archive of this Dataset at the specified location

Args:
zip_path (Path): Path to .zip archive
"""
if zip_path.suffix.lower() != '.zip':
raise RuntimeError('Invalid suffix')

with zipfile.ZipFile(file=zip_path, mode='w') as handle:
manifest = self.manifest.get_dict()
for file in manifest:
src_path = self.root.joinpath(file)
dest = Path(self.name) / file
handle.write(filename=src_path, arcname=dest)

def check_complete(self) -> None:
"""Checks if the dataset is complete

Raises:
MissionFilesInStaging: Mission files remain in staging
ReadmeFilesInStaging: Readme files remain in staging
ReadmeNotFound: Readme files not found
ReadmeNotFound: Readme files with acceptable extension not found
CorruptedDataset: Dataset checksum validation failed
"""
staged_mission_files = (mission.staged_files
for mission in self.missions.values())
if any(len(staged) for staged in staged_mission_files):
raise MissionFilesInStaging
if len(self.staged_files) != 0:
raise ReadmeFilesInStaging

readmes = [file for file in self.root.glob('*')
if re.match(fnmatch.translate('readme.*'), file.name, re.IGNORECASE)]
if len(readmes) == 0:
raise ReadmeNotFound

acceptable_exts = ['.md', '.docx']
if not any(readme.suffix.lower() in acceptable_exts for readme in readmes):
raise ReadmeNotFound('Acceptable extension not found')

if not self.validate():
raise CorruptedDataset
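Because create_zip writes each manifest entry with an arcname of Path(self.name) / file, every member of the archive is prefixed with the dataset name, and only manifest-tracked files are included. A small sketch, using only the standard zipfile module, of how that layout could be checked against a manifest (the helper name and arguments are illustrative, not part of the PR):

```python
import zipfile
from pathlib import Path
from typing import Iterable


def archive_matches_manifest(zip_path: Path, dataset_name: str,
                             manifest_keys: Iterable[str]) -> bool:
    """Sketch: True if the archive holds exactly the manifest entries,
    each stored under the dataset-name prefix created by Dataset.create_zip."""
    with zipfile.ZipFile(zip_path, mode='r') as handle:
        members = {Path(info.filename).relative_to(dataset_name).as_posix()
                   for info in handle.infolist()}
    return members == set(manifest_keys)
```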
28 changes: 28 additions & 0 deletions e4e_data_management/exception.py
@@ -0,0 +1,28 @@
'''E4E Data Management Exceptions
'''
from abc import ABC


class Incomplete(Exception, ABC):
"""Dataset not complete
"""


class MissionFilesInStaging(Incomplete):
"""Mission files still in staging area
"""


class ReadmeFilesInStaging(Incomplete):
"""Readme files still in staging area
"""


class ReadmeNotFound(Incomplete):
"""Readme files not found
"""


class CorruptedDataset(Exception):
"""Corrupted Dataset
"""
47 changes: 47 additions & 0 deletions tests/test_zip.py
@@ -0,0 +1,47 @@
'''Tests zipping
'''
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Tuple
from unittest.mock import Mock
import zipfile
from e4e_data_management.core import DataManager

SingleMissionFixture = Tuple[Tuple[Mock,
DataManager, Path], Tuple[Path, int, int]]


def test_zip_to_dir(single_mission_data: SingleMissionFixture,
test_readme: Path):
"""Tests zipping data

Args:
single_mission_data (SingleMissionFixture): Single Mission test fixture
test_readme (Path): Test Readme
"""
test_app, _ = single_mission_data
_, app, _ = test_app

app.add([test_readme], readme=True)
app.commit(readme=True)
with TemporaryDirectory() as target_dir:
zip_path = Path(target_dir)
app.zip(zip_path)

final_path = zip_path.joinpath(app.active_dataset.name + '.zip')
assert final_path.is_file()

with zipfile.ZipFile(file=final_path, mode='r') as handle:
assert handle.testzip() is None
manifest = app.active_dataset.manifest.get_dict()
for name in handle.filelist:
ar_name = Path(name.filename).relative_to(
app.active_dataset.name)
assert ar_name.as_posix() in manifest

handle.extractall(target_dir)

app.active_dataset.manifest.validate(
manifest=manifest,
files=Path(app.active_dataset.name).rglob('*')
)
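The single_mission_data and test_readme fixtures come from the project's conftest.py, which is not part of this diff. Purely to make the test's expectations concrete, here is a hypothetical minimal test_readme fixture; the real fixture may look different:

```python
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Generator

import pytest


@pytest.fixture(name='test_readme')
def create_test_readme() -> Generator[Path, None, None]:
    """Hypothetical fixture: yields a readme.md on disk, satisfying the
    check_complete() requirement of a readme.* file with a .md or .docx suffix."""
    with TemporaryDirectory() as temp_dir:
        readme = Path(temp_dir) / 'readme.md'
        readme.write_text('# Example dataset readme\n', encoding='utf-8')
        yield readme
```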