Skip to content

Commit

Permalink
Merge pull request #484 from OP-TED/feature/TED-1398
Browse files Browse the repository at this point in the history
updated METS package publishing + tests
  • Loading branch information
Dragos0000 authored Jul 2, 2023
2 parents 4b5f1bc + 47ed19f commit 1289e20
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 32 deletions.
5 changes: 3 additions & 2 deletions ted_sws/core/model/manifestation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from ted_sws.core.model import PropertyBaseModel
from ted_sws.core.model.validation_report_data import ReportNoticeData
from ted_sws.notice_packager.model.metadata import METS_TYPE_CREATE


class ManifestationMimeType(Enum):
Expand Down Expand Up @@ -195,6 +196,8 @@ class METSManifestation(Manifestation):
"""
"""
type: str = METS_TYPE_CREATE
package_name: str = None


class RDFValidationManifestation(ValidationManifestation):
Expand Down Expand Up @@ -301,5 +304,3 @@ def is_validated(self) -> bool:
if len(self.shacl_validations) and len(self.sparql_validations):
return True
return False


17 changes: 13 additions & 4 deletions ted_sws/notice_packager/services/notice_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
FILE_METS_ACTION_FORMAT = "{work_identifier}_{action}.mets.xml"
DEFAULT_RDF_FILE_FORMAT = "turtle"


# The naming convention for a TEDRDF package:
# {year}_{notice_metadata.ojs_type}_{notice_metadata.ojs_issue_number}_{notice_number}_{action}
# ex.: "2021_S_4_003544_create.zip", where:
Expand All @@ -53,8 +54,15 @@ def package_notice(notice: Notice, action: str = METS_TYPE_CREATE) -> Notice:
notice_packager = NoticePackager(notice, action)
notice_packager.add_template_files()
notice_packager.add_rdf_content()
mets_manifestation_content = notice_packager.pack()
notice.set_mets_manifestation(mets_manifestation=METSManifestation(object_data=mets_manifestation_content))
package_name = notice_packager.get_archive_name()
mets_manifestation_content = notice_packager.pack(package_name=package_name)
notice.set_mets_manifestation(
mets_manifestation=METSManifestation(
object_data=mets_manifestation_content,
type=action,
package_name=package_name
)
)
return notice


Expand Down Expand Up @@ -134,9 +142,10 @@ def get_archive_name(self) -> str:
)
return archive_name

def pack(self) -> str:
def pack(self, package_name: str = None) -> str:
package_name = package_name or self.get_archive_name()
archiver = ZipArchiver()
archive_path = self.tmp_dir_path / self.get_archive_name()
archive_path = self.tmp_dir_path / package_name
package_path = archiver.process_archive(archive_path, self.files)
raw_archive_content = package_path.read_bytes()
archive_content = base64.b64encode(raw_archive_content)
Expand Down
14 changes: 7 additions & 7 deletions ted_sws/notice_publisher/adapters/sftp_notice_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ class SFTPPublisher(SFTPPublisherABC):
def __init__(self, hostname: str = None, username: str = None, password: str = None, port: int = None,
private_key: str = None, private_key_passphrase: str = None):
"""Constructor Method"""
self.hostname = hostname if hostname else config.SFTP_PUBLISH_HOST
self.username = username if username else config.SFTP_PUBLISH_USER
self.password = password if password else config.SFTP_PUBLISH_PASSWORD
self.port = port if port else config.SFTP_PUBLISH_PORT
self.hostname = hostname or config.SFTP_PUBLISH_HOST
self.username = username or config.SFTP_PUBLISH_USER
self.password = password or config.SFTP_PUBLISH_PASSWORD
self.port = port or config.SFTP_PUBLISH_PORT
self.connection = None
self.is_connected = False
self.private_key = None
self.private_key_passphrase = private_key_passphrase if private_key_passphrase else config.SFTP_PRIVATE_KEY_PASSPHRASE
private_key = private_key if private_key else config.SFTP_PRIVATE_KEY
self.private_key_passphrase = private_key_passphrase or config.SFTP_PRIVATE_KEY_PASSPHRASE
private_key = private_key or config.SFTP_PRIVATE_KEY
if private_key:
self.private_key = paramiko.RSAKey.from_private_key(io.StringIO(private_key),
password=self.private_key_passphrase)
Expand Down Expand Up @@ -77,4 +77,4 @@ def exists(self, remote_path: str) -> bool:
self._sftp.stat(remote_path)
except IOError:
return False
return True
return True
26 changes: 17 additions & 9 deletions ted_sws/notice_publisher/services/notice_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,18 @@ def publish_notice(notice: Notice, publisher: SFTPPublisherABC = None,
"""
This function publishes the METS manifestation for a Notice in Cellar.
"""
publisher = publisher if publisher else SFTPPublisher()
remote_folder_path = remote_folder_path if remote_folder_path else config.SFTP_PUBLISH_PATH
publisher = publisher or SFTPPublisher()
remote_folder_path = remote_folder_path or config.SFTP_PUBLISH_PATH
mets_manifestation = notice.mets_manifestation
if not mets_manifestation or not mets_manifestation.object_data:
raise ValueError("Notice does not have a METS manifestation to be published.")

package_name = mets_manifestation.package_name
if not package_name:
raise ValueError("METS manifestation does not have a package name for publishing.")

package_content = base64.b64decode(bytes(mets_manifestation.object_data, encoding='utf-8'), validate=True)
remote_notice_path = f"{remote_folder_path}/{notice.ted_id}{DEFAULT_NOTICE_PACKAGE_EXTENSION}"
remote_notice_path = f"{remote_folder_path}/{package_name}"
source_file = tempfile.NamedTemporaryFile()
source_file.write(package_content)
try:
Expand Down Expand Up @@ -62,15 +66,19 @@ def publish_notice_into_s3(notice: Notice, s3_publisher: S3Publisher = None,
:param bucket_name:
:return:
"""
s3_publisher = s3_publisher if s3_publisher else S3Publisher()
s3_publisher = s3_publisher or S3Publisher()
bucket_name = bucket_name or config.S3_PUBLISH_NOTICE_BUCKET
mets_manifestation = notice.mets_manifestation
if not mets_manifestation or not mets_manifestation.object_data:
raise ValueError("Notice does not have a METS manifestation to be published.")

package_name = mets_manifestation.package_name
if not package_name:
raise ValueError("METS manifestation does not have a package name for publishing.")

package_content = base64.b64decode(bytes(mets_manifestation.object_data, encoding='utf-8'), validate=True)
result: S3PublishResult = s3_publisher.publish(bucket_name=bucket_name,
object_name=f"{notice.ted_id}{DEFAULT_NOTICE_PACKAGE_EXTENSION}",
object_name=f"{package_name}",
data=package_content)
return result is not None

Expand All @@ -86,7 +94,7 @@ def publish_notice_into_s3_by_id(notice_id: str, notice_repository: NoticeReposi
:param bucket_name:
:return:
"""
s3_publisher = s3_publisher if s3_publisher else S3Publisher()
s3_publisher = s3_publisher or S3Publisher()
bucket_name = bucket_name or config.S3_PUBLISH_NOTICE_BUCKET
notice = notice_repository.get(reference=notice_id)
result = publish_notice_into_s3(notice=notice, bucket_name=bucket_name, s3_publisher=s3_publisher)
Expand All @@ -102,7 +110,7 @@ def publish_notice_rdf_into_s3(notice: Notice, s3_publisher: S3Publisher = None,
:param bucket_name:
:return:
"""
s3_publisher = s3_publisher if s3_publisher else S3Publisher()
s3_publisher = s3_publisher or S3Publisher()
bucket_name = bucket_name or config.S3_PUBLISH_NOTICE_RDF_BUCKET
rdf_manifestation: RDFManifestation = notice.distilled_rdf_manifestation
result: bool = publish_notice_rdf_content_into_s3(
Expand All @@ -125,7 +133,7 @@ def publish_notice_rdf_into_s3_by_id(notice_id: str, notice_repository: NoticeRe
:param bucket_name:
:return:
"""
s3_publisher = s3_publisher if s3_publisher else S3Publisher()
s3_publisher = s3_publisher or S3Publisher()
bucket_name = bucket_name or config.S3_PUBLISH_NOTICE_RDF_BUCKET
notice = notice_repository.get(reference=notice_id)
return publish_notice_rdf_into_s3(notice=notice, bucket_name=bucket_name, s3_publisher=s3_publisher)
Expand All @@ -143,7 +151,7 @@ def publish_notice_rdf_content_into_s3(rdf_manifestation: RDFManifestation,
:param bucket_name:
:return:
"""
s3_publisher = s3_publisher if s3_publisher else S3Publisher()
s3_publisher = s3_publisher or S3Publisher()
if not rdf_manifestation or not rdf_manifestation.object_data:
raise ValueError("Notice does not have a RDF manifestation to be published.")

Expand Down
30 changes: 28 additions & 2 deletions tests/e2e/notice_publisher/services/test_notice_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

def test_notice_publisher(notice_2016, fake_mongodb_client):
notice = notice_2016
package_name = "test_package.zip"
notice_repository = NoticeRepository(mongodb_client=fake_mongodb_client)
notice_repository.add(notice)
notice_id = notice.ted_id
Expand All @@ -34,6 +35,16 @@ def test_notice_publisher(notice_2016, fake_mongodb_client):
notice_repository.update(notice)
sftp_publisher = SFTPPublisher()

with pytest.raises(ValueError):
publish_notice_by_id(notice_id, notice_repository, publisher=sftp_publisher)

mets_manifestation = METSManifestation(
object_data="62f0baf9a5458a3a67761392",
package_name=package_name
)
notice.set_mets_manifestation(mets_manifestation)
notice._status = NoticeStatus.ELIGIBLE_FOR_PUBLISHING
notice_repository.update(notice)
published = publish_notice_by_id(notice_id, notice_repository, publisher=sftp_publisher)

assert published
Expand All @@ -48,7 +59,7 @@ def test_notice_publisher(notice_2016, fake_mongodb_client):
notice._mets_manifestation = None
publish_notice(notice, publisher=sftp_publisher)
sftp_publisher.connect()
sftp_publisher.remove(f"{config.SFTP_PUBLISH_PATH}/{notice.ted_id}{DEFAULT_NOTICE_PACKAGE_EXTENSION}")
sftp_publisher.remove(f"{config.SFTP_PUBLISH_PATH}/{package_name}")
sftp_publisher.disconnect()
assert not sftp_publisher.is_connected

Expand All @@ -59,7 +70,7 @@ def test_s3_notice_publisher(notice_2016, fake_mongodb_client, notice_s3_bucket_
notice_repository = NoticeRepository(mongodb_client=fake_mongodb_client)
notice_repository.add(notice)
notice_id = notice.ted_id
object_name = f"{notice_id}{DEFAULT_NOTICE_PACKAGE_EXTENSION}"
object_name = "test_package.zip"

rdf_manifestation = RDFManifestation(object_data="62f0baf9a5458a3a67761392")
mets_manifestation = notice_mets_manifestation
Expand All @@ -76,6 +87,21 @@ def test_s3_notice_publisher(notice_2016, fake_mongodb_client, notice_s3_bucket_

notice_repository.update(notice)

with pytest.raises(ValueError):
publish_notice_into_s3_by_id(
notice_id=notice_id,
notice_repository=notice_repository,
bucket_name=notice_s3_bucket_name,
s3_publisher=s3_publisher
)

mets_manifestation = METSManifestation(
object_data="62f0baf9a5458a3a67761392",
package_name=object_name
)
notice.set_mets_manifestation(mets_manifestation)
notice._status = NoticeStatus.ELIGIBLE_FOR_PUBLISHING
notice_repository.update(notice)
publish_result: bool = publish_notice_into_s3_by_id(
notice_id=notice_id,
notice_repository=notice_repository,
Expand Down
12 changes: 7 additions & 5 deletions tests/features/notice_publisher/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@ def mongodb_client():


@pytest.fixture(scope="function")
def publish_eligible_notice(publicly_available_notice) -> Notice:
def publish_eligible_notice(publicly_available_notice, mets_package_published_name) -> Notice:
notice = publicly_available_notice
notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_PUBLISHING)
notice._mets_manifestation = METSManifestation(
object_data=base64.b64encode("METS manifestation content".encode("utf-8")))
object_data=base64.b64encode("METS manifestation content".encode("utf-8")),
package_name=mets_package_published_name
)
return notice


Expand All @@ -54,9 +56,9 @@ def s3_bucket_name():
return "tmp-test-bucket"

@pytest.fixture
def mets_package_published_name(publish_eligible_notice):
return f"{publish_eligible_notice.ted_id}.zip"
def mets_package_published_name():
return "test_package.zip"

@pytest.fixture
def rdf_manifestation_published_name(publish_eligible_notice):
return f"{publish_eligible_notice.ted_id}.ttl"
return f"{publish_eligible_notice.ted_id}.ttl"
1 change: 0 additions & 1 deletion tests/features/notice_publisher/test_notice_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from ted_sws.core.model.notice import Notice, NoticeStatus
from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC
from ted_sws.notice_publisher.adapters.sftp_notice_publisher import SFTPPublisher
from ted_sws.notice_publisher.adapters.sftp_publisher_abc import SFTPPublisherABC
from ted_sws.notice_publisher.services.notice_publisher import publish_notice, publish_notice_by_id


Expand Down
8 changes: 6 additions & 2 deletions tests/unit/notice_packager/test_notice_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@

from ted_sws.core.model.manifestation import RDFManifestation
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.notice_packager.services.notice_packager import package_notice
from ted_sws.notice_packager.model.metadata import METS_TYPE_CREATE
from ted_sws.notice_packager.services.notice_packager import package_notice, NoticePackager


def test_notice_packager_with_notice(notice_2018, rdf_content):
rdf_manifestation = RDFManifestation(object_data=rdf_content)
notice_2018._status = NoticeStatus.ELIGIBLE_FOR_PACKAGING
notice_2018._rdf_manifestation = rdf_manifestation
notice_2018._distilled_rdf_manifestation = rdf_manifestation
packaged_notice = package_notice(notice_2018)
packaged_notice = package_notice(notice_2018, action=METS_TYPE_CREATE)

assert packaged_notice.mets_manifestation
assert packaged_notice.mets_manifestation.type == METS_TYPE_CREATE
assert packaged_notice.mets_manifestation.package_name == "2018_S_22_045279_create.zip"

0 comments on commit 1289e20

Please sign in to comment.