From ab5aadbd0461517b4fc6577f6d3f2955dfc937be Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sun, 25 Feb 2024 10:27:40 -0300 Subject: [PATCH 1/7] Cria a tarefa upload.tasks.task_validate_original_zip_file --- package/tasks.py | 207 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 package/tasks.py diff --git a/package/tasks.py b/package/tasks.py new file mode 100644 index 00000000..abfed93d --- /dev/null +++ b/package/tasks.py @@ -0,0 +1,207 @@ +import json + +from celery.result import AsyncResult +from django.contrib.auth import get_user_model +from django.utils.translation import gettext as _ +from packtools.sps import exceptions as sps_exceptions +from packtools.sps.models import package as sps_package +from packtools.sps.utils import file_utils as sps_file_utils +from packtools.sps.validation import article as sps_validation_article +from packtools.sps.validation import journal as sps_validation_journal +from packtools.validator import ValidationReportXML + +from article.choices import AS_CHANGE_SUBMITTED +from article.controller import create_article_from_etree, update_article +from article.models import Article +from config import celery_app +from issue.models import Issue +from journal.controller import get_journal_dict_for_validation +from libs.dsm.publication.documents import get_document, get_similar_documents + +from . import choices, controller, exceptions +from .utils import file_utils, package_utils, xml_utils +from upload.models import Package + + +User = get_user_model() + + +@celery_app.task(bind=True) +def task_validate(self, sps_pkg_id): + + task_validate_assets.apply_async( + kwargs={ + "sps_pkg_id": sps_pkg_id, + }, + ) + + # Aciona validação de Renditions + task_validate_renditions.apply_async( + kwargs={ + "sps_pkg_id": sps_pkg_id, + }, + ) + + # Aciona validacao do conteudo do XML + task_validate_content_xml.apply_async( + kwargs={ + "sps_pkg_id": sps_pkg_id, + }, + ) + + +@celery_app.task(bind=True) +def task_validate_assets(self, sps_pkg_id): + package_files = file_utils.get_file_list_from_zip(file_path) + article_assets = package_utils.get_article_assets_from_zipped_xml( + file_path, xml_path + ) + + has_errors = False + + for asset_result in package_utils.evaluate_assets(article_assets, package_files): + asset, is_present = asset_result + + if not is_present: + has_errors = True + Package.add_validation_result( + package_id, + error_category=choices.VE_ASSET_ERROR, + status=choices.VS_DISAPPROVED, + message=f'{asset.name} {_("file is mentioned in the XML but not present in the package.")}', + data={ + "xml_path": xml_path, + "id": asset.id, + "type": asset.type, + "missing_file": asset.name, + }, + ) + + Package.add_validation_result( + package_id, + error_category=choices.VE_ASSET_ERROR, + status=choices.VS_DISAPPROVED, + message=f'{asset.name} {_("file is mentioned in the XML but its optimised version not present in the package.")}', + data={ + "xml_path": xml_path, + "id": asset.id, + "type": "optimised", + "missing_file": file_utils.generate_filepath_with_new_extension( + asset.name, ".png" + ), + }, + ) + + Package.add_validation_result( + package_id, + error_category=choices.VE_ASSET_ERROR, + status=choices.VS_DISAPPROVED, + message=f'{asset.name} {_("file is mentioned in the XML but its thumbnail version not present in the package.")}', + data={ + "xml_path": xml_path, + "id": asset.id, + "type": "thumbnail", + "missing_file": file_utils.generate_filepath_with_new_extension( + asset.name, ".thumbnail.jpg" + ), + }, + ) + + if not has_errors: + Package.add_validation_result( + package_id, + error_category=choices.VE_ASSET_ERROR, + status=choices.VS_APPROVED, + data={"xml_path": xml_path}, + ) + return True + + +@celery_app.task(bind=True) +def task_validate_renditions(self, sps_pkg_id): + package_files = file_utils.get_file_list_from_zip(file_path) + article_renditions = package_utils.get_article_renditions_from_zipped_xml( + file_path, xml_path + ) + + has_errors = False + + for rendition_result in package_utils.evaluate_renditions( + article_renditions, package_files + ): + rendition, expected_filename, is_present = rendition_result + + if not is_present: + has_errors = True + + Package.add_validation_result( + package_id=package_id, + error_category=choices.VE_RENDITION_ERROR, + status=choices.VS_DISAPPROVED, + message=f'{rendition.language} {_("language is mentioned in the XML but its PDF file not present in the package.")}', + data={ + "xml_path": xml_path, + "language": rendition.language, + "is_main_language": rendition.is_main_language, + "missing_file": expected_filename, + }, + ) + + if not has_errors: + Package.add_validation_result( + package_id=package_id, + error_category=choices.VE_RENDITION_ERROR, + status=choices.VS_APPROVED, + data={"xml_path": xml_path}, + ) + return True + + +@celery_app.task(bind=True) +def task_validate_content_xml(self, sps_pkg_id): + xml_str = file_utils.get_xml_content_from_zip(file_path) + + validations = ValidationReportXML( + file_path=xml_str, data_file_path="validation_criteria_example.json" + ).validation_report() + + # data = {} + for result in validations: + for key, value in result.items(): + for result_ind in value: + string_validations = json.dumps(result_ind, default=str) + json_validations = json.loads(string_validations) + + vr = Package.add_validation_result( + package_id=package_id, + error_category=choices.VE_DATA_CONSISTENCY_ERROR, + status=choices.VS_CREATED, + data=json_validations, + ) + + # # TODO + # Realizar logica para verificar se a validacao passou ou nao + ######## + try: + message = json_validations["message"] + except Exception as e: + print(f"Error: {e}") + message = "" + + try: + valor = json_validations["result"] + except Exception as e: + print(f"Error: {e}") + valor = False + + if valor == "success": + status = choices.VS_APPROVED + else: + status = choices.VS_DISAPPROVED + + vr.update( + error_category=choices.VE_XML_FORMAT_ERROR, + message=_(message), + data=data, + status=status, + ) From 1a181d4344b14f5624d8f9b7b0d2c0645b76d19a Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sun, 25 Feb 2024 17:33:18 -0300 Subject: [PATCH 2/7] Cria upload.tasks.task_validate_xml_content --- upload/tasks.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/upload/tasks.py b/upload/tasks.py index ca7cc200..22c82df2 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -22,6 +22,7 @@ from .utils import file_utils, package_utils, xml_utils from upload.models import Package +from upload.xml_validation import validate_xml_content, get_data User = get_user_model() @@ -539,3 +540,71 @@ def _get_user(request, user_id): def task_request_pid_for_accepted_packages(self, user_id): user = _get_user(self.request, user_id) controller.request_pid_for_accepted_packages(user) + + +@celery_app.task(bind=True) +def task_validate_original_zip_file(self, package_id, file_path, journal_id, issue_id, article_id): + + for xml_with_pre in XMLWithPre.create(file_path=file_path): + xml_path = xml_with_pre.filename + break + + if xml_path: + # Aciona validação de Assets + task_validate_assets.apply_async( + kwargs={ + "file_path": file_path, + "xml_path": xml_path, + "package_id": package_id, + }, + ) + + # Aciona validação de Renditions + task_validate_renditions.apply_async( + kwargs={ + "file_path": file_path, + "xml_path": xml_path, + "package_id": package_id, + }, + ) + + # Aciona validacao do conteudo do XML + + task_validate_xml_content.apply_async( + kwargs={ + "file_path": file_path, + "xml_path": xml_path, + "package_id": package_id, + "journal_id": journal_id, + "issue_id": issue_id, + "article_id": article_id, + }, + ) + + +@celery_app.task(bind=True) +def task_validate_xml_content(self, file_path, xml_path, package_id, journal_id, issue_id, article_id): + # VE_BIBLIOMETRICS_DATA_ERROR = "bibliometrics-data-error" + # VE_SERVICES_DATA_ERROR = "services-data-error" + # VE_DATA_CONSISTENCY_ERROR = "data-consistency-error" + # VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error" + + data = {} + package = Package.objects.get(pk=package_id) + for xml_with_pre in XMLWithPre.create(file_path=file_path): + results = validate_xml_content(xml_with_pre.sps_pkg_name, xml_with_pre.xmltree, data) + + for result in results: + # ['xpath', 'advice', 'title', 'expected_value', 'got_value', 'message', 'validation_type', 'response'] + if not result["response"] == "ERROR": + continue + + message = result["message"] + advice = result["advice"] or '' + message = ". ".join(_(message), _(advice)) + package._add_validation_result( + error_category=choices.VE_DATA_CONSISTENCY_ERROR, + status=choices.VS_DISAPPROVED, + message=message, + data=result, + ) From 4d74c39a32b502a4ff8036e2f1efd356b0fc68ea Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sun, 25 Feb 2024 17:33:36 -0300 Subject: [PATCH 3/7] Cria upload.xml_validation --- upload/xml_validation.py | 577 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 577 insertions(+) create mode 100644 upload/xml_validation.py diff --git a/upload/xml_validation.py b/upload/xml_validation.py new file mode 100644 index 00000000..304d0b63 --- /dev/null +++ b/upload/xml_validation.py @@ -0,0 +1,577 @@ +import sys + +from packtools.sps.validation.aff import AffiliationsListValidation +from packtools.sps.validation.article_and_subarticles import ( + ArticleLangValidation, + ArticleAttribsValidation, + ArticleIdValidation, + ArticleSubjectsValidation, + ArticleTypeValidation, +) +from packtools.sps.validation.article_authors import ArticleAuthorsValidation + +from packtools.sps.validation.article_data_availability import ( + DataAvailabilityValidation, +) +from packtools.sps.validation.article_doi import ArticleDoiValidation +from packtools.sps.validation.article_lang import ArticleLangValidation +from packtools.sps.validation.article_license import ArticleLicenseValidation +from packtools.sps.validation.article_toc_sections import ArticleTocSectionsValidation +from packtools.sps.validation.article_xref import ArticleXrefValidation +from packtools.sps.validation.dates import ArticleDatesValidation +from packtools.sps.validation.journal_meta import JournalMetaValidation +from packtools.sps.validation.preprint import PreprintValidation +from packtools.sps.validation.related_articles import RelatedArticlesValidation +from tracker.models import UnexpectedEvent + + +def doi_callable_get_data(doi): + return {} + + +def orcid_callable_get_validate(orcid): + return {} + + +def add_app_data(data, app_data): + # TODO + data["country_codes"] = [] + + +def add_journal_data(data, journal, issue): + # TODO + # específico do periódico + data["language_codes"] = [] + + if issue: + data["subjects"] = issue.subjects_list + data["expected_toc_sections"] = issue.toc_sections + else: + data["subjects"] = journal.subjects_list + data["expected_toc_sections"] = journal.toc_sections + + # { + # 'issns': { + # 'ppub': '0103-5053', + # 'epub': '1678-4790' + # }, + # 'acronym': 'hcsm', + # 'journal-title': 'História, Ciências, Saúde-Manguinhos', + # 'abbrev-journal-title': 'Hist. cienc. saude-Manguinhos', + # 'publisher-name': ['Casa de Oswaldo Cruz, Fundação Oswaldo Cruz'], + # 'nlm-ta': 'Rev Saude Publica' + # } + data["journal"] = journal.data + data["expected_license_code"] = journal.license_code + + +def add_sps_data(data, sps_data): + # TODO + # depende do SPS / JATS / Critérios + data["dtd_versions"] = [] + data["sps_versions"] = [] + data["article_types"] = [] + data["expected_article_type_vs_subject_similarity"] = 0 + data["data_availability_specific_uses"] = [] + + data["credit_taxonomy"] = [] + + data["article_type_correspondences"] = [] + + data["future_date"] = "" + data["events_order"] = [] + data["required_events"] = [] + + +def validate_xml_content(sps_pkg_name, xmltree, data): + + functions = ( + validate_affiliations, + validate_languages, + validate_article_attributes, + validate_article_id_other, + validate_subjects, + validate_article_type, + validate_authors, + validate_data_availability, + validate_doi, + validate_article_languages, + validate_licenses, + validate_toc_sections, + validate_xref, + validate_dates, + validate_journal, + validate_preprint, + validate_related_articles, + ) + for f in functions: + yield from f(sps_pkg_name, xmltree, data) + + +def validate_affiliations(sps_pkg_name, xmltree, data): + xml = AffiliationsListValidation(xmltree) + + try: + yield from xml.validade_affiliations_list(data["country_codes"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_affiliations", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_languages(sps_pkg_name, xmltree, data): + xml = ArticleLangValidation(xmltree) + + try: + yield from xml.validate_language(data["language_codes"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_languages", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_article_attributes(sps_pkg_name, xmltree, data): + xml = ArticleAttribsValidation(xmltree) + + try: + yield from xml.validate_dtd_version(data["dtd_versions"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_dtd_version", + "sps_pkg_name": sps_pkg_name, + }, + ) + + try: + yield from xml.validate_specific_use(data["sps_versions"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_specific_use", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_article_id_other(sps_pkg_name, xmltree, data): + xml = ArticleIdValidation(xmltree) + + try: + yield from xml.validate_article_id_other() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_article_id_other", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_subjects(sps_pkg_name, xmltree, data): + xml = ArticleSubjectsValidation(xmltree) + + try: + yield from xml.validate_without_subjects() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_without_subjects", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_article_type(sps_pkg_name, xmltree, data): + xml = ArticleTypeValidation(xmltree) + + try: + yield from xml.validate_article_type(data["article_types"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_article_type", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_article_type_vs_subject_similarity( + data["subjects"], data["expected_article_type_vs_subject_similarity"] + ) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_article_type_vs_subject_similarity", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_authors(sps_pkg_name, xmltree, data): + xml = ArticleAuthorsValidation(xmltree) + + try: + yield from xml.validate_authors_orcid_format() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_authors_orcid_format", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_authors_orcid_is_registered( + data["callable_get_orcid_data"] + ) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_authors_orcid_is_registered", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_authors_orcid_is_unique() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_authors_orcid_is_unique", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_authors_role(data["credit_taxonomy"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_authors_role", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_data_availability(sps_pkg_name, xmltree, data): + xml = DataAvailabilityValidation(xmltree) + + try: + yield from xml.validate_data_availability( + data["data_availability_specific_uses"] + ) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_data_availability", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_doi(sps_pkg_name, xmltree, data): + xml = ArticleDoiValidation(xmltree) + + try: + yield from xml.validate_all_dois_are_unique() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_all_dois_are_unique", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_doi_registered(data["callable_get_doi_data"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_doi_registered", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_main_article_doi_exists() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_main_article_doi_exists", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_translations_doi_exists() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_translations_doi_exists", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_article_languages(sps_pkg_name, xmltree, data): + xml = ArticleLangValidation(xmltree) + + try: + yield from xml.validate_article_lang() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_article_lang", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_licenses(sps_pkg_name, xmltree, data): + xml = ArticleLicenseValidation(xmltree) + # yield from xml.validate_license(license_expected_value) + + try: + yield from xml.validate_license_code(data["expected_license_code"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_license_code", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_toc_sections(sps_pkg_name, xmltree, data): + xml = ArticleTocSectionsValidation(xmltree) + + try: + yield from xml.validade_article_title_is_different_from_section_titles() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validade_article_title_is_different_from_section_titles", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_article_toc_sections(data["expected_toc_sections"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_article_toc_sections", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_xref(sps_pkg_name, xmltree, data): + xml = ArticleXrefValidation(xmltree) + + try: + yield from xml.validate_id() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_id", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_rid() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_rid", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_dates(sps_pkg_name, xmltree, data): + xml = ArticleDatesValidation(xmltree) + + try: + yield from xml.validate_article_date(data["future_date"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_article_date", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_collection_date(data["future_date"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_collection_date", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_history_dates( + data["events_order"], data["required_events"] + ) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_history_dates", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.validate_number_of_digits_in_article_date() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_number_of_digits_in_article_date", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_journal(sps_pkg_name, xmltree, data): + xml = JournalMetaValidation(xmltree) + + try: + yield from xml.validate(data["journal"]) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_journal", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_preprint(sps_pkg_name, xmltree, data): + xml = PreprintValidation(xmltree) + + try: + yield from xml.preprint_validation() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.preprint_validation", + "sps_pkg_name": sps_pkg_name, + }, + ) + + +def validate_related_articles(sps_pkg_name, xmltree, data): + xml = RelatedArticlesValidation(xmltree) + + try: + yield from xml.related_articles_doi() + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.validate_related_articles", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.related_articles_matches_article_type_validation( + data["article_type_correspondences"] + ) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.related_articles_matches_article_type_validation", + "sps_pkg_name": sps_pkg_name, + }, + ) From 0d43f9f531f524a17e0aea48ffcc6b4cf2890206 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Mon, 26 Feb 2024 09:46:49 -0300 Subject: [PATCH 4/7] =?UTF-8?q?Anota=20TODO=20para=20inserir=20par=C3=A2me?= =?UTF-8?q?tros=20para=20as=20valida=C3=A7=C3=B5es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- upload/tasks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/upload/tasks.py b/upload/tasks.py index 22c82df2..b41dd812 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -22,7 +22,7 @@ from .utils import file_utils, package_utils, xml_utils from upload.models import Package -from upload.xml_validation import validate_xml_content, get_data +from upload.xml_validation import validate_xml_content, add_app_data, add_sps_data, add_journal_data User = get_user_model() @@ -569,7 +569,6 @@ def task_validate_original_zip_file(self, package_id, file_path, journal_id, iss ) # Aciona validacao do conteudo do XML - task_validate_xml_content.apply_async( kwargs={ "file_path": file_path, @@ -589,7 +588,12 @@ def task_validate_xml_content(self, file_path, xml_path, package_id, journal_id, # VE_DATA_CONSISTENCY_ERROR = "data-consistency-error" # VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error" + # TODO completar data data = {} + # add_app_data(data, app_data) + # add_journal_data(data, journal, issue) + # add_sps_data(data, sps_data) + package = Package.objects.get(pk=package_id) for xml_with_pre in XMLWithPre.create(file_path=file_path): results = validate_xml_content(xml_with_pre.sps_pkg_name, xml_with_pre.xmltree, data) From dc8d4be0b0999db57e64b8152d1dd08849ab67ca Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sun, 10 Mar 2024 09:57:01 -0300 Subject: [PATCH 5/7] =?UTF-8?q?Atualiza=20packtools=20para=20a=20vers?= =?UTF-8?q?=C3=A3o=203.3.4=20que=20contempla=20mais=20valida=C3=A7=C3=B5es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements/base.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/base.txt b/requirements/base.txt index ff434c9d..2f472e00 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -60,7 +60,7 @@ minio==7.2 # Upload # ------------------------------------------------------------------------------ lxml==4.9.3 # https://github.com/lxml/lxml --e git+https://github.com/scieloorg/packtools.git@3.3.1#egg=packtools +-e git+https://github.com/scieloorg/packtools.git@3.3.4#egg=packtools -e git+https://github.com/scieloorg/scielo_scholarly_data#egg=scielo_scholarly_data # DSM Publication From c029a4ce0d2da477aa9d86ae83b6242e531ff650 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sun, 10 Mar 2024 10:10:47 -0300 Subject: [PATCH 6/7] Remove package.tasks --- package/tasks.py | 207 ----------------------------------------------- 1 file changed, 207 deletions(-) delete mode 100644 package/tasks.py diff --git a/package/tasks.py b/package/tasks.py deleted file mode 100644 index abfed93d..00000000 --- a/package/tasks.py +++ /dev/null @@ -1,207 +0,0 @@ -import json - -from celery.result import AsyncResult -from django.contrib.auth import get_user_model -from django.utils.translation import gettext as _ -from packtools.sps import exceptions as sps_exceptions -from packtools.sps.models import package as sps_package -from packtools.sps.utils import file_utils as sps_file_utils -from packtools.sps.validation import article as sps_validation_article -from packtools.sps.validation import journal as sps_validation_journal -from packtools.validator import ValidationReportXML - -from article.choices import AS_CHANGE_SUBMITTED -from article.controller import create_article_from_etree, update_article -from article.models import Article -from config import celery_app -from issue.models import Issue -from journal.controller import get_journal_dict_for_validation -from libs.dsm.publication.documents import get_document, get_similar_documents - -from . import choices, controller, exceptions -from .utils import file_utils, package_utils, xml_utils -from upload.models import Package - - -User = get_user_model() - - -@celery_app.task(bind=True) -def task_validate(self, sps_pkg_id): - - task_validate_assets.apply_async( - kwargs={ - "sps_pkg_id": sps_pkg_id, - }, - ) - - # Aciona validação de Renditions - task_validate_renditions.apply_async( - kwargs={ - "sps_pkg_id": sps_pkg_id, - }, - ) - - # Aciona validacao do conteudo do XML - task_validate_content_xml.apply_async( - kwargs={ - "sps_pkg_id": sps_pkg_id, - }, - ) - - -@celery_app.task(bind=True) -def task_validate_assets(self, sps_pkg_id): - package_files = file_utils.get_file_list_from_zip(file_path) - article_assets = package_utils.get_article_assets_from_zipped_xml( - file_path, xml_path - ) - - has_errors = False - - for asset_result in package_utils.evaluate_assets(article_assets, package_files): - asset, is_present = asset_result - - if not is_present: - has_errors = True - Package.add_validation_result( - package_id, - error_category=choices.VE_ASSET_ERROR, - status=choices.VS_DISAPPROVED, - message=f'{asset.name} {_("file is mentioned in the XML but not present in the package.")}', - data={ - "xml_path": xml_path, - "id": asset.id, - "type": asset.type, - "missing_file": asset.name, - }, - ) - - Package.add_validation_result( - package_id, - error_category=choices.VE_ASSET_ERROR, - status=choices.VS_DISAPPROVED, - message=f'{asset.name} {_("file is mentioned in the XML but its optimised version not present in the package.")}', - data={ - "xml_path": xml_path, - "id": asset.id, - "type": "optimised", - "missing_file": file_utils.generate_filepath_with_new_extension( - asset.name, ".png" - ), - }, - ) - - Package.add_validation_result( - package_id, - error_category=choices.VE_ASSET_ERROR, - status=choices.VS_DISAPPROVED, - message=f'{asset.name} {_("file is mentioned in the XML but its thumbnail version not present in the package.")}', - data={ - "xml_path": xml_path, - "id": asset.id, - "type": "thumbnail", - "missing_file": file_utils.generate_filepath_with_new_extension( - asset.name, ".thumbnail.jpg" - ), - }, - ) - - if not has_errors: - Package.add_validation_result( - package_id, - error_category=choices.VE_ASSET_ERROR, - status=choices.VS_APPROVED, - data={"xml_path": xml_path}, - ) - return True - - -@celery_app.task(bind=True) -def task_validate_renditions(self, sps_pkg_id): - package_files = file_utils.get_file_list_from_zip(file_path) - article_renditions = package_utils.get_article_renditions_from_zipped_xml( - file_path, xml_path - ) - - has_errors = False - - for rendition_result in package_utils.evaluate_renditions( - article_renditions, package_files - ): - rendition, expected_filename, is_present = rendition_result - - if not is_present: - has_errors = True - - Package.add_validation_result( - package_id=package_id, - error_category=choices.VE_RENDITION_ERROR, - status=choices.VS_DISAPPROVED, - message=f'{rendition.language} {_("language is mentioned in the XML but its PDF file not present in the package.")}', - data={ - "xml_path": xml_path, - "language": rendition.language, - "is_main_language": rendition.is_main_language, - "missing_file": expected_filename, - }, - ) - - if not has_errors: - Package.add_validation_result( - package_id=package_id, - error_category=choices.VE_RENDITION_ERROR, - status=choices.VS_APPROVED, - data={"xml_path": xml_path}, - ) - return True - - -@celery_app.task(bind=True) -def task_validate_content_xml(self, sps_pkg_id): - xml_str = file_utils.get_xml_content_from_zip(file_path) - - validations = ValidationReportXML( - file_path=xml_str, data_file_path="validation_criteria_example.json" - ).validation_report() - - # data = {} - for result in validations: - for key, value in result.items(): - for result_ind in value: - string_validations = json.dumps(result_ind, default=str) - json_validations = json.loads(string_validations) - - vr = Package.add_validation_result( - package_id=package_id, - error_category=choices.VE_DATA_CONSISTENCY_ERROR, - status=choices.VS_CREATED, - data=json_validations, - ) - - # # TODO - # Realizar logica para verificar se a validacao passou ou nao - ######## - try: - message = json_validations["message"] - except Exception as e: - print(f"Error: {e}") - message = "" - - try: - valor = json_validations["result"] - except Exception as e: - print(f"Error: {e}") - valor = False - - if valor == "success": - status = choices.VS_APPROVED - else: - status = choices.VS_DISAPPROVED - - vr.update( - error_category=choices.VE_XML_FORMAT_ERROR, - message=_(message), - data=data, - status=status, - ) From f568b80600414a165dfcdf7c820c64c9304c778e Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sun, 10 Mar 2024 10:35:14 -0300 Subject: [PATCH 7/7] =?UTF-8?q?Adiciona=20importa=C3=A7=C3=B5es=20faltante?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- upload/models.py | 2 +- upload/tasks.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/upload/models.py b/upload/models.py index 24f414fb..e52a4801 100644 --- a/upload/models.py +++ b/upload/models.py @@ -1,4 +1,4 @@ -from datetime import date, timedelta +from datetime import date, timedelta, datetime from django.contrib.auth import get_user_model from django.db import models diff --git a/upload/tasks.py b/upload/tasks.py index b41dd812..5ed8f808 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -9,6 +9,7 @@ from packtools.sps.validation import article as sps_validation_article from packtools.sps.validation import journal as sps_validation_journal from packtools.validator import ValidationReportXML +from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre from article.choices import AS_CHANGE_SUBMITTED from article.controller import create_article_from_etree, update_article @@ -486,7 +487,7 @@ def task_validate_content_xml(file_path, xml_path, package_id): vr.update( error_category=choices.VE_XML_FORMAT_ERROR, message=_(message), - data=data, + data=json_validations, status=status, )