From 010c8bd82b8b226eb5967d92c4fa3fce82e0f23f Mon Sep 17 00:00:00 2001
From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com>
Date: Sun, 10 Mar 2024 12:04:24 -0300
Subject: [PATCH] Refactor upload, part 3: group the assets, renditions and
 XML content validations into a single task (#398)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Create the task upload.tasks.task_validate_original_zip_file
* Create upload.tasks.task_validate_xml_content
* Create upload.xml_validation
* Add TODO notes for the parameters the validations still need
* Update packtools to version 3.3.4, which covers more validations
* Remove package.tasks
* Add missing imports
---
 requirements/base.txt    |   2 +-
 upload/tasks.py          |  74 +++++
 upload/xml_validation.py | 577 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 652 insertions(+), 1 deletion(-)
 create mode 100644 upload/xml_validation.py

diff --git a/requirements/base.txt b/requirements/base.txt
index d8a91ee2..14ce94b1 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -60,7 +60,7 @@ minio==7.2
 # Upload
 # ------------------------------------------------------------------------------
 lxml==4.9.3  # https://github.com/lxml/lxml
--e git+https://github.com/scieloorg/packtools.git@3.3.1#egg=packtools
+-e git+https://github.com/scieloorg/packtools.git@3.3.4#egg=packtools
 -e git+https://github.com/scieloorg/scielo_scholarly_data#egg=scielo_scholarly_data
 
 # DSM Publication
diff --git a/upload/tasks.py b/upload/tasks.py
index 50797046..5ed8f808 100644
--- a/upload/tasks.py
+++ b/upload/tasks.py
@@ -9,6 +9,7 @@
 from packtools.sps.validation import article as sps_validation_article
 from packtools.sps.validation import journal as sps_validation_journal
 from packtools.validator import ValidationReportXML
+from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre
 
 from article.choices import AS_CHANGE_SUBMITTED
 from article.controller import create_article_from_etree, update_article
@@ -22,6 +23,7 @@
 from .utils import file_utils, package_utils, xml_utils
 from upload.models import Package
+from upload.xml_validation import validate_xml_content, add_app_data, add_sps_data, add_journal_data
 
 User = get_user_model()
 
@@ -539,3 +541,75 @@ def _get_user(request, user_id):
 def task_request_pid_for_accepted_packages(self, user_id):
     user = _get_user(self.request, user_id)
     controller.request_pid_for_accepted_packages(user)
+
+
+@celery_app.task(bind=True)
+def task_validate_original_zip_file(self, package_id, file_path, journal_id, issue_id, article_id):
+    xml_path = None
+    for xml_with_pre in XMLWithPre.create(file_path=file_path):
+        xml_path = xml_with_pre.filename
+        break
+
+    if xml_path:
+        # Trigger assets validation
+        task_validate_assets.apply_async(
+            kwargs={
+                "file_path": file_path,
+                "xml_path": xml_path,
+                "package_id": package_id,
+            },
+        )
+
+        # Trigger renditions validation
+        task_validate_renditions.apply_async(
+            kwargs={
+                "file_path": file_path,
+                "xml_path": xml_path,
+                "package_id": package_id,
+            },
+        )
+
+        # Trigger XML content validation
+        task_validate_xml_content.apply_async(
+            kwargs={
+                "file_path": file_path,
+                "xml_path": xml_path,
+                "package_id": package_id,
+                "journal_id": journal_id,
+                "issue_id": issue_id,
+                "article_id": article_id,
+            },
+        )
+
+
+@celery_app.task(bind=True)
+def task_validate_xml_content(self, file_path, xml_path, package_id, journal_id, issue_id, article_id):
+    # VE_BIBLIOMETRICS_DATA_ERROR = "bibliometrics-data-error"
+    # VE_SERVICES_DATA_ERROR = "services-data-error"
+    # VE_DATA_CONSISTENCY_ERROR = "data-consistency-error"
+    # VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error"
+
+    # TODO: fill in data
+    data = {}
+    # add_app_data(data, app_data)
+    # add_journal_data(data, journal, issue)
+    # add_sps_data(data, sps_data)
+
+    package = Package.objects.get(pk=package_id)
+    for xml_with_pre in XMLWithPre.create(file_path=file_path):
+        results = validate_xml_content(xml_with_pre.sps_pkg_name, xml_with_pre.xmltree, data)
+
+        for result in results:
+            # ['xpath', 'advice', 'title', 'expected_value', 'got_value', 'message', 'validation_type', 'response']
+            if result["response"] != "ERROR":
+                continue
+
+            message = result["message"]
+            advice = result["advice"] or ""
+            message = ". ".join([_(message), _(advice)])
+            package._add_validation_result(
+                error_category=choices.VE_DATA_CONSISTENCY_ERROR,
+                status=choices.VS_DISAPPROVED,
+                message=message,
+                data=result,
+            )
diff --git a/upload/xml_validation.py b/upload/xml_validation.py
new file mode 100644
index 00000000..304d0b63
--- /dev/null
+++ b/upload/xml_validation.py
@@ -0,0 +1,577 @@
+import sys
+
+from packtools.sps.validation.aff import AffiliationsListValidation
+from packtools.sps.validation.article_and_subarticles import (
+    ArticleLangValidation,
+    ArticleAttribsValidation,
+    ArticleIdValidation,
+    ArticleSubjectsValidation,
+    ArticleTypeValidation,
+)
+from packtools.sps.validation.article_authors import ArticleAuthorsValidation
+
+from packtools.sps.validation.article_data_availability import (
+    DataAvailabilityValidation,
+)
+from packtools.sps.validation.article_doi import ArticleDoiValidation
+from packtools.sps.validation.article_lang import ArticleLangValidation
+from packtools.sps.validation.article_license import ArticleLicenseValidation
+from packtools.sps.validation.article_toc_sections import ArticleTocSectionsValidation
+from packtools.sps.validation.article_xref import ArticleXrefValidation
+from packtools.sps.validation.dates import ArticleDatesValidation
+from packtools.sps.validation.journal_meta import JournalMetaValidation
+from packtools.sps.validation.preprint import PreprintValidation
+from packtools.sps.validation.related_articles import RelatedArticlesValidation
+from tracker.models import UnexpectedEvent
+
+
+def doi_callable_get_data(doi):
+    return {}
+
+
+def orcid_callable_get_validate(orcid):
+    return {}
+
+
+def add_app_data(data, app_data):
+    # TODO
+    data["country_codes"] = []
+
+
+def add_journal_data(data, journal, issue):
+    # TODO
+    # journal-specific
+    data["language_codes"] = []
+
+    if issue:
+        data["subjects"] = issue.subjects_list
+        data["expected_toc_sections"] = issue.toc_sections
+    else:
+        data["subjects"] = journal.subjects_list
+        data["expected_toc_sections"] = journal.toc_sections
+
+    # {
+    #     'issns': {
+    #         'ppub': '0103-5053',
+    #         'epub': '1678-4790'
+    #     },
+    #     'acronym': 'hcsm',
+    #     'journal-title': 'História, Ciências, Saúde-Manguinhos',
+    #     'abbrev-journal-title': 'Hist. cienc. saude-Manguinhos',
+    #     'publisher-name': ['Casa de Oswaldo Cruz, Fundação Oswaldo Cruz'],
+    #     'nlm-ta': 'Rev Saude Publica'
+    # }
+    data["journal"] = journal.data
+    data["expected_license_code"] = journal.license_code
+
+
+def add_sps_data(data, sps_data):
+    # TODO
+    # depends on SPS / JATS / criteria
+    data["dtd_versions"] = []
+    data["sps_versions"] = []
+    data["article_types"] = []
+    data["expected_article_type_vs_subject_similarity"] = 0
+    data["data_availability_specific_uses"] = []
+
+    data["credit_taxonomy"] = []
+
+    data["article_type_correspondences"] = []
+
+    data["future_date"] = ""
+    data["events_order"] = []
+    data["required_events"] = []
+
+
+def validate_xml_content(sps_pkg_name, xmltree, data):
+
+    functions = (
+        validate_affiliations,
+        validate_languages,
+        validate_article_attributes,
+        validate_article_id_other,
+        validate_subjects,
+        validate_article_type,
+        validate_authors,
+        validate_data_availability,
+        validate_doi,
+        validate_article_languages,
+        validate_licenses,
+        validate_toc_sections,
+        validate_xref,
+        validate_dates,
+        validate_journal,
+        validate_preprint,
+        validate_related_articles,
+    )
+    for f in functions:
+        yield from f(sps_pkg_name, xmltree, data)
+
+
+def validate_affiliations(sps_pkg_name, xmltree, data):
+    xml = AffiliationsListValidation(xmltree)
+
+    try:
+        yield from xml.validade_affiliations_list(data["country_codes"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_affiliations",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_languages(sps_pkg_name, xmltree, data):
+    xml = ArticleLangValidation(xmltree)
+
+    try:
+        yield from xml.validate_language(data["language_codes"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_languages",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_article_attributes(sps_pkg_name, xmltree, data):
+    xml = ArticleAttribsValidation(xmltree)
+
+    try:
+        yield from xml.validate_dtd_version(data["dtd_versions"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_dtd_version",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+    try:
+        yield from xml.validate_specific_use(data["sps_versions"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_specific_use",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_article_id_other(sps_pkg_name, xmltree, data):
+    xml = ArticleIdValidation(xmltree)
+
+    try:
+        yield from xml.validate_article_id_other()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_article_id_other",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_subjects(sps_pkg_name, xmltree, data):
+    xml = ArticleSubjectsValidation(xmltree)
+
+    try:
+        yield from xml.validate_without_subjects()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_without_subjects",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_article_type(sps_pkg_name, xmltree, data):
+    xml = ArticleTypeValidation(xmltree)
+
+    try:
+        yield from xml.validate_article_type(data["article_types"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_article_type",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_article_type_vs_subject_similarity(
+            data["subjects"], data["expected_article_type_vs_subject_similarity"]
+        )
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_article_type_vs_subject_similarity",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_authors(sps_pkg_name, xmltree, data):
+    xml = ArticleAuthorsValidation(xmltree)
+
+    try:
+        yield from xml.validate_authors_orcid_format()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_authors_orcid_format",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_authors_orcid_is_registered(
+            data["callable_get_orcid_data"]
+        )
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_authors_orcid_is_registered",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_authors_orcid_is_unique()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_authors_orcid_is_unique",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_authors_role(data["credit_taxonomy"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_authors_role",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_data_availability(sps_pkg_name, xmltree, data):
+    xml = DataAvailabilityValidation(xmltree)
+
+    try:
+        yield from xml.validate_data_availability(
+            data["data_availability_specific_uses"]
+        )
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_data_availability",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_doi(sps_pkg_name, xmltree, data):
+    xml = ArticleDoiValidation(xmltree)
+
+    try:
+        yield from xml.validate_all_dois_are_unique()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_all_dois_are_unique",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_doi_registered(data["callable_get_doi_data"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_doi_registered",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_main_article_doi_exists()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_main_article_doi_exists",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_translations_doi_exists()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_translations_doi_exists",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_article_languages(sps_pkg_name, xmltree, data):
+    xml = ArticleLangValidation(xmltree)
+
+    try:
+        yield from xml.validate_article_lang()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_article_lang",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_licenses(sps_pkg_name, xmltree, data):
+    xml = ArticleLicenseValidation(xmltree)
+    # yield from xml.validate_license(license_expected_value)
+
+    try:
+        yield from xml.validate_license_code(data["expected_license_code"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_license_code",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_toc_sections(sps_pkg_name, xmltree, data):
+    xml = ArticleTocSectionsValidation(xmltree)
+
+    try:
+        yield from xml.validade_article_title_is_different_from_section_titles()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validade_article_title_is_different_from_section_titles",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_article_toc_sections(data["expected_toc_sections"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_article_toc_sections",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_xref(sps_pkg_name, xmltree, data):
+    xml = ArticleXrefValidation(xmltree)
+
+    try:
+        yield from xml.validate_id()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_id",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_rid()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_rid",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_dates(sps_pkg_name, xmltree, data):
+    xml = ArticleDatesValidation(xmltree)
+
+    try:
+        yield from xml.validate_article_date(data["future_date"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_article_date",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_collection_date(data["future_date"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_collection_date",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_history_dates(
+            data["events_order"], data["required_events"]
+        )
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_history_dates",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.validate_number_of_digits_in_article_date()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_number_of_digits_in_article_date",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_journal(sps_pkg_name, xmltree, data):
+    xml = JournalMetaValidation(xmltree)
+
+    try:
+        yield from xml.validate(data["journal"])
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_journal",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_preprint(sps_pkg_name, xmltree, data):
+    xml = PreprintValidation(xmltree)
+
+    try:
+        yield from xml.preprint_validation()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.preprint_validation",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+
+
+def validate_related_articles(sps_pkg_name, xmltree, data):
+    xml = RelatedArticlesValidation(xmltree)
+
+    try:
+        yield from xml.related_articles_doi()
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.validate_related_articles",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )
+    try:
+        yield from xml.related_articles_matches_article_type_validation(
+            data["article_type_correspondences"]
+        )
+    except Exception as exc:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            exception=exc,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "upload.xml_validation.related_articles_matches_article_type_validation",
+                "sps_pkg_name": sps_pkg_name,
+            },
+        )