From d8901ebcb36a342f43aefae89513348f86566ebf Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Mon, 11 Mar 2024 07:57:11 -0300 Subject: [PATCH 01/25] Corrige Institution.__str__, adiciona atributos de autocomplete e altera InstitutionHistory.panels de FieldPanel para Autocomplete (#401) --- institution/models.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/institution/models.py b/institution/models.py index 971aa741..bce1afad 100644 --- a/institution/models.py +++ b/institution/models.py @@ -2,6 +2,7 @@ from django.utils.translation import gettext as _ from modelcluster.models import ClusterableModel from wagtail.admin.panels import FieldPanel, InlinePanel +from wagtailautocomplete.edit_handlers import AutocompletePanel from core.models import CommonControlField from location.models import Location @@ -48,8 +49,13 @@ class Institution(CommonControlField, ClusterableModel): FieldPanel("logo"), ] + autocomplete_search_field = "name" + + def autocomplete_label(self): + return str(self) + def __unicode__(self): - return "%s | %s | %s | %s | %s" % ( + return "%s | %s | %s | %s | %s | %s" % ( self.name, self.acronym, self.level_1, @@ -59,7 +65,7 @@ def __unicode__(self): ) def __str__(self): - return "%s | %s | %s | %s | %s" % ( + return "%s | %s | %s | %s | %s | %s" % ( self.name, self.acronym, self.level_1, @@ -133,7 +139,7 @@ class InstitutionHistory(models.Model): final_date = models.DateField(_("Final Date"), null=True, blank=True) panels = [ - FieldPanel("institution", heading=_("Institution")), + AutocompletePanel("institution", heading=_("Institution")), FieldPanel("initial_date"), FieldPanel("final_date"), ] From 9e82f257b432a326b15d11118d4774899d1a15b5 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Mon, 11 Mar 2024 08:45:15 -0300 Subject: [PATCH 02/25] =?UTF-8?q?Faz=20corre=C3=A7=C3=B5es=20na=20app=20jo?= 
=?UTF-8?q?urnal:=20adiciona=20Journal.title,=20wagtail=5Fhooks.JournalCre?= =?UTF-8?q?ateView,=20etc=20=20(#402)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adiciona Journal.title * Modifica os atributos de journal.models.Owner e Publisher * Cria journal.wagtail.JournalCreateView para adicionar o usuário como creator * Adiciona migrações de banco de dados relacionados a journal --- ...ner_page_remove_publisher_page_and_more.py | 51 +++++++++++++++++++ journal/models.py | 18 +++++-- journal/wagtail_hooks.py | 11 +++- 3 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 journal/migrations/0002_remove_owner_page_remove_publisher_page_and_more.py diff --git a/journal/migrations/0002_remove_owner_page_remove_publisher_page_and_more.py b/journal/migrations/0002_remove_owner_page_remove_publisher_page_and_more.py new file mode 100644 index 00000000..d17b893a --- /dev/null +++ b/journal/migrations/0002_remove_owner_page_remove_publisher_page_and_more.py @@ -0,0 +1,51 @@ +# Generated by Django 4.2.6 on 2024-03-11 11:23 + +from django.db import migrations, models +import django.db.models.deletion +import modelcluster.fields + + +class Migration(migrations.Migration): + dependencies = [ + ("journal", "0001_initial"), + ] + + operations = [ + migrations.RemoveField( + model_name="owner", + name="page", + ), + migrations.RemoveField( + model_name="publisher", + name="page", + ), + migrations.AddField( + model_name="journal", + name="title", + field=models.CharField( + blank=True, max_length=265, null=True, verbose_name="Title" + ), + ), + migrations.AddField( + model_name="owner", + name="journal", + field=modelcluster.fields.ParentalKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="owner", + to="journal.journal", + ), + ), + migrations.AddField( + model_name="publisher", + name="journal", + field=modelcluster.fields.ParentalKey( + blank=True, + null=True, + 
on_delete=django.db.models.deletion.SET_NULL, + related_name="publisher", + to="journal.journal", + ), + ), + ] diff --git a/journal/models.py b/journal/models.py index 4fc5d935..e932d752 100644 --- a/journal/models.py +++ b/journal/models.py @@ -127,6 +127,9 @@ class Journal(CommonControlField, ClusterableModel): short_title = models.CharField( _("Short Title"), max_length=100, null=True, blank=True ) + title = models.CharField( + _("Title"), max_length=265, null=True, blank=True + ) official_journal = models.ForeignKey( "OfficialJournal", null=True, @@ -136,10 +139,10 @@ class Journal(CommonControlField, ClusterableModel): ) def __unicode__(self): - return self.short_title or str(self.official_journal) + return self.title or self.short_title or str(self.official_journal) def __str__(self): - return self.short_title or str(self.official_journal) + return self.title or self.short_title or str(self.official_journal) base_form_class = OfficialJournalForm @@ -165,7 +168,7 @@ def __str__(self): ) def autocomplete_label(self): - return self.official_journal.title + return self.title or self.official_journal.title @property def logo_url(self): @@ -182,6 +185,8 @@ def create_or_update( cls, user, official_journal=None, + title=None, + short_title=None, ): logging.info(f"Journal.create_or_update({official_journal}") try: @@ -196,14 +201,17 @@ def create_or_update( logging.info("create {}".format(obj)) obj.official_journal = official_journal or obj.official_journal + obj.title = title or obj.title + obj.short_title = short_title or obj.short_title + obj.save() logging.info(f"return {obj}") return obj class Owner(Orderable, InstitutionHistory): - page = ParentalKey(Journal, related_name="owner") + journal = ParentalKey(Journal, related_name="owner", null=True, blank=True, on_delete=models.SET_NULL) class Publisher(Orderable, InstitutionHistory): - page = ParentalKey(Journal, related_name="publisher") + journal = ParentalKey(Journal, related_name="publisher", null=True, 
blank=True, on_delete=models.SET_NULL) diff --git a/journal/wagtail_hooks.py b/journal/wagtail_hooks.py index 385b193e..2209a53a 100644 --- a/journal/wagtail_hooks.py +++ b/journal/wagtail_hooks.py @@ -47,16 +47,23 @@ class OfficialJournalAdmin(ModelAdmin): ) +class JournalCreateView(CreateView): + def form_valid(self, form): + self.object = form.save_all(self.request.user) + return HttpResponseRedirect(self.get_success_url()) + + class JournalAdmin(ModelAdmin): model = Journal menu_label = _("Journal") + create_view_class = JournalCreateView menu_icon = "folder" menu_order = 200 add_to_settings_menu = False exclude_from_explorer = False - list_display = ("official_journal", "short_title") - search_fields = ("official_journal__title", "short_title") + list_display = ("title", "short_title") + search_fields = ("official_journal__issn_electronic", "official_journal__issn_print", "short_title") class JournalModelAdminGroup(ModelAdminGroup): From 4c9e84d9d2fa7dc46cbff4df3e7d672a2058666c Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Thu, 14 Mar 2024 00:50:36 -0300 Subject: [PATCH 03/25] Adiciona filtros de journal_acron e publication_year para migrar dados de artigos (#403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adiciona filtros de journal_acron e publication_year para migrar dados de artigos, criando uma amostragem de migração * Adiciona os parâmetros journal_acron e publication_year --- bigbang/tasks_scheduler.py | 4 ++++ migration/tasks.py | 13 +++++++++++-- proc/models.py | 18 ++++++++++++++++-- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/bigbang/tasks_scheduler.py b/bigbang/tasks_scheduler.py index 2090924c..90c5da4e 100644 --- a/bigbang/tasks_scheduler.py +++ b/bigbang/tasks_scheduler.py @@ -251,6 +251,8 @@ def _schedule_migrate_document_files_and_records(username, enabled): name="task_migrate_document_files", kwargs=dict( 
username=username, + journal_acron=None, + publication_year=None, force_update=False, ), description=_("Migra arquivos dos documentos"), @@ -266,6 +268,8 @@ def _schedule_migrate_document_files_and_records(username, enabled): name="task_migrate_document_records", kwargs=dict( username=username, + journal_acron=None, + publication_year=None, force_update=False, ), description=_("Migra registros dos documentos"), diff --git a/migration/tasks.py b/migration/tasks.py index f38ab757..e044a265 100644 --- a/migration/tasks.py +++ b/migration/tasks.py @@ -262,11 +262,15 @@ def task_migrate_document_files( user_id=None, username=None, collection_acron=None, + journal_acron=None, + publication_year=None, force_update=False, ): try: + publication_year = publication_year and str(publication_year) for collection in _get_collections(collection_acron): - items = IssueProc.files_to_migrate(collection, force_update) + items = IssueProc.files_to_migrate( + collection, journal_acron, publication_year, force_update) for item in items: # Importa os arquivos das pastas */acron/volnum/* task_import_one_issue_files.apply_async( @@ -328,11 +332,16 @@ def task_migrate_document_records( user_id=None, username=None, collection_acron=None, + journal_acron=None, + publication_year=None, force_update=False, ): try: + publication_year = publication_year and str(publication_year) + for collection in _get_collections(collection_acron): - items = IssueProc.docs_to_migrate(collection, force_update) + items = IssueProc.docs_to_migrate( + collection, journal_acron, publication_year, force_update) for item in items: # Importa os registros de documentos task_import_one_issue_document_records.apply_async( diff --git a/proc/models.py b/proc/models.py index 7db8b73b..9646297e 100644 --- a/proc/models.py +++ b/proc/models.py @@ -716,7 +716,7 @@ def update( ) @classmethod - def files_to_migrate(cls, collection, force_update): + def files_to_migrate(cls, collection, journal_acron, publication_year, 
force_update): """ Muda o status de PROGRESS_STATUS_REPROC para PROGRESS_STATUS_TODO E se force_update = True, muda o status de PROGRESS_STATUS_DONE para PROGRESS_STATUS_TODO @@ -735,10 +735,17 @@ def files_to_migrate(cls, collection, force_update): migration_status=tracker_choices.PROGRESS_STATUS_DONE, ).update(files_status=tracker_choices.PROGRESS_STATUS_TODO) + params = {} + if publication_year: + params['issue__publication_year'] = publication_year + if journal_acron: + params['journal_proc__acron'] = journal_acron + return cls.objects.filter( files_status=tracker_choices.PROGRESS_STATUS_TODO, collection=collection, migration_status=tracker_choices.PROGRESS_STATUS_DONE, + **params, ).iterator() def get_files_from_classic_website( @@ -790,7 +797,7 @@ def get_files_from_classic_website( ) @classmethod - def docs_to_migrate(cls, collection, force_update): + def docs_to_migrate(cls, collection, journal_acron, publication_year, force_update): """ Muda o status de PROGRESS_STATUS_REPROC para PROGRESS_STATUS_TODO E se force_update = True, muda o status de PROGRESS_STATUS_DONE para PROGRESS_STATUS_TODO @@ -809,10 +816,17 @@ def docs_to_migrate(cls, collection, force_update): migration_status=tracker_choices.PROGRESS_STATUS_DONE, ).update(docs_status=tracker_choices.PROGRESS_STATUS_TODO) + params = {} + if publication_year: + params['issue__publication_year'] = publication_year + if journal_acron: + params['journal_proc__acron'] = journal_acron + return cls.objects.filter( docs_status=tracker_choices.PROGRESS_STATUS_TODO, collection=collection, migration_status=tracker_choices.PROGRESS_STATUS_DONE, + **params, ).iterator() def get_article_records_from_classic_website( From 97bd478f1dc0908d1798c91eb5f77cdbf5466c28 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Fri, 15 Mar 2024 16:44:50 -0300 Subject: [PATCH 04/25] Garante que no XML migrado (seja nativo ou gerado a partir do HTML) tenha o PID v2 e o order 
(article-id other) (#405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Corrige ou adiciona ao XML o elemento pid-v2 usando como valor o pid do artigo do site clássico * Atualiza packtools versão 3.4.0 para ter XMLWithPre.order Corrige ou adiciona ao XML o elemento article-id (other/order) usando como valor os últimos 5 dígitos do pid do artigo do site clássico * Atualiza a versão da biblioteca scielo_classic_website para 1.6.4 para corrigir a obtenção de registros de artigos em serial xml * Evita guardar versões anteriores dos arquivos --- migration/controller.py | 10 ++++++++++ migration/models.py | 10 ++++------ pid_provider/models.py | 4 ++++ requirements/base.txt | 4 ++-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/migration/controller.py b/migration/controller.py index c895ac65..c8ccbd7c 100644 --- a/migration/controller.py +++ b/migration/controller.py @@ -501,6 +501,16 @@ def get_migrated_xml_with_pre(article_proc): xml_file_path = None xml_file_path = obj.file.path for item in XMLWithPre.create(path=xml_file_path): + if article_proc.pid and item.v2 != article_proc.pid: + # corrige ou adiciona pid v2 no XML nativo ou obtido do html + # usando o valor do pid v2 do site clássico + item.v2 = article_proc.pid + + order = str(int(article_proc.pid[-5:])) + if not item.order or str(int(item.order)) != order: + # corrige ou adiciona other pid no XML nativo ou obtido do html + # usando o valor do "order" do site clássico + item.order = article_proc.pid[-5:] return item except Exception as e: raise XMLVersionXmlWithPreError( diff --git a/migration/models.py b/migration/models.py index 92e78a44..5f025bf8 100644 --- a/migration/models.py +++ b/migration/models.py @@ -448,12 +448,10 @@ def get_original_href(self, original_path): pass def save_file(self, name, content, delete=False): - if self.file: - if delete: - try: - self.file.delete(save=True) - except Exception as e: - pass + try: + 
self.file.delete(save=True) + except Exception as e: + pass self.file.save(name, ContentFile(content)) def is_up_to_date(self, file_date): diff --git a/pid_provider/models.py b/pid_provider/models.py index 70c0a5d3..6ce60c7d 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -93,6 +93,10 @@ def create( return cls.get(pid_provider_xml, xml_with_pre.finger_print) def save_file(self, filename, content): + try: + self.file.delete(save=True) + except Exception as e: + pass self.file.save(filename, ContentFile(content)) @property diff --git a/requirements/base.txt b/requirements/base.txt index d8a91ee2..a6236963 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -60,7 +60,7 @@ minio==7.2 # Upload # ------------------------------------------------------------------------------ lxml==4.9.3 # https://github.com/lxml/lxml --e git+https://github.com/scieloorg/packtools.git@3.3.1#egg=packtools +-e git+https://github.com/scieloorg/packtools.git@3.4.0#egg=packtools -e git+https://github.com/scieloorg/scielo_scholarly_data#egg=scielo_scholarly_data # DSM Publication @@ -73,7 +73,7 @@ python-magic==0.4.27 # DSM Migration # ------------------------------------------------------------------------------ --e git+https://github.com/scieloorg/scielo_migration.git@1.6.3#egg=scielo_classic_website +-e git+https://github.com/scieloorg/scielo_migration.git@1.6.4#egg=scielo_classic_website #-e git+https://github.com/scieloorg/scielo_migration.git#egg=scielo_classic_website python-dateutil==2.8.2 tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability From d647f7b8cc6480817f87ecb8d84b1d28b46896db Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sat, 23 Mar 2024 13:25:33 -0300 Subject: [PATCH 05/25] Cria o procedimento de corrigir o valor do Pid v2 (#410) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria PidProviderXML.fix_pid_v2 * Cria 
FixPidV2 para controlar o que foi corrigido no upload e no core * Cria FixPidV2ModelAdmin * Adiciona PidProviderAPIClient.fix_pid_v2, fix_pid_v2_url. Refatora PidProviderAPIClient.enabled * Cria APIPidProviderFixPidV2Error * Cria provider.requester.PidRequester.fix_pid_v2 * Cria SPSPkg.fix_pid_v2 * Cria ArticleProc.fix_pid_v2 e adiciona a chamada no procedimento de generate_sps_package * Cria tarefas para corriger o valor de pid v2 em PidProviderXML a partir de ArticleProc.pid * Cria provider.provider.PidProvider com os métodos fix_pid_v2, get_sps_pkg_name, get_xmltree * Adiciona a migração correspondente ao modelo FixPidV2 --- package/models.py | 4 +- pid_provider/client.py | 76 +++++++- pid_provider/exceptions.py | 8 + ...2_alter_pidproviderxml_options_fixpidv2.py | 119 ++++++++++++ pid_provider/models.py | 176 +++++++++++++++++- pid_provider/provider.py | 24 ++- pid_provider/requester.py | 61 +++++- pid_provider/tasks.py | 31 +++ pid_provider/wagtail_hooks.py | 34 ++++ proc/models.py | 9 + 10 files changed, 530 insertions(+), 12 deletions(-) create mode 100644 pid_provider/migrations/0002_alter_pidproviderxml_options_fixpidv2.py diff --git a/package/models.py b/package/models.py index afc3899a..1e995c5e 100644 --- a/package/models.py +++ b/package/models.py @@ -465,7 +465,6 @@ def create_or_update( ): try: operation = article_proc.start(user, "SPSPkg.create_or_update") - obj = cls.add_pid_v3_to_zip(user, sps_pkg_zip_path, is_public, article_proc) obj.origin = origin or obj.origin obj.is_public = is_public or obj.is_public @@ -538,6 +537,9 @@ def is_registered_xml_zip(cls, zip_xml_file_path): pass yield item + def fix_pid_v2(self, user, correct_pid_v2): + return pid_provider_app.fix_pid_v2(user, self.pid_v3, correct_pid_v2) + @classmethod def add_pid_v3_to_zip(cls, user, zip_xml_file_path, is_public, article_proc): """ diff --git a/pid_provider/client.py b/pid_provider/client.py index 8fca99ee..977a58a7 100644 --- a/pid_provider/client.py +++ 
b/pid_provider/client.py @@ -24,7 +24,7 @@ class PidProviderAPIClient: """ - Interface com o pid provider + Interface com o pid provider do Core """ def __init__( @@ -44,9 +44,10 @@ def __init__( @property def enabled(self): - if self.config: + try: return bool(self.config.api_username and self.config.api_password) - return False + except (AttributeError, ValueError, TypeError): + return False @property def config(self): @@ -58,6 +59,18 @@ def config(self): self._config = None return self._config + @property + def fix_pid_v2_url(self): + if not hasattr(self, "_fix_pid_v2_url") or not self._fix_pid_v2_url: + try: + if self.pid_provider_api_post_xml: + self._fix_pid_v2_url = self.pid_provider_api_post_xml.replace( + "pid_provider", "fix_pid_v2" + ) + except AttributeError as e: + raise exceptions.APIPidProviderConfigError(e) + return self._fix_pid_v2_url + @property def pid_provider_api_post_xml(self): if self._pid_provider_api_post_xml is None: @@ -247,3 +260,60 @@ def _process_post_xml_response(self, response, xml_with_pre): break except KeyError: pass + + def fix_pid_v2(self, pid_v3, correct_pid_v2): + """ + name : str + nome do arquivo xml + """ + try: + + self.token = self.token or self._get_token( + username=self.api_username, + password=self.api_password, + timeout=self.timeout, + ) + response = self._post_fix_pid_v2(pid_v3, correct_pid_v2, self.token, self.timeout) + response["fixed_in_core"] = response.get("v2") == correct_pid_v2 + return response + except ( + exceptions.GetAPITokenError, + exceptions.APIPidProviderPostError, + exceptions.APIPidProviderConfigError, + ) as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + return { + "error_msg": str(e), + "error_type": str(type(e)), + "traceback": [ + str(item) for item in traceback.extract_tb(exc_traceback) + ], + } + + def _post_fix_pid_v2(self, pid_v3, correct_pid_v2, token, timeout): + header = { + "Authorization": "Bearer " + token, + # "content-type": "multi-part/form-data", + # "content-type": 
"application/json", + } + try: + uri = self.fix_pid_v2_url + return post_data( + uri, + data={"pid_v3": pid_v3, "correct_pid_v2": correct_pid_v2}, + headers=header, + timeout=timeout, + verify=False, + json=True, + ) + except Exception as e: + logging.exception(e) + raise exceptions.APIPidProviderFixPidV2Error( + _("Unable to get pid from pid provider {} {} {} {} {}").format( + uri, + pid_v3, + correct_pid_v2, + type(e), + e, + ) + ) diff --git a/pid_provider/exceptions.py b/pid_provider/exceptions.py index 33fb58b6..2eee175f 100644 --- a/pid_provider/exceptions.py +++ b/pid_provider/exceptions.py @@ -36,3 +36,11 @@ class APIPidProviderConfigError(Exception): class InvalidPidError(Exception): ... + + +class PidProviderXMLFixPidV2Error(Exception): + ... + + +class APIPidProviderFixPidV2Error(Exception): + ... diff --git a/pid_provider/migrations/0002_alter_pidproviderxml_options_fixpidv2.py b/pid_provider/migrations/0002_alter_pidproviderxml_options_fixpidv2.py new file mode 100644 index 00000000..5b29f72f --- /dev/null +++ b/pid_provider/migrations/0002_alter_pidproviderxml_options_fixpidv2.py @@ -0,0 +1,119 @@ +# Generated by Django 4.2.6 on 2024-03-22 22:37 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("pid_provider", "0001_initial"), + ] + + operations = [ + migrations.AlterModelOptions( + name="pidproviderxml", + options={"ordering": ["-updated", "-created", "pkg_name"]}, + ), + migrations.CreateModel( + name="FixPidV2", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + 
"incorrect_pid_v2", + models.CharField( + blank=True, + max_length=24, + null=True, + verbose_name="Incorrect v2", + ), + ), + ( + "correct_pid_v2", + models.CharField( + blank=True, max_length=24, null=True, verbose_name="Correct v2" + ), + ), + ( + "fixed_in_upload", + models.BooleanField(blank=True, default=None, null=True), + ), + ( + "fixed_in_core", + models.BooleanField(blank=True, default=None, null=True), + ), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "pid_provider_xml", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="pid_provider.pidproviderxml", + unique=True, + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "ordering": ["-updated", "-created"], + "indexes": [ + models.Index( + fields=["incorrect_pid_v2"], + name="pid_provide_incorre_17a11a_idx", + ), + models.Index( + fields=["correct_pid_v2"], name="pid_provide_correct_ac149d_idx" + ), + models.Index( + fields=["fixed_in_core"], name="pid_provide_fixed_i_4e3bd1_idx" + ), + models.Index( + fields=["fixed_in_upload"], + name="pid_provide_fixed_i_0109bc_idx", + ), + ], + }, + ), + ] diff --git a/pid_provider/models.py b/pid_provider/models.py index 6ce60c7d..f66d41d9 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -494,7 +494,7 @@ class PidProviderXML(CommonControlField, ClusterableModel): base_form_class = CoreAdminModelForm panel_a = [ - FieldPanel("registered_in_core", read_only=True), + FieldPanel("registered_in_core"), FieldPanel("issn_electronic", read_only=True), FieldPanel("issn_print", read_only=True), FieldPanel("pub_year", 
read_only=True), @@ -531,6 +531,7 @@ class PidProviderXML(CommonControlField, ClusterableModel): ) class Meta: + ordering = ["-updated", "-created", "pkg_name"] indexes = [ models.Index(fields=["pkg_name"]), models.Index(fields=["v3"]), @@ -563,7 +564,7 @@ def __str__(self): def public_items(cls, from_date): now = datetime.utcnow().isoformat()[:10] return cls.objects.filter( - Q(available_since__lte=now) + (Q(available_since__isnull=True) | Q(available_since__lte=now)) & (Q(created__gte=from_date) | Q(updated__gte=from_date)), current_version__pid_provider_xml__v3__isnull=False, ).iterator() @@ -661,7 +662,11 @@ def register( # analisa se aceita ou rejeita registro updated_data = cls.skip_registration( - xml_adapter, registered, force_update, origin_date, registered_in_core, + xml_adapter, + registered, + force_update, + origin_date, + registered_in_core, ) if updated_data: return updated_data @@ -685,7 +690,11 @@ def register( # compara de novo, após completar pids updated_data = cls.skip_registration( - xml_adapter, registered, force_update, origin_date, registered_in_core, + xml_adapter, + registered, + force_update, + origin_date, + registered_in_core, ) if updated_data: # XML da entrada e registrado divergem: não tem e tem pids, @@ -796,7 +805,9 @@ def _save( return registered @classmethod - def skip_registration(cls, xml_adapter, registered, force_update, origin_date, registered_in_core): + def skip_registration( + cls, xml_adapter, registered, force_update, origin_date, registered_in_core + ): """ XML é versão AOP, mas documento está registrado com versão VoR (fascículo), @@ -1266,8 +1277,32 @@ def is_registered(cls, xml_with_pre): return {"error_msg": str(e), "error_type": str(type(e))} return {} + def fix_pid_v2(self, user, correct_pid_v2): + try: + if correct_pid_v2 == self.v2: + return self.data + xml_with_pre = self.current_version.xml_with_pre + try: + self.current_version.delete() + except Exception as e: + pass + xml_with_pre.v2 = correct_pid_v2 + 
self.current_version = XMLVersion.get_or_create(user, self, xml_with_pre) + self.v2 = correct_pid_v2 + self.save() + return self.data + except Exception as e: + raise exceptions.PidProviderXMLFixPidV2Error( + f"Unable to fix pid v2 for {self.v3} {e} {type(e)}" + ) + class CollectionPidRequest(CommonControlField): + """ + Uso exclusivo no Core + para controlar a entrada de XML provenientes do AM + registrando cada coleção e a data da coleta + """ collection = models.ForeignKey( Collection, on_delete=models.SET_NULL, null=True, blank=True ) @@ -1337,3 +1372,134 @@ def create_or_update( return obj except cls.DoesNotExist: return cls.create(user, collection, end_date) + + +class FixPidV2(CommonControlField): + """ + Uso exclusivo da aplicação Upload + Para gerenciar os pids v2 que foram ou não corrigidos no Upload e no Core + """ + + pid_provider_xml = models.ForeignKey( + PidProviderXML, on_delete=models.SET_NULL, null=True, blank=True, unique=True + ) + incorrect_pid_v2 = models.CharField( + _("Incorrect v2"), max_length=24, null=True, blank=True + ) + correct_pid_v2 = models.CharField( + _("Correct v2"), max_length=24, null=True, blank=True + ) + fixed_in_upload = models.BooleanField(null=True, blank=True, default=None) + fixed_in_core = models.BooleanField(null=True, blank=True, default=None) + + base_form_class = CoreAdminModelForm + + panels = [ + FieldPanel("incorrect_pid_v2", read_only=True), + FieldPanel("correct_pid_v2", read_only=True), + FieldPanel("fixed_in_core"), + FieldPanel("fixed_in_upload"), + ] + + class Meta: + ordering = ["-updated", "-created"] + + indexes = [ + models.Index(fields=["incorrect_pid_v2"]), + models.Index(fields=["correct_pid_v2"]), + models.Index(fields=["fixed_in_core"]), + models.Index(fields=["fixed_in_upload"]), + ] + + def __str__(self): + return f"{self.pid_provider_xml.v3}" + + @staticmethod + def autocomplete_custom_queryset_filter(search_term): + return FixPidV2.objects.filter(pid_provider_xml__v3__icontains=search_term) + 
+ def autocomplete_label(self): + return f"{self.pid_provider_xml.v3}" + + @classmethod + def get(cls, pid_provider_xml=None): + if pid_provider_xml: + return cls.objects.get(pid_provider_xml=pid_provider_xml) + raise ValueError("FixPidV2.get requires pid_v3") + + @classmethod + def create( + cls, + user, + pid_provider_xml=None, + incorrect_pid_v2=None, + correct_pid_v2=None, + fixed_in_core=None, + fixed_in_upload=None, + ): + if correct_pid_v2 == incorrect_pid_v2 or not correct_pid_v2 or not incorrect_pid_v2: + raise ValueError( + f"FixPidV2.create: Unable to register correct_pid_v2={correct_pid_v2} and incorrect_pid_v2={incorrect_pid_v2} to be fixed" + ) + try: + obj = cls() + obj.pid_provider_xml = pid_provider_xml + obj.incorrect_pid_v2 = incorrect_pid_v2 + obj.correct_pid_v2 = correct_pid_v2 + obj.fixed_in_core = fixed_in_core + obj.fixed_in_upload = fixed_in_upload + obj.creator = user + obj.save() + return obj + except IntegrityError: + return cls.get(pid_provider_xml) + + @classmethod + def create_or_update( + cls, + user, + pid_provider_xml=None, + incorrect_pid_v2=None, + correct_pid_v2=None, + fixed_in_core=None, + fixed_in_upload=None, + ): + try: + obj = cls.get( + pid_provider_xml=pid_provider_xml, + ) + obj.updated_by = user + obj.fixed_in_core = fixed_in_core or obj.fixed_in_core + obj.fixed_in_upload = fixed_in_upload or obj.fixed_in_upload + obj.save() + return obj + except cls.DoesNotExist: + return cls.create( + user, + pid_provider_xml, + incorrect_pid_v2, + correct_pid_v2, + fixed_in_core, + fixed_in_upload, + ) + + @classmethod + def get_or_create( + cls, + user, + pid_provider_xml, + correct_pid_v2, + ): + try: + return cls.objects.get( + pid_provider_xml=pid_provider_xml, + ) + except cls.DoesNotExist: + return cls.create( + user, + pid_provider_xml, + pid_provider_xml.v2, + correct_pid_v2, + fixed_in_core=None, + fixed_in_upload=None, + ) diff --git a/pid_provider/provider.py b/pid_provider/provider.py index b8bfe6fe..71112579 100644 --- 
a/pid_provider/provider.py +++ b/pid_provider/provider.py @@ -1,9 +1,31 @@ +from django.db.models import Q from pid_provider.base_pid_provider import BasePidProvider +from pid_provider.models import PidProviderXML class PidProvider(BasePidProvider): """ Recebe XML para validar ou atribuir o ID do tipo v3 """ - pass + + @staticmethod + def get_xmltree(pid_v3): + try: + return PidProviderXML.get_xml_with_pre(pid_v3).xmltree + except (PidProviderXML.DoesNotExist, AttributeError): + return None + + @staticmethod + def get_sps_pkg_name(pid_v3): + try: + return PidProviderXML.get_xml_with_pre(pid_v3).sps_pkg_name + except (PidProviderXML.DoesNotExist, AttributeError): + return None + + def fix_pid_v2(self, user, pid_v3, correct_pid_v2): + try: + item = PidProviderXML.objects.get(v3=pid_v3) + except PidProviderXML.DoesNotExist as e: + raise PidProviderXML.DoesNotExist(f"{e}: {pid_v3}") + return item.fix_pid_v2(user, correct_pid_v2) diff --git a/pid_provider/requester.py b/pid_provider/requester.py index 4cd152c1..395545cf 100644 --- a/pid_provider/requester.py +++ b/pid_provider/requester.py @@ -1,17 +1,19 @@ import logging import sys +from django.db.models import Q from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre from pid_provider.base_pid_provider import BasePidProvider from pid_provider.client import PidProviderAPIClient -from pid_provider.models import PidProviderXML +from pid_provider.models import PidProviderXML, FixPidV2 from tracker.models import UnexpectedEvent class PidRequester(BasePidProvider): """ - Recebe XML para validar ou atribuir o ID do tipo v3 + Uso exclusivo da aplicação Upload + Realiza solicitações para Pid Provider do Core """ def __init__(self): @@ -163,3 +165,58 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): ) return registered + + def fix_pid_v2( + self, + user, + pid_v3, + correct_pid_v2, + ): + """ + Corrige pid_v2 + """ + fixed = { + "pid_v3": pid_v3, + "correct_pid_v2": correct_pid_v2, + } + + 
pid_provider_xml = PidProviderXML.objects.get(v3=pid_v3) + fixed["pid_v2"] = pid_provider_xml.v2 + try: + item_to_fix = FixPidV2.get_or_create( + user, pid_provider_xml, correct_pid_v2) + except ValueError as e: + return { + "error_message": str(e), + "error_type": str(type(e)), + "pid_v3": pid_v3, + "correct_pid_v2": correct_pid_v2, + } + + if not item_to_fix.fixed_in_upload: + # atualiza v2 em pid_provider_xml + response = pid_provider_xml.fix_pid_v2(user, correct_pid_v2) + fixed["fixed_in_upload"] = response.get("v2") == correct_pid_v2 + + if not item_to_fix.fixed_in_core: + # atualiza v2 em pid_provider_xml do CORE + # core - fix pid v2 + response = self.pid_provider_api.fix_pid_v2(pid_v3, correct_pid_v2) + logging.info(f"Resposta de Core.fix_pid_v2 {fixed}: {response}") + fixed.update(response or {}) + + fixed_in_upload = fixed.get("fixed_in_upload") + fixed_in_core = fixed.get("fixed_in_core") + if fixed_in_upload or fixed_in_core: + obj = FixPidV2.create_or_update( + user, + pid_provider_xml=pid_provider_xml, + incorrect_pid_v2=item_to_fix.incorrect_pid_v2, + correct_pid_v2=item_to_fix.correct_pid_v2, + fixed_in_core=fixed_in_core or item_to_fix.fixed_in_core, + fixed_in_upload=fixed_in_upload or item_to_fix.fixed_in_upload, + ) + fixed["fixed_in_upload"] = obj.fixed_in_upload + fixed["fixed_in_core"] = obj.fixed_in_core + logging.info(fixed) + return fixed diff --git a/pid_provider/tasks.py b/pid_provider/tasks.py index dbe30b3c..ed320246 100644 --- a/pid_provider/tasks.py +++ b/pid_provider/tasks.py @@ -4,6 +4,9 @@ from config import celery_app from pid_provider.provider import PidProvider +from pid_provider.requester import PidRequester +from proc.models import ArticleProc + User = get_user_model() @@ -33,3 +36,31 @@ def provide_pid_for_file( ): logging.info(resp) # return response + + +@celery_app.task(bind=True) +def task_fix_pid_v2( + self, + username=None, + user_id=None, +): + for article_proc in 
ArticleProc.objects.filter(sps_pkg__isnull=False).iterator(): + subtask_fix_pid_v2.apply_async( + kwargs=dict( + username=username, + user_id=user_id, + article_proc_id=article_proc.id, + ) + ) + + +@celery_app.task(bind=True) +def subtask_fix_pid_v2( + self, + username=None, + user_id=None, + article_proc_id=None, +): + user = _get_user(self.request, username=username, user_id=user_id) + article_proc = ArticleProc.objects.get(pk=article_proc_id) + article_proc.fix_pid_v2(user) diff --git a/pid_provider/wagtail_hooks.py b/pid_provider/wagtail_hooks.py index c23ee68e..3237b177 100644 --- a/pid_provider/wagtail_hooks.py +++ b/pid_provider/wagtail_hooks.py @@ -13,6 +13,7 @@ OtherPid, PidProviderXML, PidRequest, + FixPidV2, ) @@ -172,6 +173,38 @@ class PidProviderConfigAdmin(ModelAdmin): ) +class FixPidV2CreateView(CreateView): + def form_valid(self, form): + self.object = form.save_all(self.request.user) + return HttpResponseRedirect(self.get_success_url()) + + +class FixPidV2Admin(ModelAdmin): + list_per_page = 10 + model = FixPidV2 + inspect_view_enabled = True + menu_label = _("Fix pid v2") + create_view_class = FixPidV2CreateView + menu_icon = "folder" + add_to_settings_menu = False + exclude_from_explorer = False + + list_display = ( + "pid_provider_xml", + "correct_pid_v2", + "fixed_in_core", + "fixed_in_upload", + "created", + "updated", + ) + list_filter = ("fixed_in_core", "fixed_in_upload") + search_fields = ( + "correct_pid_v2", + "pid_provider_xml__v3", + "pid_provider_xml__pkg_name", + ) + + class PidProviderAdminGroup(ModelAdminGroup): menu_label = _("Pid Provider") menu_icon = "folder-open-inverse" # change as required @@ -182,6 +215,7 @@ class PidProviderAdminGroup(ModelAdminGroup): PidRequestAdmin, OtherPidAdmin, CollectionPidRequestAdmin, + FixPidV2Admin, ) diff --git a/proc/models.py b/proc/models.py index 9646297e..9f363414 100644 --- a/proc/models.py +++ b/proc/models.py @@ -1172,6 +1172,7 @@ def generate_sps_package( with TemporaryDirectory() as 
output_folder: xml_with_pre = get_migrated_xml_with_pre(self) + builder = PkgZipBuilder(xml_with_pre) sps_pkg_zip_path = builder.build_sps_package( output_folder, @@ -1186,6 +1187,9 @@ def generate_sps_package( # verificar se este código pode ser aproveitado pelo fluxo # de ingresso, se sim, ajustar os valores dos parâmetros # origin e is_published + + self.fix_pid_v2(user) + self.sps_pkg = SPSPkg.create_or_update( user, sps_pkg_zip_path, @@ -1213,6 +1217,11 @@ def generate_sps_package( detail=self.sps_pkg and self.sps_pkg.data or None, ) + def fix_pid_v2(self, user): + if self.sps_pkg: + self.sps_pkg.fix_pid_v2( + user, correct_pid_v2=self.migrated_data.pid) + def update_sps_pkg_status(self): if not self.sps_pkg: self.sps_pkg_status = tracker_choices.PROGRESS_STATUS_REPROC From 7a387787eee62ec3c052af61b901efc27cfa488b Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 24 Mar 2024 10:51:21 -0300 Subject: [PATCH 06/25] Corrige ausencia de pid v3 no xml submetido do upload para o core (#411) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Atualiza a versão de packtools 4.1.1 para usar XMLWithPre.data e .files * Modifica PidProviderXML.is_registered para atualizar os pids de xml_with_pre com os valores registrados, além disso, era necessário retornar se está registrado e igual ou registrado e diferente ou não registrado * Distingue status de demanda de registro e status do registro * Modifica PidProviderAPIClient._process_post_xml_response para atualizar ou não os valores dos pids de xml_with_pre com os valores fornecidos pelo Core * Adiciona registered_in_core como filtro de PidProviderXMLModelAdmin --- pid_provider/client.py | 28 ++++++++++++++--------- pid_provider/models.py | 25 ++++++++++++++++----- pid_provider/requester.py | 42 ++++++++++++++++++++++++++--------- pid_provider/wagtail_hooks.py | 1 + requirements/base.txt | 2 +- 5 files changed, 71 insertions(+), 
27 deletions(-) diff --git a/pid_provider/client.py b/pid_provider/client.py index 977a58a7..753246b2 100644 --- a/pid_provider/client.py +++ b/pid_provider/client.py @@ -109,7 +109,7 @@ def api_password(self): raise exceptions.APIPidProviderConfigError(e) return self._api_password - def provide_pid(self, xml_with_pre, name): + def provide_pid(self, xml_with_pre, name, created=None): """ name : str nome do arquivo xml @@ -123,7 +123,7 @@ def provide_pid(self, xml_with_pre, name): ) response = self._prepare_and_post_xml(xml_with_pre, name, self.token) - self._process_post_xml_response(response, xml_with_pre) + self._process_post_xml_response(response, xml_with_pre, created) try: return response[0] except IndexError: @@ -177,6 +177,8 @@ def _prepare_and_post_xml(self, xml_with_pre, name, token): name, ext = os.path.splitext(name) zip_xml_file_path = os.path.join(tmpdirname, name + ".zip") + logging.info(f"Posting xml with {xml_with_pre.data}") + create_xml_zip_file( zip_xml_file_path, xml_with_pre.tostring(pretty_print=True) ) @@ -233,14 +235,25 @@ def _post_xml(self, zip_xml_file_path, token, timeout): ) ) - def _process_post_xml_response(self, response, xml_with_pre): + def _process_post_xml_response(self, response, xml_with_pre, created=None): if not response: return logging.info(f"_process_post_xml_response: {response}") for item in response: + if not item.get("xml_changed"): + # dados em Upload é o mais atualizado return + try: + # atualiza xml_with_pre com valor do XML registrado no Core + if not item.get("force_xml_changed"): + # exceto 'force_xml_changed=True' ou + # exceto se o registro do Core foi criado posteriormente + if created and created < item["created"]: + # não atualizar com os dados do Core + return + for pid_type, pid_value in item["xml_changed"].items(): try: if pid_type == "pid_v3": @@ -249,15 +262,10 @@ def _process_post_xml_response(self, response, xml_with_pre): xml_with_pre.v2 = pid_value elif pid_type == "aop_pid": xml_with_pre.aop_pid = 
pid_value + item["do_upload_registration"] = True except Exception as e: pass - - except KeyError: - pass - try: - # atualiza xml_with_pre com valor do XML registrado no core - for xml_with_pre in XMLWithPre.create(uri=item["xml_uri"]): - break + return except KeyError: pass diff --git a/pid_provider/models.py b/pid_provider/models.py index f66d41d9..207fc2f0 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -822,8 +822,8 @@ def skip_registration( logging.info(f"Do not skip update: not registered") return - if registered_in_core and not registered.registered_in_core: - logging.info(f"Do not skip update: registered_in_core") + if registered_in_core != registered.registered_in_core: + logging.info(f"Do not skip update: need to update registered_in_core") return # verifica se é necessário atualizar @@ -1267,15 +1267,30 @@ def is_registered(cls, xml_with_pre): try: registered = cls._query_document(xml_adapter) - if registered and registered.is_equal_to(xml_adapter): - return registered.data + if registered: + # recupera os valores de pid v3, v2, aop_pid do registro PidProviderXML + # e adiciona / atualiza o xml_with_pre, o que garante que + # o XML tenha os pids ao ingressar no Core + # manterá estes valores, evitando que gere novos valores + # por não estarem presentes no XML + if registered.v3: + xml_with_pre.v3 = registered.v3 + if registered.v2: + xml_with_pre.v2 = registered.v2 + if registered.aop_pid: + xml_with_pre.aop_pid = registered.aop_pid + + data = registered.data + data["is_registered"] = True + data["is_equal"] = registered.is_equal_to(xml_adapter) + return data except ( exceptions.NotEnoughParametersToGetDocumentRecordError, exceptions.QueryDocumentMultipleObjectsReturnedError, ) as e: logging.exception(e) return {"error_msg": str(e), "error_type": str(type(e))} - return {} + return {"is_registered": False} def fix_pid_v2(self, user, correct_pid_v2): try: diff --git a/pid_provider/requester.py b/pid_provider/requester.py index 
395545cf..eaf2926c 100644 --- a/pid_provider/requester.py +++ b/pid_provider/requester.py @@ -88,10 +88,12 @@ def request_pid_for_xml_with_pre( self.core_registration(xml_with_pre, registered, article_proc, user) xml_changed = registered.get("xml_changed") - if not registered["registered_in_upload"]: - # não está registrado em Upload, realizar registro - + if registered.get("do_upload_registration"): + # Cria ou atualiza registro de PidProviderXML de Upload, se: + # - está registrado no upload mas o conteúdo mudou, atualiza + # - ou não está registrado no Upload, então cria op = article_proc.start(user, ">>> upload registration") + resp = self.provide_pid_for_xml_with_pre( xml_with_pre, xml_with_pre.filename, @@ -118,7 +120,9 @@ def request_pid_for_xml_with_pre( registered["xml_with_pre"] = xml_with_pre registered["filename"] = name - main_op.finish(user, completed=True, detail={"registered": registered}) + detail = registered.copy() + detail["xml_with_pre"] = xml_with_pre.data + main_op.finish(user, completed=True, detail={"registered": detail}) return registered @staticmethod @@ -128,14 +132,25 @@ def get_registration_demand(xml_with_pre, article_proc, user): Returns ------- - {"registered_in_upload": boolean, "registered_in_core": boolean} + {"do_core_registration": boolean, "do_upload_registration": boolean} """ op = article_proc.start(user, ">>> get registration demand") registered = PidProviderXML.is_registered(xml_with_pre) or {} - registered["registered_in_upload"] = bool(registered.get("v3")) - registered["registered_in_core"] = registered.get("registered_in_core") + + if registered.get("is_equal"): + # xml recebido é igual ao registrado + registered["do_upload_registration"] = False + registered["do_core_registration"] = not registered.get("registered_in_core") + registered["registered_in_upload"] = True + else: + # registrado no upload e xml recebido é diferente ao registrado + # xml recebido não está registrado no upload + 
registered["do_upload_registration"] = True + registered["do_core_registration"] = True + registered["registered_in_core"] = False + registered["registered_in_upload"] = False op.finish(user, completed=True, detail={"registered": registered}) @@ -145,7 +160,13 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): """ Solicita PID v3 para o Core, se necessário """ - if not registered["registered_in_core"]: + if registered["do_core_registration"]: + + if registered.get("is_registered") and not xml_with_pre.v3: + raise ValueError( + f"Unable to execute core registration for xml_with_pre without v3" + ) + op = article_proc.start(user, ">>> core registration") if not self.pid_provider_api.enabled: @@ -153,8 +174,9 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): return registered response = self.pid_provider_api.provide_pid( - xml_with_pre, xml_with_pre.filename + xml_with_pre, xml_with_pre.filename, created=registered.get("created") ) + response = response or {} registered.update(response) registered["registered_in_core"] = bool(response.get("v3")) @@ -164,8 +186,6 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): detail={"registered": registered, "response": response}, ) - return registered - def fix_pid_v2( self, user, diff --git a/pid_provider/wagtail_hooks.py b/pid_provider/wagtail_hooks.py index 3237b177..810d5b2d 100644 --- a/pid_provider/wagtail_hooks.py +++ b/pid_provider/wagtail_hooks.py @@ -107,6 +107,7 @@ class PidProviderXMLAdmin(ModelAdmin): "article_pub_year", "pub_year", "other_pid_count", + "registered_in_core", ) search_fields = ( "pkg_name", diff --git a/requirements/base.txt b/requirements/base.txt index a6236963..2de7634d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -60,7 +60,7 @@ minio==7.2 # Upload # ------------------------------------------------------------------------------ lxml==4.9.3 # https://github.com/lxml/lxml --e 
git+https://github.com/scieloorg/packtools.git@3.4.0#egg=packtools +-e git+https://github.com/scieloorg/packtools.git@4.1.1#egg=packtools -e git+https://github.com/scieloorg/scielo_scholarly_data#egg=scielo_scholarly_data # DSM Publication From a259b996e282c3f36362803a3f88dd7cfcb90b87 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel <82840278+samuelveigarangel@users.noreply.github.com> Date: Sun, 24 Mar 2024 10:52:39 -0300 Subject: [PATCH 07/25] Atualiza dependencias base.txt e production.txt (#409) * Comenta app captcha * Atualiza dependencias --------- Co-authored-by: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> --- config/settings/base.py | 2 +- requirements/base.txt | 44 +++++++++++++++++-------------------- requirements/production.txt | 8 +++---- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/config/settings/base.py b/config/settings/base.py index a906f7e2..54b2dc0d 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -101,7 +101,7 @@ "allauth.account", "allauth.socialaccount", "django_celery_beat", - "captcha", + # "captcha", "wagtailcaptcha", "wagtailmenus", "rest_framework", diff --git a/requirements/base.txt b/requirements/base.txt index 2de7634d..fffa56c5 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,44 +1,43 @@ -pytz==2023.3 # https://github.com/stub42/pytz -python-slugify==8.0.1 # https://github.com/un33k/python-slugify -Pillow==10.1.0 # https://github.com/python-pillow/Pillow -rcssmin==1.1.1 #django-compressor < 1.1.2 # https://github.com/ndparker/rcssmin +pytz==2023.3.post1 # https://github.com/stub42/pytz +Pillow==10.2.0 # https://github.com/python-pillow/Pillow argon2-cffi==23.1.0 # https://github.com/hynek/argon2_cffi whitenoise==6.6.0 # https://github.com/evansd/whitenoise redis==5.0.1 # https://github.com/redis/redis-py hiredis==2.2.3 # https://github.com/redis/hiredis-py # celery==5.2.7 # pyup: < 6.0 # https://github.com/celery/celery celery==5.3.6 # pyup: < 6.0 # 
https://github.com/celery/celery -flower==2.0.1 # https://github.com/mher/flower +flower==2.0.1 # https://github.com/mher/flower # Django # ------------------------------------------------------------------------------ -django==4.2.6 +django==5.0.3 django-environ==0.11.2 # https://github.com/joke2k/django-environ -django-model-utils==4.3.1 # https://github.com/jazzband/django-model-utils -django-allauth==0.58.1 # https://github.com/pennersr/django-allauth +django-model-utils==4.4.0 # https://github.com/jazzband/django-model-utils +django-allauth==0.61.1 # https://github.com/pennersr/django-allauth django-crispy-forms==2.1 # https://github.com/django-crispy-forms/django-crispy-forms -crispy-bootstrap5==0.7 # https://github.com/django-crispy-forms/crispy-bootstrap5 +crispy-bootstrap5==2024.2 # https://github.com/django-crispy-forms/crispy-bootstrap5 django-compressor==4.4 # https://github.com/django-compressor/django-compressor -django-redis==5.4.0 # https://github.com/jazzband/django-redis +django-redis==5.4.0 # https://github.com/jazzband/django-redis4 # Django REST -djangorestframework==3.14.0 -djangorestframework-simplejwt==5.3.0 # https://django-rest-framework-simplejwt.readthedocs.io/en/latest/ +djangorestframework==3.15.0 +djangorestframework-simplejwt==5.3.1 # https://django-rest-framework-simplejwt.readthedocs.io/en/latest/ + # Django celery # ------------------------------------------------------------------------------ -django-celery-beat==2.5.0 # https://github.com/celery/django-celery-beat +django-celery-beat==2.6.0 # https://github.com/celery/django-celery-beat django_celery_results==2.5.1 # Wagtail # ------------------------------------------------------------------------------ -wagtail==5.2.1 # https://github.com/wagtail/wagtail +wagtail==5.2.3 # https://github.com/wagtail/wagtail # Wagtail Recaptcha # ------------------------------------------------------------------------------ -django-recaptcha==3.0.0 -wagtail-django-recaptcha==1.0 +# 
django-recaptcha==3.0.0 +wagtail-django-recaptcha==2.1.1 # Wagtail Menu # ------------------------------------------------------------------------------ @@ -46,7 +45,7 @@ wagtailmenus==3.1.9 # Wagtail Localize # ------------------------------------------------------------------------------ -wagtail-localize==1.7 +wagtail-localize==1.8.2 # Wagtail-Autocomplete # https://github.com/wagtail/wagtail-autocomplete @@ -55,22 +54,19 @@ wagtail-autocomplete==0.11.0 # DSM Minio # ------------------------------------------------------------------------------ -minio==7.2 +minio==7.2.5 # Upload # ------------------------------------------------------------------------------ -lxml==4.9.3 # https://github.com/lxml/lxml +lxml==4.9.4 # https://github.com/lxml/lxml -e git+https://github.com/scieloorg/packtools.git@4.1.1#egg=packtools -e git+https://github.com/scieloorg/scielo_scholarly_data#egg=scielo_scholarly_data # DSM Publication # ------------------------------------------------------------------------------ -e git+https://github.com/scieloorg/opac_schema.git@v2.66#egg=opac_schema -mongoengine==0.27.0 -pymongo==4.6.1 +mongoengine==0.28.2 aiohttp==3.9.1 -python-magic==0.4.27 - # DSM Migration # ------------------------------------------------------------------------------ -e git+https://github.com/scieloorg/scielo_migration.git@1.6.4#egg=scielo_classic_website @@ -81,4 +77,4 @@ tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability # Tenacity # ------------------------------------------------------------------------------ tenacity==8.2.3 # https://pypi.org/project/tenacity/ -urllib3==2.1.0 +urllib3==2.2.1 diff --git a/requirements/production.txt b/requirements/production.txt index e59f56d2..0443568a 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -2,10 +2,10 @@ -r base.txt -gevent==23.9.1 # http://www.gevent.org/ -gunicorn==21.2.0 # https://github.com/benoitc/gunicorn +gevent==24.2.1 # http://www.gevent.org/ 
+gunicorn==21.2.0 # https://github.com/benoitc/gunicorn psycopg2-binary==2.9.9 # https://github.com/psycopg/psycopg2 -sentry-sdk==1.39.1 # https://github.com/getsentry/sentry-python +sentry-sdk==1.43.0 # https://github.com/getsentry/sentry-python # Django # ------------------------------------------------------------------------------ @@ -14,4 +14,4 @@ setuptools>=68.2.2 # not directly required, pinned by Snyk to avoid a vulnerabil # Elastic-APM # https://pypi.org/project/elastic-apm/ # ------------------------------------------------------------------------------ -elastic-apm==6.19.0 \ No newline at end of file +elastic-apm==6.21.4.post8347027212 \ No newline at end of file From ff068e8ab5fff64ea0db757531fd9a3378e632e9 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Wed, 27 Mar 2024 00:25:02 -0300 Subject: [PATCH 08/25] =?UTF-8?q?Modifica=20comportamento=20de=20Pid=20pro?= =?UTF-8?q?vider,=20que=20passa=20a=20aceitar=20mudan=C3=A7as=20de=20pids?= =?UTF-8?q?=20(#415)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria PidProviderXML.complete_pids, que completa pids com registrados ou inéditos * Cria PidProviderXML._check_pids, que valida pid do XML é inédito e/ou registrado e/ou pertencente a outro documento * Cria PidProviderXML.get_pids, que retorna todos os pids vigentes e outros * Corrige PidProviderXML._is_registered_pid, adicionando a verificação em OtherPid * Corrige PidProviderXML._get_unique_v3, que usa _is_registered_pid e agora não precisa verificar OtherPid * Ajusta PidProviderXML._add_other_pid * Remove PidProviderXML._complete_pids excedente * Corrige PidProvider._add_pid_v3 e _add_pid_v2 * Corrige PidProviderXML.is_registered * Ajusta PidProviderXML._save, removendo _add_other_pid e removendo change_pids * Modifica PidProviderXML.register * Melhora XMLVersion.__str__, mostrando nome do arquivo + data no lugar de pid v3 * Melhora 
_process_post_xml_response * Para PidProvider.provide_pid_for_xml_with_pre, adiciona parâmetro caller, completa XML com pids registrados se ausentes no XML, adiciona xml_changed ao retorno * Adiciona comando para completar XML com pids registrados antes de solicitar pid para Core --- pid_provider/base_pid_provider.py | 11 + pid_provider/client.py | 15 +- pid_provider/models.py | 335 ++++++++++++++++++++---------- pid_provider/requester.py | 55 +++-- 4 files changed, 288 insertions(+), 128 deletions(-) diff --git a/pid_provider/base_pid_provider.py b/pid_provider/base_pid_provider.py index ade1e13e..3808f3db 100644 --- a/pid_provider/base_pid_provider.py +++ b/pid_provider/base_pid_provider.py @@ -22,10 +22,14 @@ def provide_pid_for_xml_with_pre( is_published=None, origin=None, registered_in_core=None, + caller=None, ): """ Fornece / Valida PID para o XML no formato de objeto de XMLWithPre """ + # Completa os valores ausentes de pid com recuperados ou com inéditos + xml_changed = PidProviderXML.complete_pids(xml_with_pre) + registered = PidProviderXML.register( xml_with_pre, name, @@ -36,6 +40,11 @@ def provide_pid_for_xml_with_pre( origin=origin, registered_in_core=registered_in_core, ) + if xml_changed: + registered["xml_changed"] = xml_changed + # indica que Upload precisa aplicar as mudanças no xml_with_pre + registered["apply_xml_changes"] = caller == "core" + return registered def provide_pid_for_xml_zip( @@ -47,6 +56,7 @@ def provide_pid_for_xml_zip( force_update=None, is_published=None, registered_in_core=None, + caller=None, ): """ Fornece / Valida PID para o XML em um arquivo compactado @@ -66,6 +76,7 @@ def provide_pid_for_xml_zip( is_published=is_published, origin=zip_xml_file_path, registered_in_core=registered_in_core, + caller=caller, ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() diff --git a/pid_provider/client.py b/pid_provider/client.py index 753246b2..8a58757a 100644 --- a/pid_provider/client.py +++ 
b/pid_provider/client.py @@ -238,20 +238,25 @@ def _post_xml(self, zip_xml_file_path, token, timeout): def _process_post_xml_response(self, response, xml_with_pre, created=None): if not response: return - logging.info(f"_process_post_xml_response: {response}") for item in response: + logging.info(f"_process_post_xml_response ({xml_with_pre.data}): {item}") if not item.get("xml_changed"): - # dados em Upload é o mais atualizado + # pids do xml_with_pre não mudaram + logging.info("No xml changes") return try: # atualiza xml_with_pre com valor do XML registrado no Core - if not item.get("force_xml_changed"): - # exceto 'force_xml_changed=True' ou + if not item.get("apply_xml_changes"): + # exceto 'apply_xml_changes=True' ou # exceto se o registro do Core foi criado posteriormente if created and created < item["created"]: # não atualizar com os dados do Core + logging.info({ + "created_at_upload": created, + "created_at_core": item['created'], + }) return for pid_type, pid_value in item["xml_changed"].items(): @@ -262,7 +267,7 @@ def _process_post_xml_response(self, response, xml_with_pre, created=None): xml_with_pre.v2 = pid_value elif pid_type == "aop_pid": xml_with_pre.aop_pid = pid_value - item["do_upload_registration"] = True + logging.info("XML changed") except Exception as e: pass return diff --git a/pid_provider/models.py b/pid_provider/models.py index 207fc2f0..6afd6b2e 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -69,7 +69,7 @@ class Meta: ] def __str__(self): - return self.pid_provider_xml.v3 + return f"{self.pid_provider_xml.pkg_name} {self.created}" @classmethod def create( @@ -425,6 +425,9 @@ def get_or_create(cls, pid_type, pid_in_xml, version, user, pid_provider_xml): obj.save() return obj + raise ValueError( + f"OtherPid.get_or_create requires pid_in_xml ({pid_in_xml}) and pid_type ({pid_type}) and version ({version}) and user ({user}) and pid_provider_xml ({pid_provider_xml})" + ) @property def created_updated(self): @@ -602,6 
+605,88 @@ def xml_with_pre(self): def is_aop(self): return self.volume is None and self.number is None and self.suppl is None + @classmethod + def _check_pids(cls, user, xml_adapter, registered): + """ + No XML tem que conter os pids pertencentes ao registrado ou + caso não é registrado, tem que ter pids inéditos. + Também pode acontecer de o XML registrado ter mais de um pid v3, v2, ... + Pode haver necessidade de atualizar o valor de pid v3, v2, ... + Mudança em pid não é recomendado, mas pode acontecer + + Parameters + ---------- + xml_adapter: PidProviderXMLAdapter + registered: PidProviderXML + + Returns + ------- + list of dict: keys=(pid_type, pid_in_xml, registered) + + """ + changed_pids = [] + pids = {"pid_v3": [], "pid_v2": [], "aop_pid": []} + if registered: + pids = registered.get_pids() + + if xml_adapter.v3 not in pids["pid_v3"]: + # pid no xml é novo + owner = cls._is_registered_pid(v3=xml_adapter.v3) + if owner: + # e está registrado para outro XML + raise ValueError( + f"PID {xml_adapter.v3} is already registered for {owner}" + ) + elif registered: + # indica a mudança do pid + item = { + "pid_type": "pid_v3", + "pid_in_xml": xml_adapter.v3, + "registered": registered.v3, + } + registered.v3 = xml_adapter.v3 + registered._add_other_pid([item.copy()], user) + changed_pids.append(item) + + if xml_adapter.v2 not in pids["pid_v2"]: + # pid no xml é novo + owner = cls._is_registered_pid(v2=xml_adapter.v2) + if owner: + # e está registrado para outro XML + raise ValueError( + f"PID {xml_adapter.v2} is already registered for {owner}" + ) + elif registered: + # indica a mudança do pid + item = { + "pid_type": "pid_v2", + "pid_in_xml": xml_adapter.v2, + "registered": registered.v2, + } + registered.v2 = xml_adapter.v2 + registered._add_other_pid([item.copy()], user) + changed_pids.append(item) + + if xml_adapter.aop_pid and xml_adapter.aop_pid not in pids["aop_pid"]: + # pid no xml é novo + owner = cls._is_registered_pid(aop_pid=xml_adapter.aop_pid) + if 
owner: + # e está registrado para outro XML + raise ValueError( + f"PID {xml_adapter.aop_pid} is already registered for {owner}" + ) + elif registered: + # indica a mudança do pid + item = { + "pid_type": "aop_pid", + "pid_in_xml": xml_adapter.aop_pid, + "registered": registered.aop_pid, + } + registered.aop_pid = xml_adapter.aop_pid + registered._add_other_pid([item.copy()], user) + changed_pids.append(item) + return changed_pids + @classmethod def register( cls, @@ -647,12 +732,23 @@ def register( """ try: + detail = xml_with_pre.data + logging.info(f"PidProviderXML.register: {detail}") + input_data = {} input_data["xml_with_pre"] = xml_with_pre input_data["filename"] = filename input_data["origin"] = origin - logging.info(f"PidProviderXML.register .... {origin or filename}") + if not xml_with_pre.v3: + raise exceptions.InvalidPidError( + f"Unable to register {filename}, because v3 is invalid" + ) + + if not xml_with_pre.v2: + raise exceptions.InvalidPidError( + f"Unable to register {filename}, because v2 is invalid" + ) # adaptador do xml with pre xml_adapter = xml_sps_adapter.PidProviderXMLAdapter(xml_with_pre) @@ -671,45 +767,16 @@ def register( if updated_data: return updated_data - # verfica os PIDs encontrados no XML / atualiza-os se necessário - changed_pids = cls._complete_pids(xml_adapter, registered) - - if not xml_adapter.v3: - raise exceptions.InvalidPidError( - f"Unable to register {filename}, because v3 is invalid" - ) + # valida os PIDs do XML + # - não podem ter conflito com outros registros + # - identifica mudança + changed_pids = cls._check_pids(user, xml_adapter, registered) - if not xml_adapter.v2: - raise exceptions.InvalidPidError( - f"Unable to register {filename}, because v2 is invalid" - ) - - xml_changed = { - change["pid_type"]: change["pid_assigned"] for change in changed_pids - } - - # compara de novo, após completar pids - updated_data = cls.skip_registration( - xml_adapter, - registered, - force_update, - origin_date, - 
registered_in_core, - ) - if updated_data: - # XML da entrada e registrado divergem: não tem e tem pids, - # no entanto, após completar com pids, ficam idênticos - updated_data["xml_changed"] = xml_changed - updated_data.update(input_data) - if xml_with_pre.v3 == registered.v3: - logging.info("skip_registration second") - return updated_data # cria ou atualiza registro registered = cls._save( registered, xml_adapter, user, - changed_pids, origin_date, available_since, registered_in_core, @@ -717,7 +784,7 @@ def register( # data to return data = registered.data.copy() - data["xml_changed"] = xml_changed + data["changed_pids"] = changed_pids pid_request = PidRequest.cancel_failure( user=user, @@ -755,7 +822,7 @@ def register( user=user, origin_date=origin_date, origin=origin, - detail={}, + detail=detail, ) response = input_data response.update(pid_request.data) @@ -767,7 +834,6 @@ def _save( registered, xml_adapter, user, - changed_pids, origin_date=None, available_since=None, registered_in_core=None, @@ -795,12 +861,9 @@ def _save( registered._add_journal(xml_adapter) registered._add_issue(xml_adapter) - registered.save() - registered._add_current_version(xml_adapter, user) - registered.save() - registered._add_other_pid(changed_pids, user) + registered._add_current_version(xml_adapter, user) return registered @@ -998,27 +1061,25 @@ def _add_current_version(self, xml_adapter, user): self.current_version = XMLVersion.get_or_create( user, self, xml_adapter.xml_with_pre ) + self.save() def _add_other_pid(self, changed_pids, user): - # requires registered.current_version is set + # registrados passam a ser other pid + # os pids do XML passam a ser os vigentes if not changed_pids: return - if not self.current_version: - raise ValueError( - "PidProviderXML._add_other_pid requires current_version is set" - ) + self.save() for change_args in changed_pids: - if change_args["pid_in_xml"]: - # somente registra as mudanças de um pid_in_xml não vazio - change_args["user"] = 
user - change_args["version"] = self.current_version - change_args["pid_provider_xml"] = self - change_args.pop("pid_assigned") - OtherPid.get_or_create(**change_args) - self.other_pid_count = OtherPid.objects.filter( - pid_provider_xml=self - ).count() - self.save() + + change_args["pid_in_xml"] = change_args.pop("registered") + + change_args["user"] = user + change_args["version"] = self.current_version + change_args["pid_provider_xml"] = self + + OtherPid.get_or_create(**change_args) + self.other_pid_count = OtherPid.objects.filter(pid_provider_xml=self).count() + self.save() @classmethod def _get_unique_v3(cls): @@ -1032,10 +1093,7 @@ def _get_unique_v3(cls): while True: generated = v3_gen.generates() if not cls._is_registered_pid(v3=generated): - try: - OtherPid.objects.get(pid_type="pid_v3", pid_in_xml=generated) - except OtherPid.DoesNotExist: - return generated + return generated @classmethod def _is_registered_pid(cls, v2=None, v3=None, aop_pid=None): @@ -1050,9 +1108,15 @@ def _is_registered_pid(cls, v2=None, v3=None, aop_pid=None): try: found = cls.objects.filter(**kwargs)[0] except IndexError: - return False + try: + obj = OtherPid.objects.get(pid_in_xml=v3 or v2 or aop_pid) + return obj.pid_provider_xml + except OtherPid.DoesNotExist: + return None + except OtherPid.MultipleObjectsReturned: + return obj.pid_provider_xml else: - return True + return found @classmethod def _v2_generates(cls, xml_adapter): @@ -1077,6 +1141,65 @@ def _get_unique_v2(cls, xml_adapter): if not cls._is_registered_pid(v2=generated): return generated + @classmethod + def complete_pids( + cls, + xml_with_pre, + ): + """ + Evaluate the XML data and complete xml_with_pre with PID v3, v2, aop_pid + + Parameters + ---------- + xml : XMLWithPre + filename : str + user : User + + Returns + ------- + { + "v3": self.v3, + "v2": self.v2, + "aop_pid": self.aop_pid, + "xml_uri": self.xml_uri, + "article": self.article, + "created": self.created.isoformat(), + "updated": 
self.updated.isoformat(), + "xml_changed": boolean, + "record_status": created | updated | retrieved + } + """ + try: + # adaptador do xml with pre + xml_adapter = xml_sps_adapter.PidProviderXMLAdapter(xml_with_pre) + + # consulta se documento já está registrado + registered = cls._query_document(xml_adapter) + + # verfica os PIDs encontrados no XML / atualiza-os se necessário + changed_pids = cls._complete_pids(xml_adapter, registered) + + logging.info( + f"PidProviderXML.complete_pids: input={xml_with_pre.data} | output={changed_pids}" + ) + return changed_pids + + except Exception as e: + # except ( + # exceptions.NotEnoughParametersToGetDocumentRecordError, + # exceptions.QueryDocumentMultipleObjectsReturnedError, + # ) as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "PidProviderXML.complete_pids", + "detail": xml_with_pre.data, + }, + ) + return {"error_message": str(e), "error_type": str(type(e))} + @classmethod def _complete_pids(cls, xml_adapter, registered): """ @@ -1094,11 +1217,11 @@ def _complete_pids(cls, xml_adapter, registered): Parameters ---------- xml_adapter: PidProviderXMLAdapter - registered: XMLArticle + registered: PidProviderXML Returns ------- - bool + list of dict: keys=(pid_type, pid_in_xml, pid_assigned) """ before = (xml_adapter.v3, xml_adapter.v2, xml_adapter.aop_pid) @@ -1119,25 +1242,26 @@ def _complete_pids(cls, xml_adapter, registered): after = (xml_adapter.v3, xml_adapter.v2, xml_adapter.aop_pid) # verifica se houve mudança nos PIDs do XML - changes = [] + changes = {} for label, bef, aft in zip(("pid_v3", "pid_v2", "aop_pid"), before, after): if bef != aft: - changes.append( - dict( - pid_type=label, - pid_in_xml=bef, - pid_assigned=aft, - ) - ) - if changes: - LOGGER.info(f"changes: {changes}") - + changes[label] = aft return changes @classmethod def _is_valid_pid(cls, value): return bool(value and len(value) == 23) + 
def get_pids(self): + d = {} + d["pid_v3"] = [self.v3] + d["pid_v2"] = [self.v2] + d["aop_pid"] = [self.aop_pid] + + for item in OtherPid.objects.filter(pid_provider_xml=self).iterator(): + d[item.pid_type].append(item.pid_in_xml) + return d + @classmethod def _add_pid_v3(cls, xml_adapter, registered): """ @@ -1147,16 +1271,16 @@ def _add_pid_v3(cls, xml_adapter, registered): Arguments --------- xml_adapter: PidProviderXMLAdapter - registered: XMLArticle + registered: PidProviderXML """ - if registered: - # recupera do registrado - xml_adapter.v3 = registered.v3 - else: - # se v3 de xml está ausente ou já está registrado para outro xml - if not cls._is_valid_pid(xml_adapter.v3) or cls._is_registered_pid( - v3=xml_adapter.v3 - ): + if ( + not xml_adapter.v3 + or not cls._is_valid_pid(xml_adapter.v3) + or cls._is_registered_pid(v3=xml_adapter.v3) + ): + if registered: + xml_adapter.v3 = registered.v3 + else: # obtém um v3 inédito xml_adapter.v3 = cls._get_unique_v3() @@ -1168,7 +1292,7 @@ def _add_aop_pid(cls, xml_adapter, registered): Arguments --------- xml_adapter: PidProviderXMLAdapter - registered: XMLArticle + registered: PidProviderXML """ if registered and registered.aop_pid: xml_adapter.aop_pid = registered.aop_pid @@ -1181,13 +1305,18 @@ def _add_pid_v2(cls, xml_adapter, registered): Arguments --------- xml_adapter: PidProviderXMLAdapter - registered: XMLArticle + registered: PidProviderXML """ - if registered and registered.v2 and xml_adapter.v2 != registered.v2: - xml_adapter.v2 = registered.v2 - if not cls._is_valid_pid(xml_adapter.v2): - xml_adapter.v2 = cls._get_unique_v2(xml_adapter) + if ( + not xml_adapter.v2 + or not cls._is_valid_pid(xml_adapter.v2) + or cls._is_registered_pid(v2=xml_adapter.v2) + ): + if registered: + xml_adapter.v2 = registered.v2 + else: + xml_adapter.v2 = cls._get_unique_v2(xml_adapter) @classmethod def validate_query_params(cls, query_params): @@ -1268,20 +1397,7 @@ def is_registered(cls, xml_with_pre): try: registered = 
cls._query_document(xml_adapter) if registered: - # recupera os valores de pid v3, v2, aop_pid do registro PidProviderXML - # e adiciona / atualiza o xml_with_pre, o que garante que - # o XML tenha os pids ao ingressar no Core - # manterá estes valores, evitando que gere novos valores - # por não estarem presentes no XML - if registered.v3: - xml_with_pre.v3 = registered.v3 - if registered.v2: - xml_with_pre.v2 = registered.v2 - if registered.aop_pid: - xml_with_pre.aop_pid = registered.aop_pid - data = registered.data - data["is_registered"] = True data["is_equal"] = registered.is_equal_to(xml_adapter) return data except ( @@ -1290,7 +1406,7 @@ def is_registered(cls, xml_with_pre): ) as e: logging.exception(e) return {"error_msg": str(e), "error_type": str(type(e))} - return {"is_registered": False} + return {} def fix_pid_v2(self, user, correct_pid_v2): try: @@ -1318,6 +1434,7 @@ class CollectionPidRequest(CommonControlField): para controlar a entrada de XML provenientes do AM registrando cada coleção e a data da coleta """ + collection = models.ForeignKey( Collection, on_delete=models.SET_NULL, null=True, blank=True ) @@ -1452,7 +1569,11 @@ def create( fixed_in_core=None, fixed_in_upload=None, ): - if correct_pid_v2 == incorrect_pid_v2 or not correct_pid_v2 or not incorrect_pid_v2: + if ( + correct_pid_v2 == incorrect_pid_v2 + or not correct_pid_v2 + or not incorrect_pid_v2 + ): raise ValueError( f"FixPidV2.create: Unable to register correct_pid_v2={correct_pid_v2} and incorrect_pid_v2={incorrect_pid_v2} to be fixed" ) diff --git a/pid_provider/requester.py b/pid_provider/requester.py index eaf2926c..897fc623 100644 --- a/pid_provider/requester.py +++ b/pid_provider/requester.py @@ -85,10 +85,29 @@ def request_pid_for_xml_with_pre( if registered.get("error_type"): return registered + xml_changed = {} + # Completa os valores ausentes de pid com recuperados ou com inéditos + try: + before = (xml_with_pre.v3, xml_with_pre.v2, xml_with_pre.aop_pid) + xml_with_pre.v3 
= xml_with_pre.v3 or registered["v3"] + xml_with_pre.v2 = xml_with_pre.v2 or registered["v2"] + if registered["aop_pid"]: + xml_with_pre.aop_pid = registered["aop_pid"] + + # verifica se houve mudança nos PIDs do XML + after = (xml_with_pre.v3, xml_with_pre.v2, xml_with_pre.aop_pid) + for label, bef, aft in zip(("pid_v3", "pid_v2", "aop_pid"), before, after): + if bef != aft: + xml_changed[label] = aft + except KeyError: + pass + + # Solicita pid para Core self.core_registration(xml_with_pre, registered, article_proc, user) - xml_changed = registered.get("xml_changed") + xml_changed = xml_changed or registered.get("xml_changed") - if registered.get("do_upload_registration"): + # Atualiza registro de Upload + if registered["do_upload_registration"] or xml_changed: # Cria ou atualiza registro de PidProviderXML de Upload, se: # - está registrado no upload mas o conteúdo mudou, atualiza # - ou não está registrado no Upload, então cria @@ -114,7 +133,7 @@ def request_pid_for_xml_with_pre( ) registered["synchronized"] = ( - registered["registered_in_core"] and registered["registered_in_upload"] + registered.get("registered_in_core") and registered.get("registered_in_upload") ) registered["xml_changed"] = xml_changed registered["xml_with_pre"] = xml_with_pre @@ -141,16 +160,12 @@ def get_registration_demand(xml_with_pre, article_proc, user): if registered.get("is_equal"): # xml recebido é igual ao registrado - registered["do_upload_registration"] = False registered["do_core_registration"] = not registered.get("registered_in_core") - registered["registered_in_upload"] = True + registered["do_upload_registration"] = registered["do_core_registration"] else: - # registrado no upload e xml recebido é diferente ao registrado - # xml recebido não está registrado no upload - registered["do_upload_registration"] = True + # xml recebido é diferente ao registrado ou não está no upload registered["do_core_registration"] = True - registered["registered_in_core"] = False - 
registered["registered_in_upload"] = False + registered["do_upload_registration"] = True op.finish(user, completed=True, detail={"registered": registered}) @@ -162,10 +177,7 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): """ if registered["do_core_registration"]: - if registered.get("is_registered") and not xml_with_pre.v3: - raise ValueError( - f"Unable to execute core registration for xml_with_pre without v3" - ) + registered["registered_in_core"] = False op = article_proc.start(user, ">>> core registration") @@ -173,6 +185,11 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): op.finish(user, completed=False, detail={"core_pid_provider": "off"}) return registered + if registered.get("v3") and not xml_with_pre.v3: + raise ValueError( + f"Unable to execute core registration for xml_with_pre without v3" + ) + response = self.pid_provider_api.provide_pid( xml_with_pre, xml_with_pre.filename, created=registered.get("created") ) @@ -200,8 +217,14 @@ def fix_pid_v2( "correct_pid_v2": correct_pid_v2, } - pid_provider_xml = PidProviderXML.objects.get(v3=pid_v3) - fixed["pid_v2"] = pid_provider_xml.v2 + try: + pid_provider_xml = PidProviderXML.objects.get( + v3=pid_v3, v2__contains=correct_pid_v2[:14]) + fixed["pid_v2"] = pid_provider_xml.v2 + except PidProviderXML.DoesNotExist: + return fixed + except PidProviderXML.MultipleObjectsReturned: + return fixed try: item_to_fix = FixPidV2.get_or_create( user, pid_provider_xml, correct_pid_v2) From 17774551bc261e28fd572a7fa94e89c9d548486d Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Wed, 27 Mar 2024 01:24:42 -0300 Subject: [PATCH 09/25] Cria meio de configurar / habilitar / desabilitar fix_pid_v2 do Core (#416) * Cria a classe PidProviderEndpoint, inline de PidProviderConfig * Modifica o modo de obter fix_pid_v2_url * Adiciona modelo PidProviderEndpoint --- pid_provider/choices.py | 3 + pid_provider/client.py | 14 
+-- .../migrations/0003_pidproviderendpoint.py | 102 ++++++++++++++++++ pid_provider/models.py | 38 ++++++- 4 files changed, 150 insertions(+), 7 deletions(-) create mode 100644 pid_provider/choices.py create mode 100644 pid_provider/migrations/0003_pidproviderendpoint.py diff --git a/pid_provider/choices.py b/pid_provider/choices.py new file mode 100644 index 00000000..6ddfda0d --- /dev/null +++ b/pid_provider/choices.py @@ -0,0 +1,3 @@ +ENDPOINTS = ( + ('fix-pid-v2', 'fix-pid-v2'), +) diff --git a/pid_provider/client.py b/pid_provider/client.py index 8a58757a..10b92037 100644 --- a/pid_provider/client.py +++ b/pid_provider/client.py @@ -63,12 +63,12 @@ def config(self): def fix_pid_v2_url(self): if not hasattr(self, "_fix_pid_v2_url") or not self._fix_pid_v2_url: try: - if self.pid_provider_api_post_xml: - self._fix_pid_v2_url = self.pid_provider_api_post_xml.replace( - "pid_provider", "fix_pid_v2" - ) - except AttributeError as e: - raise exceptions.APIPidProviderConfigError(e) + self._fix_pid_v2_url = None + endpoint = self.config.endpoint.filter(name='fix-pid-v2')[0] + if endpoint.enabled: + self._fix_pid_v2_url = endpoint.url + except IndexError: + pass return self._fix_pid_v2_url @property @@ -280,6 +280,8 @@ def fix_pid_v2(self, pid_v3, correct_pid_v2): nome do arquivo xml """ try: + if not self.fix_pid_v2_url: + return {"fix-pid-v2": "unavailable"} self.token = self.token or self._get_token( username=self.api_username, diff --git a/pid_provider/migrations/0003_pidproviderendpoint.py b/pid_provider/migrations/0003_pidproviderendpoint.py new file mode 100644 index 00000000..fe0d59f9 --- /dev/null +++ b/pid_provider/migrations/0003_pidproviderendpoint.py @@ -0,0 +1,102 @@ +# Generated by Django 4.2.6 on 2024-03-27 03:40 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import modelcluster.fields + + +class Migration(migrations.Migration): + dependencies = [ + 
migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("pid_provider", "0002_alter_pidproviderxml_options_fixpidv2"), + ] + + operations = [ + migrations.CreateModel( + name="PidProviderEndpoint", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "name", + models.CharField( + blank=True, + choices=[("fix-pid-v2", "fix-pid-v2")], + max_length=16, + null=True, + verbose_name="Endpoint name", + ), + ), + ( + "url", + models.URLField( + blank=True, + max_length=128, + null=True, + verbose_name="Endpoint URL", + ), + ), + ("enabled", models.BooleanField(default=False)), + ( + "config", + modelcluster.fields.ParentalKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="endpoint", + to="pid_provider.pidproviderconfig", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "indexes": [ + models.Index(fields=["name"], name="pid_provide_name_870abe_idx"), + models.Index( + fields=["enabled"], name="pid_provide_enabled_5b5f83_idx" + ), + ], + }, + ), + ] diff --git a/pid_provider/models.py b/pid_provider/models.py index 6afd6b2e..c8cd41d0 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -21,6 +21,7 @@ from core.forms import CoreAdminModelForm from core.models import CommonControlField from pid_provider 
import exceptions +from pid_provider import choices from tracker.models import UnexpectedEvent LOGGER = logging.getLogger(__name__) @@ -155,7 +156,7 @@ def get_or_create(cls, user, pid_provider_xml, xml_with_pre): ) -class PidProviderConfig(CommonControlField): +class PidProviderConfig(CommonControlField, ClusterableModel): """ Tem função de guardar XML que falhou no registro """ @@ -204,11 +205,46 @@ def get_or_create( FieldPanel("api_username"), FieldPanel("api_password"), FieldPanel("timeout"), + InlinePanel("endpoint", label=_("Endpoints")), ] base_form_class = CoreAdminModelForm +class PidProviderEndpoint(CommonControlField): + """ + Registro de PIDs (associados a um PidProviderXML) cujo valor difere do valor atribuído + """ + + config = ParentalKey( + "PidProviderConfig", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="endpoint", + ) + name = models.CharField(_("Endpoint name"), max_length=16, null=True, blank=True, choices=choices.ENDPOINTS) + url = models.URLField( + _("Endpoint URL"), max_length=128, null=True, blank=True + ) + enabled = models.BooleanField(default=False) + + panels = [ + FieldPanel("name"), + FieldPanel("url"), + FieldPanel("enabled"), + ] + + class Meta: + indexes = [ + models.Index(fields=["name"]), + models.Index(fields=["enabled"]), + ] + + def __str__(self): + return f"{self.url} {self.enabled}" + + class PidRequest(CommonControlField): origin = models.CharField( _("Request origin"), max_length=124, null=True, blank=True From 3b9991685c5fde53e2e4a88abf015b42bff690bd Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Wed, 27 Mar 2024 08:18:32 -0300 Subject: [PATCH 10/25] Adiciona 'fixed_in_core': False ao retorno de fix_pid_v2 (#417) --- pid_provider/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pid_provider/client.py b/pid_provider/client.py index 10b92037..964faaa9 100644 --- a/pid_provider/client.py +++ 
b/pid_provider/client.py @@ -281,7 +281,7 @@ def fix_pid_v2(self, pid_v3, correct_pid_v2): """ try: if not self.fix_pid_v2_url: - return {"fix-pid-v2": "unavailable"} + return {"fix-pid-v2": "unavailable", "fixed_in_core": False} self.token = self.token or self._get_token( username=self.api_username, From b7beffe3d52b1de7ac3c42de9c4e1a8b48f7c4da Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Wed, 27 Mar 2024 08:28:25 -0300 Subject: [PATCH 11/25] Evita que SPSPkg armazene arquivos em excesso (#418) --- package/models.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/package/models.py b/package/models.py index 1e995c5e..5c983225 100644 --- a/package/models.py +++ b/package/models.py @@ -601,15 +601,25 @@ def save_pkg_zip_file(self, user, zip_file_path): package = SPPackage.from_file(zip_file_path, workdir) package.optimise(new_package_file_path=target, preserve_files=False) + # saved optimised with open(target, "rb") as fp: - # saved optimised - self.file.save(filename, ContentFile(fp.read())) + self.save_file(filename, fp.read()) except Exception as e: + # saved original with open(zip_file_path, "rb") as fp: - # saved original - self.file.save(filename, ContentFile(fp.read())) + self.save_file(filename, fp.read()) self.save() + def save_file(self, name, content): + try: + self.file.delete(save=True) + except Exception as e: + pass + try: + self.file.save(name, ContentFile(content)) + except Exception as e: + raise Exception(f"Unable to save {name}. 
Exception: {e}") + def generate_article_html_page(self, user): try: generator = HTMLGenerator.parse( From 911f87ffa77978bdeeee72234954f7f0afd14759 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Wed, 27 Mar 2024 11:15:23 -0300 Subject: [PATCH 12/25] =?UTF-8?q?Verifica=20se=20xml=20registrado=20e=20xm?= =?UTF-8?q?l=20recebido=20s=C3=A3o=20iguais,=20somente=20ap=C3=B3s=20compl?= =?UTF-8?q?etar=20XML=20com=20os=20pids=20registrados=20(#419)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Compara se xml_with_pre é igual ao registrado somente após adicionar os pids registrados se aplicável * Adiciona a funcionalidade de forçar o registro no Core mesmo que o registro está indicando que já está sincronizado --- package/models.py | 3 +++ pid_provider/models.py | 28 ++++++++++++++++++++++++---- pid_provider/requester.py | 34 ++++++++++++++++------------------ proc/tasks.py | 6 ++++++ 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/package/models.py b/package/models.py index 5c983225..b0f78059 100644 --- a/package/models.py +++ b/package/models.py @@ -428,6 +428,9 @@ def supplementary_material(self): def get(cls, pid_v3): return cls.objects.get(pid_v3=pid_v3) + def set_registered_in_core(self, value): + PidRequester.set_registered_in_core(self.pid_v3, value) + @staticmethod def is_registered_in_core(pid_v3): if not pid_v3: diff --git a/pid_provider/models.py b/pid_provider/models.py index c8cd41d0..af38fcc1 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -956,7 +956,7 @@ def is_equal_to(self, xml_adapter): ) @classmethod - def get_registered(cls, xml_with_pre, origin): + def get_registered(cls, xml_with_pre, origin=None): """ Get registered @@ -985,7 +985,9 @@ def get_registered(cls, xml_with_pre, origin): registered = cls._query_document(xml_adapter) if not registered: raise cls.DoesNotExist - return registered.data + response = 
registered.data.copy() + response["registered"] = True + return response except cls.DoesNotExist: return {"filename": xml_with_pre.filename, "registered": False} except Exception as e: @@ -1000,7 +1002,7 @@ def get_registered(cls, xml_with_pre, origin): detail={ "operation": "PidProviderXML.get_registered", "detail": dict( - origin=origin, + origin=origin or xml_with_pre.filename, ), }, ) @@ -1434,7 +1436,25 @@ def is_registered(cls, xml_with_pre): registered = cls._query_document(xml_adapter) if registered: data = registered.data - data["is_equal"] = registered.is_equal_to(xml_adapter) + + xml_changed = {} + # Completa os valores ausentes de pid com recuperados ou com inéditos + try: + before = (xml_with_pre.v3, xml_with_pre.v2, xml_with_pre.aop_pid) + xml_with_pre.v3 = xml_with_pre.v3 or data["v3"] + xml_with_pre.v2 = xml_with_pre.v2 or data["v2"] + if data["aop_pid"]: + xml_with_pre.aop_pid = data["aop_pid"] + + # verifica se houve mudança nos PIDs do XML + after = (xml_with_pre.v3, xml_with_pre.v2, xml_with_pre.aop_pid) + for label, bef, aft in zip(("pid_v3", "pid_v2", "aop_pid"), before, after): + if bef != aft: + xml_changed[label] = aft + except KeyError: + pass + data["is_equal"] = registered.is_equal_to(xml_with_pre) + data["xml_changed"] = xml_changed return data except ( exceptions.NotEnoughParametersToGetDocumentRecordError, diff --git a/pid_provider/requester.py b/pid_provider/requester.py index 897fc623..6eee1a15 100644 --- a/pid_provider/requester.py +++ b/pid_provider/requester.py @@ -77,6 +77,9 @@ def request_pid_for_xml_with_pre( """ Recebe um xml_with_pre para solicitar o PID v3 """ + # identifica as mudanças no xml_with_pre + xml_changed = {} + main_op = article_proc.start(user, "request_pid_for_xml_with_pre") registered = PidRequester.get_registration_demand( xml_with_pre, article_proc, user @@ -85,23 +88,6 @@ def request_pid_for_xml_with_pre( if registered.get("error_type"): return registered - xml_changed = {} - # Completa os valores ausentes 
de pid com recuperados ou com inéditos - try: - before = (xml_with_pre.v3, xml_with_pre.v2, xml_with_pre.aop_pid) - xml_with_pre.v3 = xml_with_pre.v3 or registered["v3"] - xml_with_pre.v2 = xml_with_pre.v2 or registered["v2"] - if registered["aop_pid"]: - xml_with_pre.aop_pid = registered["aop_pid"] - - # verifica se houve mudança nos PIDs do XML - after = (xml_with_pre.v3, xml_with_pre.v2, xml_with_pre.aop_pid) - for label, bef, aft in zip(("pid_v3", "pid_v2", "aop_pid"), before, after): - if bef != aft: - xml_changed[label] = aft - except KeyError: - pass - # Solicita pid para Core self.core_registration(xml_with_pre, registered, article_proc, user) xml_changed = xml_changed or registered.get("xml_changed") @@ -156,7 +142,9 @@ def get_registration_demand(xml_with_pre, article_proc, user): """ op = article_proc.start(user, ">>> get registration demand") - registered = PidProviderXML.is_registered(xml_with_pre) or {} + registered = PidProviderXML.is_registered(xml_with_pre) + if registered.get("error_type"): + return registered if registered.get("is_equal"): # xml recebido é igual ao registrado @@ -263,3 +251,13 @@ def fix_pid_v2( fixed["fixed_in_core"] = obj.fixed_in_core logging.info(fixed) return fixed + + @staticmethod + def set_registered_in_core(pid_v3, value): + try: + PidProviderXML.objects.filter( + registered_in_core=bool(not value), + v3=pid_v3, + ).update(registered_in_core=value) + except Exception as e: + logging.exception(e) diff --git a/proc/tasks.py b/proc/tasks.py index e67ca51c..034fb6fb 100644 --- a/proc/tasks.py +++ b/proc/tasks.py @@ -261,6 +261,7 @@ def task_generate_sps_packages( force_update=False, body_and_back_xml=False, html_to_xml=False, + force_core_update=True, ): try: for collection in _get_collections(collection_acron): @@ -279,6 +280,7 @@ def task_generate_sps_packages( "item_id": item.id, "body_and_back_xml": body_and_back_xml, "html_to_xml": html_to_xml, + "force_core_update": force_core_update, } ) except Exception as e: @@ 
-297,6 +299,7 @@ def task_generate_sps_packages( "force_update": force_update, "body_and_back_xml": body_and_back_xml, "html_to_xml": html_to_xml, + "force_core_update": force_core_update, }, ) @@ -309,10 +312,13 @@ def task_generate_sps_package( html_to_xml=False, username=None, user_id=None, + force_core_update=None, ): try: user = _get_user(user_id, username) item = ArticleProc.objects.get(pk=item_id) + if force_core_update and item.sps_pkg: + item.sps_pkg.set_registered_in_core(False) item.generate_sps_package( user, body_and_back_xml, From 74e97678f4aea2b72e5f73a9915d24ee91c25d39 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel <82840278+samuelveigarangel@users.noreply.github.com> Date: Wed, 27 Mar 2024 14:49:21 -0300 Subject: [PATCH 13/25] Melhora ordem dos itens do menu (#408) * Refatora a funcionalidade da ordem do menu * Reordena menu itens padrao do wagtail e remove algum deless * Insere funcao get_menu_order em menu_order * Altera a ordem dos app --- article/wagtail_hooks.py | 2 +- collection/wagtail_hooks.py | 4 ++-- config/menu.py | 35 ++++++++++++++++++++++++----------- core/wagtail_hooks.py | 15 ++++++++++++++- issue/wagtail_hooks.py | 4 +--- journal/wagtail_hooks.py | 4 +--- migration/wagtail_hooks.py | 1 - pid_provider/wagtail_hooks.py | 3 ++- proc/wagtail_hooks.py | 3 +-- researcher/wagtail_hooks.py | 4 ++-- 10 files changed, 48 insertions(+), 27 deletions(-) diff --git a/article/wagtail_hooks.py b/article/wagtail_hooks.py index 9c7d8740..4e28b5fa 100644 --- a/article/wagtail_hooks.py +++ b/article/wagtail_hooks.py @@ -120,7 +120,7 @@ class ArticleModelAdmin(ModelAdmin): inspect_view_enabled = True inspect_view_class = ArticleAdminInspectView menu_icon = "doc-full" - menu_order = 200 + menu_order = get_menu_order("article") add_to_settings_menu = False exclude_from_explorer = False diff --git a/collection/wagtail_hooks.py b/collection/wagtail_hooks.py index b92f78cc..4c14ea7c 100644 --- a/collection/wagtail_hooks.py +++ 
b/collection/wagtail_hooks.py @@ -122,8 +122,8 @@ class ClassicWebsiteConfigurationModelAdmin(ModelAdmin): class CollectionModelAdminGroup(ModelAdminGroup): menu_label = _("Collections") menu_icon = "folder-open-inverse" - # menu_order = get_menu_order("collection") - menu_order = 100 + menu_order = get_menu_order("collection") + # menu_order = 100 items = ( CollectionModelAdmin, WebSiteConfigurationModelAdmin, diff --git a/config/menu.py b/config/menu.py index feed0dae..224eb4a7 100644 --- a/config/menu.py +++ b/config/menu.py @@ -1,14 +1,27 @@ -WAGTAIL_MENU_APPS_ORDER = { - "collection": 400, - "journal": 500, - "issue": 510, - "article": 520, - "upload": 700, - "migration": 710, - "location": 800, - "institution": 810, -} +WAGTAIL_MENU_APPS_ORDER = [ + "Tarefas", + "unexpected-error", + "processing", + "migration", + "journal", + "issue", + "article", + "institution", + "location", + "researcher", + "collection", + "pid_provider", + "Configurações", + "Relatórios", + "Images", + "Documentos", + "Ajuda", +] def get_menu_order(app_name): - return WAGTAIL_MENU_APPS_ORDER.get(app_name) or 100 + try: + return WAGTAIL_MENU_APPS_ORDER.index(app_name) + 1 + except: + return 9000 + diff --git a/core/wagtail_hooks.py b/core/wagtail_hooks.py index aa1af9b5..fc030894 100644 --- a/core/wagtail_hooks.py +++ b/core/wagtail_hooks.py @@ -8,7 +8,7 @@ from collection.models import Collection from article.models import Article from wagtail.admin.navigation import get_site_for_user - +from config.menu import get_menu_order, WAGTAIL_MENU_APPS_ORDER # @hooks.register("insert_global_admin_css", order=100) # def global_admin_css(): @@ -77,3 +77,16 @@ def add_items_summary_items(request, items): items.append(CollectionSummaryItem(request)) items.append(JournalSummaryItem(request)) items.append(ArticleSummaryItem(request)) + + +@hooks.register('construct_main_menu') +def reorder_menu_items(request, menu_items): + for item in menu_items: + if item.label in WAGTAIL_MENU_APPS_ORDER: + 
item.order = get_menu_order(item.label) + + +@hooks.register('construct_main_menu') +def remove_menu_items(request, menu_items): + if not request.user.is_superuser: + menu_items[:] = [item for item in menu_items if item.name not in ['documents', 'explorer', 'reports']] \ No newline at end of file diff --git a/issue/wagtail_hooks.py b/issue/wagtail_hooks.py index ea499b6c..9175b404 100644 --- a/issue/wagtail_hooks.py +++ b/issue/wagtail_hooks.py @@ -24,8 +24,7 @@ class IssueAdmin(ModelAdmin): menu_label = _("Issues") create_view_class = IssueCreateView menu_icon = "folder" - # menu_order = get_menu_order("issue") - menu_order = 300 + menu_order = get_menu_order("issue") add_to_settings_menu = False exclude_from_explorer = False @@ -63,7 +62,6 @@ class IssueModelAdminGroup(ModelAdminGroup): IssueAdmin, # IssueProcAdmin, ) - menu_order = get_menu_order("journal") # modeladmin_register(IssueModelAdminGroup) diff --git a/journal/wagtail_hooks.py b/journal/wagtail_hooks.py index 2209a53a..98903dbd 100644 --- a/journal/wagtail_hooks.py +++ b/journal/wagtail_hooks.py @@ -69,14 +69,12 @@ class JournalAdmin(ModelAdmin): class JournalModelAdminGroup(ModelAdminGroup): menu_icon = "folder" menu_label = _("Journals") - # menu_order = get_menu_order("journal") - menu_order = 200 + menu_order = get_menu_order("journal") items = ( OfficialJournalAdmin, JournalAdmin, # JournalProcAdmin, ) - menu_order = get_menu_order("journal") modeladmin_register(JournalModelAdminGroup) diff --git a/migration/wagtail_hooks.py b/migration/wagtail_hooks.py index af56d47f..5b4d138b 100644 --- a/migration/wagtail_hooks.py +++ b/migration/wagtail_hooks.py @@ -295,7 +295,6 @@ class MigrationModelAdmin(ModelAdminGroup): MigratedArticleModelAdmin, MigratedFileModelAdmin, ) - menu_order = get_menu_order("migration") modeladmin_register(MigrationModelAdmin) diff --git a/pid_provider/wagtail_hooks.py b/pid_provider/wagtail_hooks.py index 810d5b2d..67b23eeb 100644 --- a/pid_provider/wagtail_hooks.py +++ 
b/pid_provider/wagtail_hooks.py @@ -7,6 +7,7 @@ ) from wagtail.contrib.modeladmin.views import CreateView +from config.menu import get_menu_order from .models import ( PidProviderConfig, CollectionPidRequest, @@ -209,7 +210,7 @@ class FixPidV2Admin(ModelAdmin): class PidProviderAdminGroup(ModelAdminGroup): menu_label = _("Pid Provider") menu_icon = "folder-open-inverse" # change as required - menu_order = 6 + menu_order = get_menu_order("pid_provider") items = ( PidProviderConfigAdmin, PidProviderXMLAdmin, diff --git a/proc/wagtail_hooks.py b/proc/wagtail_hooks.py index 04541f23..f5d73c06 100644 --- a/proc/wagtail_hooks.py +++ b/proc/wagtail_hooks.py @@ -213,8 +213,7 @@ class ArticleProcModelAdmin(ModelAdmin): class ProcessModelAdminGroup(ModelAdminGroup): menu_label = _("Processing") menu_icon = "folder-open-inverse" - # menu_order = get_menu_order("article") - menu_order = 400 + menu_order = get_menu_order("processing") items = ( JournalProcModelAdmin, IssueProcModelAdmin, diff --git a/researcher/wagtail_hooks.py b/researcher/wagtail_hooks.py index da9f2816..971c2837 100644 --- a/researcher/wagtail_hooks.py +++ b/researcher/wagtail_hooks.py @@ -4,7 +4,7 @@ from wagtail.contrib.modeladmin.views import CreateView from .models import Researcher - +from config.menu import get_menu_order class ResearcherCreateView(CreateView): def form_valid(self, form): @@ -17,7 +17,7 @@ class ResearcherAdmin(ModelAdmin): create_view_class = ResearcherCreateView menu_label = _("Researcher") menu_icon = "folder" - menu_order = 200 + menu_order = get_menu_order("researcher") add_to_settings_menu = False exclude_from_explorer = False From cb75b168781b7c9389db2ebc0aa3e6463c6db322 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Thu, 28 Mar 2024 10:29:24 -0300 Subject: [PATCH 14/25] =?UTF-8?q?Move=20as=20opera=C3=A7=C3=B5es=20anterio?= =?UTF-8?q?res=20de=20ArticleProc,=20IssueProc,=20JournalProc=20para=20um?= 
=?UTF-8?q?=20arquivo=20(#420)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria o modelo ArticleProcReport e ArticleProcReportModelAdmin * Cria o modelo ProcReport para armazenar processamentos anteriores, mantendo apenas o vigente nos respectivos ArticleProc, IssueProc, JournalProc * Adiciona as migrações de banco de dados --- proc/migrations/0002_procreport.py | 119 ++++++++++ .../0003_procreport_item_type_and_more.py | 28 +++ proc/models.py | 224 ++++++++++++++++-- proc/wagtail_hooks.py | 35 ++- 4 files changed, 387 insertions(+), 19 deletions(-) create mode 100644 proc/migrations/0002_procreport.py create mode 100644 proc/migrations/0003_procreport_item_type_and_more.py diff --git a/proc/migrations/0002_procreport.py b/proc/migrations/0002_procreport.py new file mode 100644 index 00000000..129c8c12 --- /dev/null +++ b/proc/migrations/0002_procreport.py @@ -0,0 +1,119 @@ +# Generated by Django 5.0.3 on 2024-03-28 11:53 + +import django.db.models.deletion +import proc.models +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("collection", "0002_remove_websiteconfiguration_api_token_and_more"), + ("proc", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="ProcReport", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "pid", + models.CharField( + blank=True, max_length=23, null=True, verbose_name="PID" + ), + ), + ( + "task_name", + models.CharField( + blank=True, + max_length=32, + null=True, + verbose_name="Procedure name", + ), + ), + ( + "file", + 
models.FileField( + blank=True, + null=True, + upload_to=proc.models.proc_report_directory_path, + ), + ), + ( + "report_date", + models.CharField( + blank=True, + max_length=34, + null=True, + verbose_name="Identification", + ), + ), + ( + "collection", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="collection.collection", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "verbose_name": "Processing report", + "verbose_name_plural": "Processing reports", + "indexes": [ + models.Index(fields=["pid"], name="proc_procre_pid_2ea179_idx"), + models.Index( + fields=["task_name"], name="proc_procre_task_na_33520a_idx" + ), + models.Index( + fields=["report_date"], name="proc_procre_report__370dc9_idx" + ), + ], + }, + ), + ] diff --git a/proc/migrations/0003_procreport_item_type_and_more.py b/proc/migrations/0003_procreport_item_type_and_more.py new file mode 100644 index 00000000..4dce6efe --- /dev/null +++ b/proc/migrations/0003_procreport_item_type_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 5.0.3 on 2024-03-28 12:58 + +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("collection", "0002_remove_websiteconfiguration_api_token_and_more"), + ("proc", "0002_procreport"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AddField( + model_name="procreport", + name="item_type", + field=models.CharField( + blank=True, max_length=16, null=True, 
verbose_name="Item type" + ), + ), + migrations.AddIndex( + model_name="procreport", + index=models.Index( + fields=["item_type"], name="proc_procre_item_ty_0b33db_idx" + ), + ), + ] diff --git a/proc/models.py b/proc/models.py index 9f363414..3e7b8a4a 100644 --- a/proc/models.py +++ b/proc/models.py @@ -37,7 +37,11 @@ MigratedIssue, MigratedJournal, ) -from migration.controller import PkgZipBuilder, get_migrated_xml_with_pre, XMLVersionXmlWithPreError +from migration.controller import ( + PkgZipBuilder, + get_migrated_xml_with_pre, + XMLVersionXmlWithPreError, +) from package import choices as package_choices from package.models import SPSPkg from proc import exceptions @@ -92,6 +96,16 @@ class Meta: def __str__(self): return f"{self.name} {self.started} {self.finished} {self.completed}" + @property + def data(self): + return dict( + name=self.name, + completed=self.completed, + event=self.event and self.event.data, + detail=self.detail, + created=self.created.isoformat(), + ) + @property def started(self): return self.created and self.created.isoformat() or "" @@ -100,6 +114,47 @@ def started(self): def finished(self): return self.updated and self.updated.isoformat() or "" + @classmethod + def create(cls, user, proc, name): + for item in cls.objects.filter(proc=proc, name=name).order_by('created'): + # obtém o primeiro ocorrência de proc e name + + # obtém todos os ítens criados após este evento + rows = [] + for row in cls.objects.filter(proc=proc, created__gte=item.created).iterator(): + rows.append(row.data) + + try: + # converte para json + file_content = json.dumps(rows) + file_extension = ".json" + except Exception as e: + # caso não seja serializável, converte para str + file_content = str(rows) + file_extension = ".txt" + logging.info(proc.pid) + logging.exception(e) + + try: + report_date = item.created.isoformat() + # cria um arquivo com o conteúdo + ProcReport.create_or_update( + user, proc, name, report_date, file_content, file_extension, + ) + # 
apaga todas as ocorrências que foram armazenadas no arquivo + cls.objects.filter(proc=proc, created__gte=item.created).delete() + except Exception as e: + logging.info(proc.pid) + logging.exception(e) + break + + obj = cls() + obj.proc = proc + obj.name = name + obj.creator = user + obj.save() + return obj + @classmethod def start( cls, @@ -108,12 +163,7 @@ def start( name=None, ): try: - obj = cls() - obj.proc = proc - obj.name = name - obj.creator = user - obj.save() - return obj + return cls.create(user, proc, name) except Exception as exc: raise OperationStartError( f"Unable to create Operation ({name}). EXCEPTION: {type(exc)} {exc}" @@ -179,6 +229,144 @@ def finish( ) +def proc_report_directory_path(instance, filename): + try: + subdir = instance.directory_path + YYYY = instance.report_date[:4] + return f"archive/{subdir}/proc/{YYYY}/{filename}" + except AttributeError: + return f"archive/{filename}" + + +class ProcReport(CommonControlField): + collection = models.ForeignKey( + Collection, on_delete=models.SET_NULL, null=True, blank=True + ) + + pid = models.CharField(_("PID"), max_length=23, null=True, blank=True) + task_name = models.CharField( + _("Procedure name"), max_length=32, null=True, blank=True + ) + file = models.FileField(upload_to=proc_report_directory_path, null=True, blank=True) + report_date = models.CharField( + _("Identification"), max_length=34, null=True, blank=True + ) + item_type = models.CharField(_("Item type"), max_length=16, null=True, blank=True) + + panel_files = [ + FieldPanel("task_name"), + FieldPanel("report_date"), + FieldPanel("file"), + ] + + def __str__(self): + return f"{self.collection.acron} {self.pid} {self.task_name} {self.report_date}" + + class Meta: + verbose_name = _("Processing report") + verbose_name_plural = _("Processing reports") + indexes = [ + models.Index(fields=["item_type"]), + models.Index(fields=["pid"]), + models.Index(fields=["task_name"]), + models.Index(fields=["report_date"]), + ] + + @staticmethod 
+ def autocomplete_custom_queryset_filter(search_term): + return ProcReport.objects.filter( + Q(pid__icontains=search_term) + | Q(collection__acron__icontains=search_term) + | Q(collection__name__icontains=search_term) + | Q(task_name__icontains=search_term) + | Q(report_date__icontains=search_term) + ) + + def autocomplete_label(self): + return str(self) + + def save_file(self, name, content): + try: + self.file.delete(save=True) + except Exception as e: + pass + try: + self.file.save(name, ContentFile(content)) + except Exception as e: + raise Exception(f"Unable to save {name}. Exception: {e}") + + @classmethod + def get(cls, proc=None, task_name=None, report_date=None): + if proc and task_name and report_date: + try: + return cls.objects.get( + collection=proc.collection, pid=proc.pid, + task_name=task_name, + report_date=report_date, + ) + except cls.MultipleObjectsReturned: + return cls.objects.filter( + collection=proc.collection, pid=proc.pid, + task_name=task_name, + report_date=report_date, + ).first() + raise ValueError( + "ProcReport.get requires proc and task_name and report_date" + ) + + @staticmethod + def get_item_type(pid): + if len(pid) == 23: + return "article" + if len(pid) == 9: + return "journal" + return "issue" + + @classmethod + def create(cls, user, proc, task_name, report_date, file_content, file_extension): + if proc and task_name and report_date and file_content and file_extension: + try: + obj = cls() + obj.collection = proc.collection + obj.pid = proc.pid + obj.task_name = task_name + obj.item_type = ProcReport.get_item_type(proc.pid) + obj.report_date = report_date + obj.creator = user + obj.save() + obj.save_file(f"{task_name}{file_extension}", file_content) + return obj + except IntegrityError: + return cls.get(proc, task_name, report_date) + raise ValueError( + "ProcReport.create requires proc and task_name and report_date and file_content and file_extension" + ) + + @classmethod + def create_or_update(cls, user, proc, task_name, 
report_date, file_content, file_extension): + try: + obj = cls.get( + proc=proc, task_name=task_name, report_date=report_date + ) + obj.updated_by = user + obj.task_name = task_name or obj.task_name + obj.report_date = report_date or obj.report_date + obj.save() + obj.save_file(f"{task_name}{file_extension}", file_content) + except cls.DoesNotExist: + obj = cls.create(user, proc, task_name, report_date, file_content, file_extension) + return obj + + @property + def directory_path(self): + pid = self.pid + if len(self.pid) == 23: + pid = self.pid[1:] + paths = [self.collection.acron, pid[:9], pid[9:13], pid[13:17], pid[17:]] + paths = [path for path in paths if path] + return os.path.join(*paths) + + class JournalProcResult(Operation, Orderable): proc = ParentalKey("JournalProc", related_name="journal_proc_result") @@ -716,7 +904,9 @@ def update( ) @classmethod - def files_to_migrate(cls, collection, journal_acron, publication_year, force_update): + def files_to_migrate( + cls, collection, journal_acron, publication_year, force_update + ): """ Muda o status de PROGRESS_STATUS_REPROC para PROGRESS_STATUS_TODO E se force_update = True, muda o status de PROGRESS_STATUS_DONE para PROGRESS_STATUS_TODO @@ -737,9 +927,9 @@ def files_to_migrate(cls, collection, journal_acron, publication_year, force_upd params = {} if publication_year: - params['issue__publication_year'] = publication_year + params["issue__publication_year"] = publication_year if journal_acron: - params['journal_proc__acron'] = journal_acron + params["journal_proc__acron"] = journal_acron return cls.objects.filter( files_status=tracker_choices.PROGRESS_STATUS_TODO, @@ -818,9 +1008,9 @@ def docs_to_migrate(cls, collection, journal_acron, publication_year, force_upda params = {} if publication_year: - params['issue__publication_year'] = publication_year + params["issue__publication_year"] = publication_year if journal_acron: - params['journal_proc__acron'] = journal_acron + params["journal_proc__acron"] = 
journal_acron return cls.objects.filter( docs_status=tracker_choices.PROGRESS_STATUS_TODO, @@ -933,6 +1123,7 @@ class ArticleProc(BaseProc, ClusterableModel): ) ProcResult = ArticleProcResult + panel_files = [ FieldPanel("pkg_name"), AutocompletePanel("sps_pkg"), @@ -1020,7 +1211,7 @@ def get_xml(self, user, htmlxml, body_and_back_xml): operation.finish( user, - completed=self.xml_status==tracker_choices.PROGRESS_STATUS_DONE, + completed=self.xml_status == tracker_choices.PROGRESS_STATUS_DONE, ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -1219,8 +1410,7 @@ def generate_sps_package( def fix_pid_v2(self, user): if self.sps_pkg: - self.sps_pkg.fix_pid_v2( - user, correct_pid_v2=self.migrated_data.pid) + self.sps_pkg.fix_pid_v2(user, correct_pid_v2=self.migrated_data.pid) def update_sps_pkg_status(self): if not self.sps_pkg: @@ -1251,9 +1441,7 @@ def synchronize(self, user): operation = self.start(user, "synchronize to core") self.sps_pkg.synchronize(user, self) - operation.finish( - user, completed=self.sps_pkg.registered_in_core - ) + operation.finish(user, completed=self.sps_pkg.registered_in_core) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() diff --git a/proc/wagtail_hooks.py b/proc/wagtail_hooks.py index f5d73c06..a56473c7 100644 --- a/proc/wagtail_hooks.py +++ b/proc/wagtail_hooks.py @@ -15,7 +15,7 @@ from package.models import SPSPkg from htmlxml.models import HTMLXML -from .models import ArticleProc, IssueProc, JournalProc +from .models import ArticleProc, IssueProc, JournalProc, ProcReport class ProcCreateView(CreateView): @@ -210,6 +210,38 @@ class ArticleProcModelAdmin(ModelAdmin): ) +class ProcReportModelAdmin(ModelAdmin): + model = ProcReport + menu_label = _("Processing Report") + inspect_view_enabled = True + menu_icon = "doc-full" + menu_order = 200 + add_to_settings_menu = False + exclude_from_explorer = False + + list_per_page = 50 + + list_display = ( + "pid", + "collection", + 
"task_name", + "report_date", + "updated", + "created", + ) + list_filter = ( + "task_name", + "collection", + "item_type", + ) + search_fields = ( + "pid", + "collection__name", + "task_name", + "report_date", + ) + + class ProcessModelAdminGroup(ModelAdminGroup): menu_label = _("Processing") menu_icon = "folder-open-inverse" @@ -220,6 +252,7 @@ class ProcessModelAdminGroup(ModelAdminGroup): HTMLXMLModelAdmin, SPSPkgModelAdmin, ArticleProcModelAdmin, + ProcReportModelAdmin, ) From ebb61b5eab2687d08058760694eecb3e02dcfd78 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:50:47 -0300 Subject: [PATCH 15/25] =?UTF-8?q?Melhora=20o=20registro=20das=20opera?= =?UTF-8?q?=C3=A7=C3=B5es=20das=20tarefas=20relacionadas=20=C3=A0=20migra?= =?UTF-8?q?=C3=A7=C3=A3o=20e=20publica=C3=A7=C3=A3o=20(#422)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Melhora os rótulos, deixa todos os campos não editáveis, apresenta os eventos do mais recente para o mais antigo * Adiciona Article.data, Issue.data, Journal.data * Adiciona retorno às função que criam instâncias de Article, Issue e Journal * Adiciona Article.data, Issue.data, Journal.data nos detalhes das operações de entrada de dados * Aplica black * Adiciona * Adiciona mais detalhes ao registro da tarefa de gerar o XML a partir do HTML * Adiciona mais detalhes ao registro da tarefa de gerar o pacote SPS * Corrige o valor de 'completed' dos resultados das operações de solicitação de pid v3 * Adiciona o parâmetro compression em ZipFile * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Modifica o sps_pkg_status para DONE se o pacote não tem todos os texts * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Corrige ausência de importação de ZIP_DEFLATED * Adiciona o atributo order para a listagem dos itens na área administrativa * Adiciona as migrações de banco de 
dados * Adiciona detalhes do processamento da adição de arquivos no minio --- article/models.py | 18 +++++- .../migrations/0002_alter_htmlxml_options.py | 16 +++++ htmlxml/models.py | 48 +++++++++++---- issue/models.py | 19 +++++- journal/models.py | 11 ++++ migration/controller.py | 4 +- .../migrations/0002_alter_spspkg_options.py | 16 +++++ package/models.py | 23 ++++--- pid_provider/requester.py | 10 ++-- proc/controller.py | 11 ++-- ...ptions_alter_issueproc_options_and_more.py | 36 +++++++++++ proc/models.py | 60 +++++++++++-------- tracker/models.py | 3 +- 13 files changed, 216 insertions(+), 59 deletions(-) create mode 100644 htmlxml/migrations/0002_alter_htmlxml_options.py create mode 100644 package/migrations/0002_alter_spspkg_options.py create mode 100644 proc/migrations/0004_alter_articleproc_options_alter_issueproc_options_and_more.py diff --git a/article/models.py b/article/models.py index 19a6ef48..04440dec 100644 --- a/article/models.py +++ b/article/models.py @@ -117,13 +117,25 @@ class Meta: base_form_class = ArticleForm - autocomplete_search_field = "pid_v3" + autocomplete_search_field = "sps_pkg__sps_pkg_name" def autocomplete_label(self): - return self.pid_v3 + return self.sps_pkg.sps_pkg_name def __str__(self): - return f"{self.pid_v3}" + return f"{self.sps_pkg.sps_pkg_name}" + + @property + def data(self): + # TODO completar com itens que identifique o artigo + return dict( + xml=self.sps_pkg and self.sps_pkg.xml_uri, + issue=self.issue.data, + journal=self.journal.data, + pid_v3=self.pid_v3, + created=created.isoformat(), + updated=updated.isoformat(), + ) @classmethod def get(cls, pid_v3): diff --git a/htmlxml/migrations/0002_alter_htmlxml_options.py b/htmlxml/migrations/0002_alter_htmlxml_options.py new file mode 100644 index 00000000..95b3d558 --- /dev/null +++ b/htmlxml/migrations/0002_alter_htmlxml_options.py @@ -0,0 +1,16 @@ +# Generated by Django 5.0.3 on 2024-03-29 17:32 + +from django.db import migrations + + +class 
Migration(migrations.Migration): + dependencies = [ + ("htmlxml", "0001_initial"), + ] + + operations = [ + migrations.AlterModelOptions( + name="htmlxml", + options={"ordering": ["-updated"]}, + ), + ] diff --git a/htmlxml/models.py b/htmlxml/models.py index c2fdd342..40813083 100644 --- a/htmlxml/models.py +++ b/htmlxml/models.py @@ -23,6 +23,7 @@ from core.models import CommonControlField from package.models import BasicXMLFile from migration.models import MigratedArticle + # from tracker.models import EventLogger from tracker import choices as tracker_choices @@ -84,6 +85,7 @@ class BodyAndBackFile(BasicXMLFile, Orderable): ] class Meta: + indexes = [ models.Index(fields=["version"]), ] @@ -133,9 +135,9 @@ def create_or_update(cls, user, bb_parent, version, file_content, pkg_name): return obj except Exception as e: raise exceptions.CreateOrUpdateBodyAndBackFileError( - _( - "Unable to create_or_update_body and back file {} {} {} {}" - ).format(bb_parent, version, type(e), e) + _("Unable to create_or_update_body and back file {} {} {} {}").format( + bb_parent, version, type(e), e + ) ) @@ -214,6 +216,7 @@ def data(self): ] class Meta: + indexes = [ models.Index(fields=["attention_demands"]), ] @@ -491,6 +494,8 @@ def autocomplete_label(self): return self.migrated_article class Meta: + ordering = ['-updated'] + indexes = [ models.Index(fields=["html2xml_status"]), models.Index(fields=["quality"]), @@ -561,14 +566,25 @@ def html_to_xml( ): try: self.html2xml_status = tracker_choices.PROGRESS_STATUS_DOING - self.html_translation_langs = "-".join(sorted(article_proc.translations.keys())) - self.pdf_langs = "-".join(sorted([item.lang or article_proc.main_lang for item in article_proc.renditions])) + self.html_translation_langs = "-".join( + sorted(article_proc.translations.keys()) + ) + self.pdf_langs = "-".join( + sorted( + [ + item.lang or article_proc.main_lang + for item in article_proc.renditions + ] + ) + ) self.save() document = 
Document(article_proc.migrated_data.data) document._translated_html_by_lang = article_proc.translations - body_and_back = self._generate_xml_body_and_back(user, article_proc, document) + body_and_back = self._generate_xml_body_and_back( + user, article_proc, document + ) xml_content = self._generate_xml_from_html(user, article_proc, document) if xml_content and body_and_back: @@ -615,7 +631,14 @@ def generate_report(self, user, article_proc): else: self.quality = choices.HTML2XML_QA_NOT_EVALUATED self.save() - op.finish(user, completed=True) + op.finish( + user, + completed=True, + detail={ + "attention_demands": self.attention_demands, + "quality": self.quality, + }, + ) except Exception as e: op.finish(user, completed=False, detail={"error": str(e)}) @@ -625,9 +648,12 @@ def _generate_xml_body_and_back(self, user, article_proc, document): """ done = False operation = article_proc.start(user, "generate xml body and back") + + languages = document._translated_html_by_lang detail = {} + detail.update(languages) try: - document.generate_body_and_back_from_html(document._translated_html_by_lang) + document.generate_body_and_back_from_html(languages) done = True except GenerateBodyAndBackFromHTMLError as e: # cria xml_body_and_back padrão @@ -645,7 +671,7 @@ def _generate_xml_body_and_back(self, user, article_proc, document): file_content=xml_body_and_back, pkg_name=article_proc.pkg_name, ) - + detail["xml_to_html_steps"] = i operation.finish(user, done, detail=detail) return done @@ -655,7 +681,9 @@ def _generate_xml_from_html(self, user, article_proc, document): detail = {} try: xml_content = document.generate_full_xml(None).decode("utf-8") - self.save_file(article_proc.pkg_name + ".xml", xml_content) + xml_file = article_proc.pkg_name + ".xml" + self.save_file(xml_file, xml_content) + detail["xml"] = xml_file except Exception as e: detail = {"error": str(e)} operation.finish(user, bool(xml_content), detail=detail) diff --git a/issue/models.py b/issue/models.py index 
860b0012..af270dfd 100644 --- a/issue/models.py +++ b/issue/models.py @@ -47,6 +47,18 @@ def __str__(self): supplement = models.CharField(_("Supplement"), max_length=16, null=True, blank=True) publication_year = models.CharField(_("Year"), max_length=4, null=True, blank=True) + @property + def data(self): + return dict( + journal=self.journal.data, + volume=self.volume, + number=self.number, + supplement=self.supplement, + publication_year=self.publication_year, + created=created.isoformat(), + updated=updated.isoformat(), + ) + @staticmethod def autocomplete_custom_queryset_filter(search_term): parts = search_term.split() @@ -60,7 +72,12 @@ def autocomplete_custom_queryset_filter(search_term): ) def autocomplete_label(self): - return f"{self.journal.title} {self.volume or self.number}" + return "%s %s%s%s" % ( + self.journal, + self.volume and f"v{self.volume}", + self.number and f"n{self.number}", + self.supplement and f"s{self.supplement}", + ) panels = [ AutocompletePanel("journal"), diff --git a/journal/models.py b/journal/models.py index e932d752..5cfbc484 100644 --- a/journal/models.py +++ b/journal/models.py @@ -167,6 +167,17 @@ def __str__(self): ] ) + @property + def data(self): + return dict( + title=self.title, + issn_print=self.official_journal.issn_print, + issn_electronic=self.official_journal.issn_electronic, + foundation_year=self.official_journal.foundation_year, + created=created.isoformat(), + updated=updated.isoformat(), + ) + def autocomplete_label(self): return self.title or self.official_journal.title diff --git a/migration/controller.py b/migration/controller.py index c8ccbd7c..3045e35e 100644 --- a/migration/controller.py +++ b/migration/controller.py @@ -3,7 +3,7 @@ import sys from copy import deepcopy from datetime import datetime -from zipfile import ZipFile +from zipfile import ZipFile, ZIP_DEFLATED from django.utils.translation import gettext_lazy as _ from scielo_classic_website import classic_ws @@ -327,7 +327,7 @@ def 
build_sps_package( sps_pkg_zip_path = os.path.join(output_folder, f"{self.sps_pkg_name}.zip") # cria pacote zip - with ZipFile(sps_pkg_zip_path, "w") as zf: + with ZipFile(sps_pkg_zip_path, "w", compression=ZIP_DEFLATED) as zf: # A partir do XML, obtém os nomes dos arquivos dos ativos digitais self._build_sps_package_add_assets(zf, issue_proc) diff --git a/package/migrations/0002_alter_spspkg_options.py b/package/migrations/0002_alter_spspkg_options.py new file mode 100644 index 00000000..a6171c44 --- /dev/null +++ b/package/migrations/0002_alter_spspkg_options.py @@ -0,0 +1,16 @@ +# Generated by Django 5.0.3 on 2024-03-29 17:32 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("package", "0001_initial"), + ] + + operations = [ + migrations.AlterModelOptions( + name="spspkg", + options={"ordering": ["-updated"]}, + ), + ] diff --git a/package/models.py b/package/models.py index b0f78059..3a746304 100644 --- a/package/models.py +++ b/package/models.py @@ -4,7 +4,7 @@ import sys from datetime import datetime from tempfile import TemporaryDirectory -from zipfile import ZipFile +from zipfile import ZipFile, ZIP_DEFLATED from django.core.files.base import ContentFile from django.db.models import Q @@ -392,6 +392,8 @@ def __str__(self): ) class Meta: + ordering = ['-updated'] + indexes = [ models.Index(fields=["pid_v3"]), models.Index(fields=["sps_pkg_name"]), @@ -482,7 +484,6 @@ def create_or_update( obj.validate(True) - logging.info(f"Depois de criar sps_pkg.pid_v3: {obj.pid_v3}") article_proc.update_sps_pkg_status() operation.finish(user, completed=obj.is_complete, detail=obj.data) @@ -570,7 +571,7 @@ def add_pid_v3_to_zip(cls, user, zip_xml_file_path, is_public, article_proc): if response.get("xml_changed"): # atualiza conteúdo de zip - with ZipFile(zip_xml_file_path, "a") as zf: + with ZipFile(zip_xml_file_path, "a", compression=ZIP_DEFLATED) as zf: zf.writestr( response["filename"], 
xml_with_pre.tostring(pretty_print=True), @@ -687,7 +688,12 @@ def _save_components_in_cloud(self, user, original_pkg_components, article_proc) component_data, failures, ) - op.finish(user, completed=not failures, detail=failures) + items = [ + dict(basename=c.basename, uri=c.uri) + for c in self.components.all() + ] + detail = {"items": items, "failures": failures} + op.finish(user, completed=not failures, detail=detail) return xml_with_pre def _save_component_in_cloud( @@ -704,8 +710,8 @@ def _save_component_in_cloud( uri = None failures.append( dict( - item_id=item, - response=response, + basename=item, + error=str(e), ) ) self.components.add( @@ -741,7 +747,6 @@ def _save_xml_in_cloud(self, user, xml_with_pre, article_proc): uri = response["uri"] except Exception as e: uri = None - op.finish(user, completed=False, detail=response) self.xml_uri = uri self.save() self.components.add( @@ -755,7 +760,7 @@ def _save_xml_in_cloud(self, user, xml_with_pre, article_proc): legacy_uri=None, ) ) - op.finish(user, completed=True) + op.finish(user, completed=bool(uri), detail=response) def synchronize(self, user, article_proc): zip_xml_file_path = self.file.path @@ -769,7 +774,7 @@ def synchronize(self, user, article_proc): if response.get("v3") and self.pid_v3 != response.get("v3"): # atualiza conteúdo de zip - with ZipFile(zip_xml_file_path, "a") as zf: + with ZipFile(zip_xml_file_path, "a", compression=ZIP_DEFLATED) as zf: zf.writestr( response["filename"], response["xml_with_pre"].tostring(pretty_print=True), diff --git a/pid_provider/requester.py b/pid_provider/requester.py index 6eee1a15..32dfc9ea 100644 --- a/pid_provider/requester.py +++ b/pid_provider/requester.py @@ -86,6 +86,7 @@ def request_pid_for_xml_with_pre( ) if registered.get("error_type"): + main_op.finish(user, completed=False, detail=registered) return registered # Solicita pid para Core @@ -114,7 +115,7 @@ def request_pid_for_xml_with_pre( registered["registered_in_upload"] = bool(resp.get("v3")) 
op.finish( user, - completed=True, + completed=registered["registered_in_upload"], detail={"registered": registered, "response": resp}, ) @@ -127,7 +128,7 @@ def request_pid_for_xml_with_pre( detail = registered.copy() detail["xml_with_pre"] = xml_with_pre.data - main_op.finish(user, completed=True, detail={"registered": detail}) + main_op.finish(user, completed=registered["synchronized"], detail=detail) return registered @staticmethod @@ -144,6 +145,7 @@ def get_registration_demand(xml_with_pre, article_proc, user): registered = PidProviderXML.is_registered(xml_with_pre) if registered.get("error_type"): + op.finish(user, completed=False, detail=registered) return registered if registered.get("is_equal"): @@ -155,7 +157,7 @@ def get_registration_demand(xml_with_pre, article_proc, user): registered["do_core_registration"] = True registered["do_upload_registration"] = True - op.finish(user, completed=True, detail={"registered": registered}) + op.finish(user, completed=True, detail=registered) return registered @@ -187,7 +189,7 @@ def core_registration(self, xml_with_pre, registered, article_proc, user): registered["registered_in_core"] = bool(response.get("v3")) op.finish( user, - completed=True, + completed=registered["registered_in_core"], detail={"registered": registered, "response": response}, ) diff --git a/proc/controller.py b/proc/controller.py index e2fe69ad..daa504d7 100644 --- a/proc/controller.py +++ b/proc/controller.py @@ -23,7 +23,7 @@ def create_or_update_journal( journal_proc.migration_status != tracker_choices.PROGRESS_STATUS_TODO and not force_update ): - return + return journal_proc.journal collection = journal_proc.collection journal_data = journal_proc.migrated_data.data @@ -56,6 +56,7 @@ def create_or_update_journal( migration_status=tracker_choices.PROGRESS_STATUS_DONE, force_update=force_update, ) + return journal def create_or_update_issue( @@ -70,7 +71,7 @@ def create_or_update_issue( issue_proc.migration_status != 
tracker_choices.PROGRESS_STATUS_TODO and not force_update ): - return + return issue_proc.issue classic_website_issue = classic_ws.Issue(issue_proc.migrated_data.data) journal_proc = JournalProc.get( @@ -96,6 +97,7 @@ def create_or_update_issue( migration_status=tracker_choices.PROGRESS_STATUS_DONE, force_update=force_update, ) + return issue def create_or_update_article( @@ -110,9 +112,10 @@ def create_or_update_article( article_proc.migration_status != tracker_choices.PROGRESS_STATUS_TODO and not force_update ): - return + return article_proc.article - create_article(article_proc.sps_pkg, user, force_update) + article = create_article(article_proc.sps_pkg, user, force_update) article_proc.migration_status = tracker_choices.PROGRESS_STATUS_DONE article_proc.updated_by = user article_proc.save() + return article["article"] diff --git a/proc/migrations/0004_alter_articleproc_options_alter_issueproc_options_and_more.py b/proc/migrations/0004_alter_articleproc_options_alter_issueproc_options_and_more.py new file mode 100644 index 00000000..fa7cbe2b --- /dev/null +++ b/proc/migrations/0004_alter_articleproc_options_alter_issueproc_options_and_more.py @@ -0,0 +1,36 @@ +# Generated by Django 5.0.3 on 2024-03-29 17:32 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("proc", "0003_procreport_item_type_and_more"), + ] + + operations = [ + migrations.AlterModelOptions( + name="articleproc", + options={"ordering": ["-updated"]}, + ), + migrations.AlterModelOptions( + name="issueproc", + options={"ordering": ["-updated"]}, + ), + migrations.AlterModelOptions( + name="journalproc", + options={"ordering": ["-updated"]}, + ), + migrations.AlterModelOptions( + name="operation", + options={"ordering": ["-created"]}, + ), + migrations.AlterModelOptions( + name="procreport", + options={ + "ordering": ["-created"], + "verbose_name": "Processing report", + "verbose_name_plural": "Processing reports", + }, + ), + ] diff --git 
a/proc/models.py b/proc/models.py index 3e7b8a4a..45c6dce1 100644 --- a/proc/models.py +++ b/proc/models.py @@ -4,7 +4,6 @@ import json from datetime import datetime from tempfile import TemporaryDirectory -from zipfile import ZipFile from django.core.files.base import ContentFile from django.db import models @@ -81,14 +80,16 @@ class Operation(CommonControlField): base_form_class = ProcAdminModelForm panels = [ - FieldPanel("name"), + FieldPanel("name", read_only=True), FieldPanel("created", read_only=True), FieldPanel("updated", read_only=True), - FieldPanel("completed"), - FieldPanel("detail"), + FieldPanel("completed", read_only=True), + FieldPanel("detail", read_only=True), ] class Meta: + # isso faz com que em InlinePanel mostre do mais recente para o mais antigo + ordering = ['-created'] indexes = [ models.Index(fields=["name"]), ] @@ -121,7 +122,7 @@ def create(cls, user, proc, name): # obtém todos os ítens criados após este evento rows = [] - for row in cls.objects.filter(proc=proc, created__gte=item.created).iterator(): + for row in cls.objects.filter(proc=proc, created__gte=item.created).order_by('created').iterator(): rows.append(row.data) try: @@ -254,15 +255,17 @@ class ProcReport(CommonControlField): item_type = models.CharField(_("Item type"), max_length=16, null=True, blank=True) panel_files = [ - FieldPanel("task_name"), - FieldPanel("report_date"), - FieldPanel("file"), + FieldPanel("task_name", read_only=True), + FieldPanel("report_date", read_only=True), + FieldPanel("file", read_only=True), ] def __str__(self): return f"{self.collection.acron} {self.pid} {self.task_name} {self.report_date}" class Meta: + ordering = ['-created'] + verbose_name = _("Processing report") verbose_name_plural = _("Processing reports") indexes = [ @@ -410,6 +413,8 @@ class BaseProc(CommonControlField): class Meta: abstract = True + ordering = ['-updated'] + indexes = [ models.Index(fields=["pid"]), ] @@ -431,7 +436,7 @@ class Meta: edit_handler = TabbedInterface( [ 
ObjectList(panel_status, heading=_("Status")), - ObjectList(panel_proc_result, heading=_("Result")), + ObjectList(panel_proc_result, heading=_("Events newest to oldest")), ] ) @@ -504,6 +509,7 @@ def register_classic_website_data( obj.migration_status == tracker_choices.PROGRESS_STATUS_TODO ), message=None, + detail=obj.migrated_data, ) return obj except Exception as e: @@ -571,13 +577,13 @@ def create_or_update_item( operation = self.start(user, f"create or update {item_name}") - callable_register_data(user, self, force_update) - + registered = callable_register_data(user, self, force_update) operation.finish( user, completed=( self.migration_status == tracker_choices.PROGRESS_STATUS_DONE ), + detail=registered and registered.data, ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -710,18 +716,19 @@ class JournalProc(BaseProc, ClusterableModel): base_form_class = ProcAdminModelForm panel_proc_result = [ - InlinePanel("journal_proc_result", label=_("Proc result")), + InlinePanel("journal_proc_result", label=_("Event")), ] MigratedDataClass = MigratedJournal edit_handler = TabbedInterface( [ ObjectList(BaseProc.panel_status, heading=_("Status")), - ObjectList(panel_proc_result, heading=_("Result")), + ObjectList(panel_proc_result, heading=_("Events newest to oldest")), ] ) class Meta: + ordering = ['-updated'] indexes = [ models.Index(fields=["acron"]), ] @@ -843,12 +850,12 @@ def __str__(self): AutocompletePanel("issue_files"), ] panel_proc_result = [ - InlinePanel("issue_proc_result"), + InlinePanel("issue_proc_result", label=_("Event")), ] edit_handler = TabbedInterface( [ ObjectList(panel_status, heading=_("Status")), - ObjectList(panel_proc_result, heading=_("Result")), + ObjectList(panel_proc_result, heading=_("Events newest to oldest")), ] ) @@ -873,6 +880,7 @@ def status(self): ) class Meta: + ordering = ['-updated'] indexes = [ models.Index(fields=["issue_folder"]), models.Index(fields=["docs_status"]), @@ -1125,8 +1133,8 @@ 
class ArticleProc(BaseProc, ClusterableModel): ProcResult = ArticleProcResult panel_files = [ - FieldPanel("pkg_name"), - AutocompletePanel("sps_pkg"), + FieldPanel("pkg_name", read_only=True), + AutocompletePanel("sps_pkg", read_only=True), ] panel_status = [ FieldPanel("xml_status"), @@ -1139,19 +1147,20 @@ class ArticleProc(BaseProc, ClusterableModel): # AutocompletePanel("events"), # ] panel_proc_result = [ - InlinePanel("article_proc_result"), + InlinePanel("article_proc_result", label=_("Event")), ] edit_handler = TabbedInterface( [ ObjectList(panel_status, heading=_("Status")), ObjectList(panel_files, heading=_("Files")), - ObjectList(panel_proc_result, heading=_("Result")), + ObjectList(panel_proc_result, heading=_("Events newest to oldest")), ] ) MigratedDataClass = MigratedArticle class Meta: + ordering = ['-updated'] indexes = [ models.Index(fields=["pkg_name"]), models.Index(fields=["xml_status"]), @@ -1199,9 +1208,9 @@ def get_xml(self, user, htmlxml, body_and_back_xml): self.save() if htmlxml: - xml = htmlxml.html_to_xml(user, self, body_and_back_xml) - else: - xml = get_migrated_xml_with_pre(self) + htmlxml.html_to_xml(user, self, body_and_back_xml) + + xml = get_migrated_xml_with_pre(self) if xml: self.xml_status = tracker_choices.PROGRESS_STATUS_DONE @@ -1212,6 +1221,7 @@ def get_xml(self, user, htmlxml, body_and_back_xml): operation.finish( user, completed=self.xml_status == tracker_choices.PROGRESS_STATUS_DONE, + detail=xml and xml.data, ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -1394,7 +1404,7 @@ def generate_sps_package( operation.finish( user, completed=bool(self.sps_pkg and self.sps_pkg.is_complete), - detail=self.sps_pkg and self.sps_pkg.data or None, + detail=self.sps_pkg and self.sps_pkg.data, ) except Exception as e: @@ -1405,7 +1415,7 @@ def generate_sps_package( user, exc_traceback=exc_traceback, exception=e, - detail=self.sps_pkg and self.sps_pkg.data or None, + detail=self.sps_pkg and 
self.sps_pkg.data, ) def fix_pid_v2(self, user): @@ -1422,7 +1432,7 @@ def update_sps_pkg_status(self): elif not self.sps_pkg.valid_components: self.sps_pkg_status = tracker_choices.PROGRESS_STATUS_REPROC else: - self.sps_pkg_status = tracker_choices.PROGRESS_STATUS_REPROC + self.sps_pkg_status = tracker_choices.PROGRESS_STATUS_PENDING self.save() @property diff --git a/tracker/models.py b/tracker/models.py index 0d7dbe37..474e009d 100644 --- a/tracker/models.py +++ b/tracker/models.py @@ -98,7 +98,8 @@ def create( try: json.dumps(detail) obj.detail = detail - except: + except Exception as json_e: + logging.exception(json_e) obj.detail = str(detail) if exc_traceback: obj.traceback = traceback.format_tb(exc_traceback) From e85d57a5d5c5ac128809bd7e17d578773a62a391 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:04:24 -0300 Subject: [PATCH 16/25] =?UTF-8?q?Refatora=20upload=20parte=203=20-=20agrup?= =?UTF-8?q?a=20em=20uma=20tarefa=20as=20valida=C3=A7=C3=B5es:=20assets,=20?= =?UTF-8?q?renditions,=20conte=C3=BAdo=20do=20XML=20(#398)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes --- upload/tasks.py | 74 +++++ upload/xml_validation.py | 577 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 651 insertions(+) create mode 100644 upload/xml_validation.py diff --git a/upload/tasks.py b/upload/tasks.py index 50797046..5ed8f808 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -9,6 +9,7 @@ from packtools.sps.validation import article as sps_validation_article from packtools.sps.validation import journal as sps_validation_journal from 
@celery_app.task(bind=True)
def task_validate_original_zip_file(
    self, package_id, file_path, journal_id, issue_id, article_id
):
    """Enqueue the validations of a freshly received package ZIP file.

    The name of the first XML found in ``file_path`` is looked up and, when
    there is one, three independent tasks are enqueued: assets validation,
    renditions validation and XML content validation.

    Parameters
    ----------
    package_id : identifier of the ``upload.models.Package`` being validated
    file_path : path of the package (ZIP) file
    journal_id, issue_id, article_id : forwarded to the XML content task
    """
    # ``xml_path`` must exist even when the package yields no XML at all,
    # otherwise the check below raises UnboundLocalError.
    xml_path = None
    # ``path`` is the keyword used by upload.controller.receive_package for
    # the same classmethod.
    for xml_with_pre in XMLWithPre.create(path=file_path):
        # only the first XML of the package is considered
        xml_path = xml_with_pre.filename
        break

    if not xml_path:
        return

    common_kwargs = {
        "file_path": file_path,
        "xml_path": xml_path,
        "package_id": package_id,
    }

    # Trigger the assets validation
    task_validate_assets.apply_async(kwargs=dict(common_kwargs))

    # Trigger the renditions validation
    task_validate_renditions.apply_async(kwargs=dict(common_kwargs))

    # Trigger the XML content validation
    task_validate_xml_content.apply_async(
        kwargs={
            **common_kwargs,
            "journal_id": journal_id,
            "issue_id": issue_id,
            "article_id": article_id,
        },
    )


@celery_app.task(bind=True)
def task_validate_xml_content(
    self, file_path, xml_path, package_id, journal_id, issue_id, article_id
):
    """Validate the XML content of a package and record every ERROR result.

    Each XML found in ``file_path`` is submitted to
    ``upload.xml_validation.validate_xml_content``; results whose
    ``response`` is ``"ERROR"`` are stored in the package as disapproved
    validation results of category ``VE_DATA_CONSISTENCY_ERROR``.
    """
    # VE_BIBLIOMETRICS_DATA_ERROR = "bibliometrics-data-error"
    # VE_SERVICES_DATA_ERROR = "services-data-error"
    # VE_DATA_CONSISTENCY_ERROR = "data-consistency-error"
    # VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error"

    # TODO: fill in ``data`` with the parameters expected by the validations
    data = {}
    # add_app_data(data, app_data)
    # add_journal_data(data, journal, issue)
    # add_sps_data(data, sps_data)

    package = Package.objects.get(pk=package_id)
    for xml_with_pre in XMLWithPre.create(path=file_path):
        results = validate_xml_content(
            xml_with_pre.sps_pkg_name, xml_with_pre.xmltree, data
        )

        # Results are consumed per XML, so ``results`` is never read unbound.
        for result in results:
            # result keys: 'xpath', 'advice', 'title', 'expected_value',
            # 'got_value', 'message', 'validation_type', 'response'
            if result["response"] != "ERROR":
                continue

            # str.join takes ONE iterable. Empty parts are skipped because
            # translating an empty string returns the catalog header instead
            # of an empty text.
            parts = (result["message"], result["advice"])
            message = ". ".join(_(part) for part in parts if part)
            package._add_validation_result(
                error_category=choices.VE_DATA_CONSISTENCY_ERROR,
                status=choices.VS_DISAPPROVED,
                message=message,
                data=result,
            )
def doi_callable_get_data(doi):
    """Placeholder DOI lookup: returns an empty dict whatever ``doi`` is."""
    return dict()


def orcid_callable_get_validate(orcid):
    """Placeholder ORCID lookup: returns an empty dict whatever ``orcid`` is."""
    return dict()


def add_app_data(data, app_data):
    """Add the application-level validation parameters to ``data`` (in place).

    ``app_data`` is not read yet; the value below is a placeholder.
    """
    # TODO
    data.update({"country_codes": []})


def add_journal_data(data, journal, issue):
    """Add the journal/issue specific validation parameters to ``data``.

    Subjects and expected TOC sections come from ``issue`` when it is
    truthy, otherwise from ``journal``. Journal data and license code always
    come from ``journal``. ``data`` is changed in place.
    """
    # TODO
    # journal-specific values
    data["language_codes"] = []

    origin = issue if issue else journal
    data["subjects"] = origin.subjects_list
    data["expected_toc_sections"] = origin.toc_sections

    # Example of the expected ``journal.data`` content:
    # {
    #     'issns': {
    #         'ppub': '0103-5053',
    #         'epub': '1678-4790'
    #     },
    #     'acronym': 'hcsm',
    #     'journal-title': 'História, Ciências, Saúde-Manguinhos',
    #     'abbrev-journal-title': 'Hist. cienc. saude-Manguinhos',
    #     'publisher-name': ['Casa de Oswaldo Cruz, Fundação Oswaldo Cruz'],
    #     'nlm-ta': 'Rev Saude Publica'
    # }
    data["journal"] = journal.data
    data["expected_license_code"] = journal.license_code


def add_sps_data(data, sps_data):
    """Add the SPS / JATS / criteria dependent parameters to ``data``.

    ``sps_data`` is not read yet; every value below is a placeholder.
    ``data`` is changed in place.
    """
    # TODO
    # these values depend on SPS / JATS / criteria
    data.update(
        {
            "dtd_versions": [],
            "sps_versions": [],
            "article_types": [],
            "expected_article_type_vs_subject_similarity": 0,
            "data_availability_specific_uses": [],
            "credit_taxonomy": [],
            "article_type_correspondences": [],
            "future_date": "",
            "events_order": [],
            "required_events": [],
        }
    )


def validate_xml_content(sps_pkg_name, xmltree, data):
    """Yield the results of every content validation, one group at a time.

    Each validation group is a generator function of this module called
    with ``(sps_pkg_name, xmltree, data)``; the groups run in the order
    listed below.
    """
    validation_groups = (
        validate_affiliations,
        validate_languages,
        validate_article_attributes,
        validate_article_id_other,
        validate_subjects,
        validate_article_type,
        validate_authors,
        validate_data_availability,
        validate_doi,
        validate_article_languages,
        validate_licenses,
        validate_toc_sections,
        validate_xref,
        validate_dates,
        validate_journal,
        validate_preprint,
        validate_related_articles,
    )
    for run_group in validation_groups:
        yield from run_group(sps_pkg_name, xmltree, data)
def validate_languages(sps_pkg_name, xmltree, data):
    """Yield the language validation results of the article and sub-articles.

    ``data["language_codes"]`` is handed to ``validate_language``. Any
    exception raised while validating is recorded as an ``UnexpectedEvent``
    and is not propagated.
    """
    # This module imports ``ArticleLangValidation`` twice (from
    # ``article_and_subarticles`` and from ``article_lang``); the second
    # import shadows the first at module level. ``validate_language`` is
    # expected on the ``article_and_subarticles`` class, so it is imported
    # here under an unambiguous local name.
    from packtools.sps.validation.article_and_subarticles import (
        ArticleLangValidation as SubArticlesLangValidation,
    )

    xml = SubArticlesLangValidation(xmltree)

    try:
        yield from xml.validate_language(data["language_codes"])
    except Exception as exc:
        UnexpectedEvent.create(
            exception=exc,
            exc_traceback=exc.__traceback__,
            detail={
                "function": "upload.xml_validation.validate_languages",
                "sps_pkg_name": sps_pkg_name,
            },
        )


def validate_article_attributes(sps_pkg_name, xmltree, data):
    """Yield the results of the ``<article>`` attribute validations.

    Runs, in this order, the DTD version validation
    (``data["dtd_versions"]``) and the specific-use validation
    (``data["sps_versions"]``). Each one is isolated: an exception in one is
    recorded as an ``UnexpectedEvent`` and the next one still runs.
    """
    xml = ArticleAttribsValidation(xmltree)

    try:
        yield from xml.validate_dtd_version(data["dtd_versions"])
    except Exception as exc:
        UnexpectedEvent.create(
            exception=exc,
            exc_traceback=exc.__traceback__,
            detail={
                "function": "upload.xml_validation.validate_dtd_version",
                "sps_pkg_name": sps_pkg_name,
            },
        )

    try:
        yield from xml.validate_specific_use(data["sps_versions"])
    except Exception as exc:
        UnexpectedEvent.create(
            exception=exc,
            exc_traceback=exc.__traceback__,
            detail={
                "function": "upload.xml_validation.validate_specific_use",
                "sps_pkg_name": sps_pkg_name,
            },
        )
def validate_subjects(sps_pkg_name, xmltree, data):
    """Yield the results of the "article without subjects" validation.

    ``data`` is not read. An exception raised while validating is recorded
    as an ``UnexpectedEvent`` and is not propagated.
    """
    validator = ArticleSubjectsValidation(xmltree)

    try:
        yield from validator.validate_without_subjects()
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_without_subjects",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )


def validate_article_type(sps_pkg_name, xmltree, data):
    """Yield the results of the article type validations.

    Two validations run in sequence: the article type against
    ``data["article_types"]``, then the similarity between article type and
    subjects, using ``data["subjects"]`` and
    ``data["expected_article_type_vs_subject_similarity"]``. Each failure is
    recorded as an ``UnexpectedEvent`` without stopping the other.
    """
    validator = ArticleTypeValidation(xmltree)

    try:
        yield from validator.validate_article_type(data["article_types"])
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_article_type",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )

    try:
        yield from validator.validate_article_type_vs_subject_similarity(
            data["subjects"],
            data["expected_article_type_vs_subject_similarity"],
        )
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_article_type_vs_subject_similarity",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )
def validate_data_availability(sps_pkg_name, xmltree, data):
    """Yield the results of the data availability validation.

    ``data["data_availability_specific_uses"]`` is handed to the validator.
    An exception raised while validating is recorded as an
    ``UnexpectedEvent`` and is not propagated.
    """
    validator = DataAvailabilityValidation(xmltree)

    try:
        yield from validator.validate_data_availability(
            data["data_availability_specific_uses"]
        )
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_data_availability",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )
def validate_article_languages(sps_pkg_name, xmltree, data):
    """Yield the results of ``validate_article_lang``.

    ``data`` is not read. An exception raised while validating is recorded
    as an ``UnexpectedEvent`` and is not propagated.
    """
    validator = ArticleLangValidation(xmltree)

    try:
        yield from validator.validate_article_lang()
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_article_lang",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )


def validate_licenses(sps_pkg_name, xmltree, data):
    """Yield the results of the license code validation.

    The expected code comes from ``data["expected_license_code"]``. An
    exception raised while validating is recorded as an ``UnexpectedEvent``
    and is not propagated.
    """
    validator = ArticleLicenseValidation(xmltree)
    # yield from xml.validate_license(license_expected_value)

    try:
        yield from validator.validate_license_code(data["expected_license_code"])
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_license_code",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )
def validate_xref(sps_pkg_name, xmltree, data):
    """Yield the results of the cross-reference validations.

    Runs ``validate_id`` and then ``validate_rid``. ``data`` is not read.
    Each failure is recorded as an ``UnexpectedEvent`` without stopping the
    other validation.
    """
    validator = ArticleXrefValidation(xmltree)

    try:
        yield from validator.validate_id()
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_id",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )

    try:
        yield from validator.validate_rid()
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_rid",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )
def validate_journal(sps_pkg_name, xmltree, data):
    """Yield the results of the journal-meta validation.

    The expected journal values come from ``data["journal"]``. An exception
    raised while validating is recorded as an ``UnexpectedEvent`` and is not
    propagated.
    """
    validator = JournalMetaValidation(xmltree)

    try:
        yield from validator.validate(data["journal"])
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.validate_journal",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )


def validate_preprint(sps_pkg_name, xmltree, data):
    """Yield the results of the preprint validation.

    ``data`` is not read. An exception raised while validating is recorded
    as an ``UnexpectedEvent`` and is not propagated.
    """
    validator = PreprintValidation(xmltree)

    try:
        yield from validator.preprint_validation()
    except Exception as error:
        event_detail = {
            "function": "upload.xml_validation.preprint_validation",
            "sps_pkg_name": sps_pkg_name,
        }
        UnexpectedEvent.create(
            exception=error,
            exc_traceback=error.__traceback__,
            detail=event_detail,
        )
"upload.xml_validation.validate_related_articles", + "sps_pkg_name": sps_pkg_name, + }, + ) + try: + yield from xml.related_articles_matches_article_type_validation( + data["article_type_correspondences"] + ) + except Exception as exc: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exc, + exc_traceback=exc_traceback, + detail={ + "function": "upload.xml_validation.related_articles_matches_article_type_validation", + "sps_pkg_name": sps_pkg_name, + }, + ) From 6e5a7028d1357c1be1f30905c757949a90b17c76 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:13:29 -0300 Subject: [PATCH 17/25] =?UTF-8?q?Refatora=20upload=20parte=203=20-=20agrup?= =?UTF-8?q?a=20em=20uma=20tarefa=20as=20valida=C3=A7=C3=B5es:=20assets,=20?= =?UTF-8?q?renditions,=20conte=C3=BAdo=20do=20XML=20(#399)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes From cebf173f968d8c9b707542a6ec6cebcda85ab863 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 10 Mar 2024 13:50:36 -0300 Subject: [PATCH 18/25] =?UTF-8?q?Refatora=20upload=20parte=202=20-=20Adici?= =?UTF-8?q?ona=20fun=C3=A7=C3=B5es=20em=20upload.controller=20para=20avali?= =?UTF-8?q?ar=20o=20pacote=20rec=C3=A9m=20recebido=20(#400)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria os upload.choices.VE_UNEXPECTED_ERROR e VE_FORBIDDEN_UPDATE_ERROR * Cria/Edita Package.get, create_or_update, _add_validation_result * Cria funções para avaliar o XML recém-recebido (é esperado? 
os dados de journal e issue estão corretos?) * Cria testes para upload.controller.* * Adiciona a migração de banco de dados por criar novos valores de choices * Corrige ausência de definição de variáveis --- upload/choices.py | 5 +- upload/controller.py | 279 ++++++++++ .../0002_alter_validationresult_category.py | 36 ++ upload/models.py | 35 +- upload/tests.py | 498 ++++++++++++++++++ 5 files changed, 847 insertions(+), 6 deletions(-) create mode 100644 upload/migrations/0002_alter_validationresult_category.py diff --git a/upload/choices.py b/upload/choices.py index 2a2cabb7..0a6ce5b4 100644 --- a/upload/choices.py +++ b/upload/choices.py @@ -45,6 +45,8 @@ # Model ValidationResult, Field category, VE = Validation Error VE_PACKAGE_FILE_ERROR = "package-file-error" +VE_UNEXPECTED_ERROR = "unexpected-error" +VE_FORBIDDEN_UPDATE_ERROR = "forbidden-update-error" VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR = "article-journal-incompatibility-error" VE_ARTICLE_IS_NOT_NEW_ERROR = "article-is-not-new-error" VE_XML_FORMAT_ERROR = "xml-format-error" @@ -56,7 +58,8 @@ VE_RENDITION_ERROR = "rendition-error" VALIDATION_ERROR_CATEGORY = ( - (VE_PACKAGE_FILE_ERROR, "PACKAGE_FILE_ERROR"), + (VE_UNEXPECTED_ERROR, "UNEXPECTED_ERROR"), + (VE_FORBIDDEN_UPDATE_ERROR, "FORBIDDEN_UPDATE_ERROR"), (VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, "ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR"), (VE_ARTICLE_IS_NOT_NEW_ERROR, "ARTICLE_IS_NOT_NEW_ERROR"), (VE_XML_FORMAT_ERROR, "XML_FORMAT_ERROR"), diff --git a/upload/controller.py b/upload/controller.py index d054b33d..a6455da4 100644 --- a/upload/controller.py +++ b/upload/controller.py @@ -1,7 +1,13 @@ import logging +import sys from datetime import datetime +from packtools.sps.models.journal_meta import Title, ISSN +from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre, GetXMLItemsError +from packtools.sps.models.front_articlemeta_issue import ArticleMetaIssue + from article.controller import create_article +from article import choices as 
def receive_package(package):
    """Evaluate a freshly uploaded package and record the first verdict.

    The first XML found in ``package.file`` is checked against the
    registered article, journal and issue. The package's ``article``,
    ``category`` and ``status`` are updated and saved, and one validation
    result is added: disapproved (with the error found) or approved.

    When the file cannot be read as a package with XML
    (``GetXMLItemsError``), the file error is identified and recorded.

    Returns
    -------
    The same ``package`` instance, in every situation (including a package
    that yields no XML item).
    """
    try:
        for xml_with_pre in XMLWithPre.create(path=package.file.path):
            response = _check_article_and_journal(xml_with_pre)

            package.article = response.get("article")
            package.category = response.get("package_category")
            package.status = response.get("package_status")
            package.save()

            error_category = response.get("error_type")
            if error_category:
                # failure: the reason is stored as a disapproved result
                package._add_validation_result(
                    error_category=error_category,
                    status=choices.VS_DISAPPROVED,
                    message=response["error"],
                    data={},
                )
                return package

            # success: the XML was read and is expected
            package._add_validation_result(
                error_category=choices.VE_XML_FORMAT_ERROR,
                status=choices.VS_APPROVED,
                message=None,
                data={
                    "xml_path": package.file.path,
                },
            )
            return package
    except GetXMLItemsError:
        # find out what is wrong with the ZIP / XML file
        _identify_file_error(package)
    # Reached after a file error or when no XML item was yielded; callers
    # always get the package back instead of an implicit None.
    return package
def _check_article_and_journal(xml_with_pre):
    """Check whether the XML is expected and consistent with registered data.

    Steps: ask the pid provider whether the XML is registered; check whether
    a package for it is expected; check that journal and issue found in the
    XML are registered; and, for an already registered article, compare its
    journal/issue with the ones found in the XML.

    Returns the ``response`` dict. On any error the article status is rolled
    back and the package status is "rejected"; otherwise the package status
    is "enqueued for validation" (and ``article`` is included when one was
    found).
    """
    # is the XML registered in the system?
    response = pp.is_registered_xml_with_pre(xml_with_pre, xml_with_pre.filename)

    # is the XML expected?
    previous_status = _check_package_is_expected(response)

    # is the XML already linked to an article?
    article = response.pop("article", None)

    # leave as soon as an error is found
    if response.get("error"):
        return _handle_error(response, article, previous_status)

    xmltree = xml_with_pre.xmltree

    # are journal and issue registered?
    _check_xml_journal_and_xml_issue_are_registered(
        xml_with_pre.filename, xmltree, response
    )
    if response.get("error"):
        return _handle_error(response, article, previous_status)

    if article:
        # compare journal and issue data from the XML with the database
        _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(
            article, response
        )
        if response.get("error"):
            # inconsistencies found
            return _handle_error(response, article, previous_status)
        # no problems found
        response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION
        response["article"] = article
        return response

    # new document
    response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION
    return response
def _handle_error(response, article, article_previous_status):
    """Roll back the article status, mark the package as rejected and
    return ``response``."""
    _rollback_article_status(article, article_previous_status)
    response.update(package_status=choices.PS_REJECTED)
    return response


def _check_package_is_expected(response):
    """Find the article by ``response["v3"]`` and classify the package.

    When the article exists it is stored in ``response["article"]`` and the
    result of ``_get_article_previous_status`` is returned. When the article
    does not exist, or a ``KeyError`` is raised along the way, the package
    is classified as a new document and ``None`` is returned.
    """
    try:
        registered = Article.objects.get(pid_v3=response["v3"])
        response["article"] = registered
        return _get_article_previous_status(registered, response)
    except (KeyError, Article.DoesNotExist):
        # TODO: check journal and issue
        response["package_category"] = choices.PC_NEW_DOCUMENT
    return None
Article status: {article_previos_status}" + response["error_type"] = choices.VE_FORBIDDEN_UPDATE_ERROR + response["package_category"] = choices.PC_UPDATE + + +def _rollback_article_status(article, article_previos_status): + if article_previos_status: + # rollback + article.status = article_previos_status + article.save() + + +def _check_xml_journal_and_xml_issue_are_registered(filename, xmltree, response): + """ + Verifica se journal e issue do XML estão registrados no sistema + """ + try: + resp = {} + resp = _check_journal(filename, xmltree) + journal = resp["journal"] + resp = _check_issue(filename, xmltree, journal) + issue = resp["issue"] + response.update({"journal": journal, "issue": issue}) + except KeyError: + response.update(resp) + + +def _get_journal(journal_title, issn_electronic, issn_print): + j = None + if issn_electronic: + try: + j = OfficialJournal.objects.get(issn_electronic=issn_electronic) + except OfficialJournal.DoesNotExist: + pass + + if not j and issn_print: + try: + j = OfficialJournal.objects.get(issn_print=issn_print) + except OfficialJournal.DoesNotExist: + pass + + if not j and journal_title: + try: + j = OfficialJournal.objects.get(journal_title=journal_title) + except OfficialJournal.DoesNotExist: + pass + + if j: + return Journal.objects.get(official=j) + raise Journal.DoesNotExist(f"{journal_title} {issn_electronic} {issn_print}") + + +def _check_journal(origin, xmltree): + try: + xml = Title(xmltree) + journal_title = xml.journal_title + + xml = ISSN(xmltree) + issn_electronic = xml.epub + issn_print = xml.ppub + + return dict(journal=_get_journal(journal_title, issn_electronic, issn_print)) + except Journal.DoesNotExist: + return dict( + error=f"Journal in XML is not registered in Upload: {journal_title} {issn_electronic} (electronic) {issn_print} (print)", + error_type=choices.VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, + ) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + 
exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "upload.controller._check_journal", + "detail": dict(origin=origin), + }, + ) + return {"error": str(e), "error_type": choices.VE_UNEXPECTED_ERROR} + + +def _check_issue(origin, xmltree, journal): + try: + xml = ArticleMetaIssue(xmltree) + logging.info(xml.data) + if any((xml.volume, xml.suppl, xml.number)): + return {"issue": Issue.get(journal, xml.volume, xml.suppl, xml.number)} + else: + return {"issue": None} + except Issue.DoesNotExist: + return dict( + error=f"Issue in XML is not registered in Upload: {journal} {xml.data}", + error_type=choices.VE_DATA_CONSISTENCY_ERROR, + ) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "upload.controller._check_issue", + "detail": dict(origin=origin), + }, + ) + return {"error": str(e), "error_type": choices.VE_UNEXPECTED_ERROR} + + +def _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response): + issue = response["issue"] + journal = response["journal"] + if article.issue is issue and article.journal is journal: + response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION + elif article.issue is issue: + response.update( + dict( + error=f"{article.journal} (registered) differs from {journal} (XML)", + error_type=choices.VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, + ) + ) + else: + response.update( + dict( + error=f"{article.journal} {article.issue} (registered) differs from {journal} {issue} (XML)", + error_type=choices.VE_DATA_CONSISTENCY_ERROR, + ) + ) diff --git a/upload/migrations/0002_alter_validationresult_category.py b/upload/migrations/0002_alter_validationresult_category.py new file mode 100644 index 00000000..2a58f70c --- /dev/null +++ b/upload/migrations/0002_alter_validationresult_category.py @@ -0,0 +1,36 @@ +# Generated by Django 4.2.6 on 2024-02-19 23:42 + +from django.db import 
migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("upload", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="validationresult", + name="category", + field=models.CharField( + choices=[ + ("unexpected-error", "UNEXPECTED_ERROR"), + ("forbidden-update-error", "FORBIDDEN_UPDATE_ERROR"), + ( + "article-journal-incompatibility-error", + "ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR", + ), + ("article-is-not-new-error", "ARTICLE_IS_NOT_NEW_ERROR"), + ("xml-format-error", "XML_FORMAT_ERROR"), + ("bibliometrics-data-error", "BIBLIOMETRICS_DATA_ERROR"), + ("services-data-error", "SERVICES_DATA_ERROR"), + ("data-consistency-error", "DATA_CONSISTENCY_ERROR"), + ("criteria-issues-error", "CRITERIA_ISSUES"), + ("asset-error", "ASSET_ERROR"), + ("rendition-error", "RENDITION_ERROR"), + ], + max_length=64, + verbose_name="Error category", + ), + ), + ] diff --git a/upload/models.py b/upload/models.py index e52a4801..5bdbc5c3 100644 --- a/upload/models.py +++ b/upload/models.py @@ -41,7 +41,7 @@ class Package(CommonControlField): default=choices.PS_ENQUEUED_FOR_VALIDATION, ) article = models.ForeignKey( - Article, blank=True, null=True, on_delete=models.SET_NULL + Article, blank=True, null=True, on_delete=models.SET_NULL, ) issue = models.ForeignKey(Issue, blank=True, null=True, on_delete=models.SET_NULL) assignee = models.ForeignKey(User, blank=True, null=True, on_delete=models.SET_NULL) @@ -93,10 +93,17 @@ def add_validation_result( cls, package_id, error_category=None, status=None, message=None, data=None ): package = cls.objects.get(pk=package_id) + val_res = package._add_validation_result( + error_category, status, message, data) + return val_res + + def _add_validation_result( + self, error_category=None, status=None, message=None, data=None + ): val_res = ValidationResult.create( - error_category, package, status, message, data + error_category, self, status, message, data ) - package.update_status(val_res) + 
self.update_status(val_res) return val_res def update_status(self, validation_result): @@ -105,8 +112,11 @@ def update_status(self, validation_result): self.save() @classmethod - def get(cls, pkg_id): - return cls.objects.get(pk=pkg_id) + def get(cls, pkg_id=None, article=None): + if pkg_id: + return cls.objects.get(pk=pkg_id) + if article: + return cls.objects.get(article=article) @classmethod def create(cls, user_id, file, article_id=None, category=None, status=None): @@ -120,6 +130,21 @@ def create(cls, user_id, file, article_id=None, category=None, status=None): obj.save() return obj + @classmethod + def create_or_update(cls, user_id, file, article=None, category=None, status=None): + try: + obj = cls.get(article=article) + obj.article = article + obj.file = file + obj.category = category + obj.status = status + obj.save() + return obj + except cls.DoesNotExist: + return cls.create( + user_id, file, article_id=article.id, category=category, status=status + ) + def check_errors(self): for vr in self.validationresult_set.filter(status=choices.VS_DISAPPROVED): if vr.resolution.action in (choices.ER_ACTION_TO_FIX, ""): diff --git a/upload/tests.py b/upload/tests.py index 7ce503c2..bfd1b3a2 100644 --- a/upload/tests.py +++ b/upload/tests.py @@ -1,3 +1,501 @@ +from unittest.mock import Mock, patch, ANY, call + from django.test import TestCase +from lxml import etree + +from upload import controller, choices +from article.models import Article +from article import choices as article_choices +from issue.models import Issue +from journal.models import Journal, OfficialJournal + # Create your tests here. 
+class ControllerTest(TestCase): + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_and_issue_differ(self): + response = {"journal": "not journal", "issue": "not issue"} + article = Mock(spec=Article) + article.issue = "issue" + article.journal = "journal" + journal = "not journal" + issue = "not issue" + expected = { + "error": f"{article.journal} {article.issue} (registered) differs from {journal} {issue} (XML)", + "error_type": choices.VE_DATA_CONSISTENCY_ERROR, + } + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + self.assertEqual(expected["error"], response["error"]) + self.assertEqual(expected["error_type"], response["error_type"]) + + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_issue_differs(self): + response = {"journal": "Journal", "issue": "Not same issue"} + article = Mock(spec=Article) + article.issue = "Issue" + article.journal = "Journal" + journal = "Journal" + issue = "Not same issue" + expected = { + "error": f"{article.journal} {article.issue} (registered) differs from {journal} {issue} (XML)", + "error_type": choices.VE_DATA_CONSISTENCY_ERROR, + } + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + self.assertEqual(expected["error"], response["error"]) + self.assertEqual(expected["error_type"], response["error_type"]) + + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_differs(self): + response = {"journal": "not journal", "issue": "issue"} + article = Mock(spec=Article) + article.issue = "issue" + article.journal = "journal" + journal = "not journal" + issue = "issue" + expected = { + "error": f"{article.journal} (registered) differs from {journal} (XML)", + "error_type": choices.VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, + } + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + 
self.assertEqual(expected["error"], response["error"]) + self.assertEqual(expected["error_type"], response["error_type"]) + + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_and_issue_compatible(self): + response = {"journal": "journal", "issue": "issue"} + article = Mock(spec=Article) + article.issue = "issue" + article.journal = "journal" + journal = "journal" + issue = "issue" + expected = { + "package_status": choices.PS_ENQUEUED_FOR_VALIDATION, + } + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + self.assertIsNone(response.get("error")) + self.assertEqual(expected["package_status"], response["package_status"]) + + +class CheckIssueTest(TestCase): + @patch("upload.controller.Issue.get") + def test_issue_exists(self, mock_issue_get): + xmltree = etree.fromstring( + "
" + "Volume" + "Number" + "Suppl" + "
", + ) + instance = Issue(volume="Volume", supplement="Suppl", number="Number") + mock_issue_get.return_value = instance + journal = "JJJJ" + result = controller._check_issue("origin", xmltree, journal) + self.assertEqual({"issue": instance}, result) + + @patch("upload.controller.Issue.get") + def test_issue_does_not_exist(self, mock_issue_get): + xmltree = etree.fromstring( + "
" + "Volume" + "Number" + "Suppl" + "
", + ) + + mock_issue_get.side_effect = Issue.DoesNotExist + journal = "JJJJ" + result = controller._check_issue("origin", xmltree, journal) + d = {"volume": "Volume", "number": "Number", "suppl": "Suppl"} + expected = dict( + error=f"Issue in XML is not registered in Upload: JJJJ {d}", + error_type=choices.VE_DATA_CONSISTENCY_ERROR, + ) + self.assertEqual(expected["error_type"], result["error_type"]) + self.assertEqual(expected["error"], result["error"]) + + @patch("upload.controller.Issue.get") + def test_issue_absent_in_xml(self, mock_issue_get): + xmltree = etree.fromstring( + "
" "
", + ) + journal = "JJJJ" + result = controller._check_issue("origin", xmltree, journal) + self.assertEqual({"issue": None}, result) + + @patch("upload.controller.UnexpectedEvent.create") + @patch("upload.controller.Issue.get") + def test_issue_raise_exception(self, mock_issue_get, mock_unexpected_create): + xmltree = etree.fromstring( + "
" + "Volume" + "Number" + "Suppl" + "
", + ) + + exc = TypeError("Erro inesperado") + mock_issue_get.side_effect = exc + + result = controller._check_issue("origin", xmltree, journal="JJJJ") + + expected = { + "error": "Erro inesperado", + "error_type": choices.VE_UNEXPECTED_ERROR, + } + self.assertEqual(expected, result) + + mock_unexpected_create.assert_called_with( + exception=exc, + exc_traceback=ANY, + detail={ + "operation": "upload.controller._check_issue", + "detail": {"origin": "origin"}, + }, + ) + + +class CheckJournalTest(TestCase): + @patch("upload.controller._get_journal") + def test_journal_exists(self, mock_journal_get): + xmltree = etree.fromstring( + "
" + "ISSN-ELEC" + "ISSN-PRIN" + "Título do periódico" + "
", + ) + instance = Journal() + mock_journal_get.return_value = instance + result = controller._check_journal("origin", xmltree) + self.assertEqual({"journal": instance}, result) + + @patch("upload.controller._get_journal") + def test_journal_does_not_exist(self, mock_journal_get): + xmltree = etree.fromstring( + "
" + "ISSN-ELEC" + "ISSN-PRIN" + "Título do periódico" + "
", + ) + + mock_journal_get.side_effect = Journal.DoesNotExist + result = controller._check_journal("origin", xmltree) + expected = dict( + error=f"Journal in XML is not registered in Upload: Título do periódico ISSN-ELEC (electronic) ISSN-PRIN (print)", + error_type="article-journal-incompatibility-error", + ) + self.assertEqual(expected["error_type"], result["error_type"]) + self.assertEqual(expected["error"], result["error"]) + + @patch("upload.controller.UnexpectedEvent.create") + @patch("upload.controller._get_journal") + def test_journal_raise_exception(self, mock_journal_get, mock_unexpected_create): + xmltree = etree.fromstring( + "
" + "ISSN-ELEC" + "ISSN-PRIN" + "Título do periódico" + "
", + ) + + exc = Exception("Erro inesperado") + mock_journal_get.side_effect = exc + + result = controller._check_journal("origin", xmltree) + + expected = { + "error": "Erro inesperado", + "error_type": choices.VE_UNEXPECTED_ERROR, + } + self.assertEqual(expected, result) + + mock_unexpected_create.assert_called_with( + exception=exc, + exc_traceback=ANY, + detail={ + "operation": "upload.controller._check_journal", + "detail": {"origin": "origin"}, + }, + ) + + +# def _get_journal(journal_title, issn_electronic, issn_print): +# j = None +# if issn_electronic: +# try: +# j = OfficialJournal.objects.get(issn_electronic=issn_electronic) +# except OfficialJournal.DoesNotExist: +# pass + +# if not j and issn_print: +# try: +# j = OfficialJournal.objects.get(issn_print=issn_print) +# except OfficialJournal.DoesNotExist: +# pass + +# if not j and journal_title: +# try: +# j = OfficialJournal.objects.get(journal_title=journal_title) +# except OfficialJournal.DoesNotExist: +# pass + +# if j: +# return Journal.objects.get(official=j) +# raise Journal.DoesNotExist(f"{journal_title} {issn_electronic} {issn_print}") + + +class GetJournalTest(TestCase): + @patch("upload.controller.OfficialJournal.objects.get") + @patch("upload.controller.Journal.objects.get") + def test__get_journal_with_issn_e(self, mock_journal_get, mock_official_j_get): + journal = Journal() + official_j = OfficialJournal() + mock_journal_get.return_value = journal + mock_official_j_get.return_value = official_j + + result = controller._get_journal( + journal_title=None, issn_electronic="XXXXXXX", issn_print=None + ) + self.assertEqual(journal, result) + mock_official_j_get.assert_called_with(issn_electronic="XXXXXXX") + mock_journal_get.assert_called_with(official=official_j) + + @patch("upload.controller.OfficialJournal.objects.get") + @patch("upload.controller.Journal.objects.get") + def test__get_journal_with_issn_print(self, mock_journal_get, mock_official_j_get): + journal = Journal() + official_j = 
OfficialJournal() + mock_journal_get.return_value = journal + mock_official_j_get.return_value = official_j + + result = controller._get_journal( + journal_title=None, issn_electronic=None, issn_print="XXXXXXX" + ) + self.assertEqual(journal, result) + mock_official_j_get.assert_called_with(issn_print="XXXXXXX") + mock_journal_get.assert_called_with(official=official_j) + + @patch("upload.controller.OfficialJournal.objects.get") + @patch("upload.controller.Journal.objects.get") + def test__get_journal_with_journal_title(self, mock_journal_get, mock_official_j_get): + journal = Journal() + official_j = OfficialJournal() + mock_journal_get.return_value = journal + mock_official_j_get.return_value = official_j + + result = controller._get_journal( + journal_title="XXXXXXX", issn_electronic=None, issn_print=None + ) + self.assertEqual(journal, result) + mock_official_j_get.assert_called_with(journal_title="XXXXXXX") + mock_journal_get.assert_called_with(official=official_j) + + @patch("upload.controller.OfficialJournal.objects.get") + @patch("upload.controller.Journal.objects.get") + def test__get_journal_with_issn_print_after_raise_exception_does_not_exist_for_issn_electronic(self, mock_journal_get, mock_official_j_get): + journal = Journal() + official_j = OfficialJournal() + mock_journal_get.return_value = journal + mock_official_j_get.side_effect = [ + OfficialJournal.DoesNotExist, + official_j, + ] + + result = controller._get_journal( + journal_title=None, issn_electronic="EEEEEEE", issn_print="XXXXXXX" + ) + self.assertEqual(journal, result) + self.assertEqual( + mock_official_j_get.mock_calls, + [ + call(issn_electronic="EEEEEEE"), + call(issn_print="XXXXXXX"), + ] + ) + mock_journal_get.assert_called_with(official=official_j) + + @patch("upload.controller.OfficialJournal.objects.get") + @patch("upload.controller.Journal.objects.get") + def test__get_journal_raises_multiple_object_returned(self, mock_journal_get, mock_official_j_get): + journal = Journal() + 
official_j = OfficialJournal() + mock_journal_get.return_value = journal + mock_official_j_get.side_effect = OfficialJournal.MultipleObjectsReturned + + with self.assertRaises(OfficialJournal.MultipleObjectsReturned) as exc: + result = controller._get_journal( + journal_title="Title", issn_electronic="EEEEEEE", issn_print="XXXXXXX" + ) + self.assertIsNone(result) + self.assertEqual( + mock_official_j_get.mock_calls, + [ + call(issn_electronic="EEEEEEE"), + ] + ) + mock_journal_get.assert_not_called() + + +@patch("upload.controller.Article") +class GetArticlePreviousStatusTest(TestCase): + + def test_get_article_previous_status_require_update(self, mock_article): + response = {} + article = Mock(spec=Article) + article.status = article_choices.AS_REQUIRE_UPDATE + result = controller._get_article_previous_status(article, response) + self.assertEqual(article_choices.AS_REQUIRE_UPDATE, result) + self.assertEqual(article.status, article_choices.AS_CHANGE_SUBMITTED) + self.assertEqual(response["package_category"], choices.PC_UPDATE) + + def test_get_article_previous_status_required_erratum(self, mock_article): + response = {} + article = Mock(spec=Article) + article.status = article_choices.AS_REQUIRE_ERRATUM + result = controller._get_article_previous_status(article, response) + self.assertEqual(article_choices.AS_REQUIRE_ERRATUM, result) + self.assertEqual(article.status, article_choices.AS_CHANGE_SUBMITTED) + self.assertEqual(response["package_category"], choices.PC_ERRATUM) + + def test_get_article_previous_status_not_required_erratum_and_not_require_update(self, mock_article): + response = {} + article = Mock(spec=Article) + article.status = "no matter what" + result = controller._get_article_previous_status(article, response) + self.assertIsNone(result) + self.assertEqual("no matter what", article.status) + self.assertEqual(response["package_category"], choices.PC_UPDATE) + self.assertEqual(f"Unexpected package. Article has no need to be updated / corrected. 
Article status: no matter what", response["error"]) + self.assertEqual(choices.VE_FORBIDDEN_UPDATE_ERROR, response["error_type"]) + + +@patch("upload.controller._get_journal") +@patch("upload.controller.Issue.get") +@patch("upload.controller.Article.objects.get") +@patch("upload.controller.PidRequester.is_registered_xml_with_pre") +class CheckArticleAndJournalTest(TestCase): + + def test__check_article_and_journal__registered_and_allowed_to_be_updated(self, mock_xml_with_pre, mock_article_get, mock_issue_get, mock_journal_get): + + mock_xml_with_pre.return_value = {"v3": "yjukillojhk"} + + article_instance = Mock(spec=Article) + + article_instance.status = article_choices.AS_REQUIRE_UPDATE + mock_article_get.return_value = article_instance + + issue_instance = Mock(spec=Issue) + issue_instance.supplement = "Suppl" + issue_instance.number = "Number" + issue_instance.volume = "Volume" + mock_issue_get.return_value = issue_instance + + journal_instance = Mock(spec=Journal) + journal_instance.issn_electronic = "ISSN-ELEC" + journal_instance.issn_print = "ISSN-PRIN" + mock_journal_get.return_value = journal_instance + + issue_instance.journal = journal_instance + article_instance.issue = issue_instance + article_instance.journal = journal_instance + + xmltree = etree.fromstring( + "
" + "ISSN-ELEC" + "ISSN-PRIN" + "Título do periódico" + "" + "" + "Volume" + "Number" + "Suppl" + "" + "
", + ) + xml_with_pre = controller.XMLWithPre("", xmltree) + xml_with_pre.filename = "zzz.zip" + result = controller._check_article_and_journal(xml_with_pre) + self.assertIsNone(result.get("error")) + self.assertEqual(article_instance, result["article"]) + self.assertEqual(choices.PS_ENQUEUED_FOR_VALIDATION, result["package_status"]) + self.assertEqual(choices.PC_UPDATE, result["package_category"]) + + def test__check_article_and_journal__new_document(self, mock_xml_with_pre, mock_article_get, mock_issue_get, mock_journal_get): + + mock_xml_with_pre.return_value = {} + + mock_article_get.side_effect = KeyError + + issue_instance = Mock(spec=Issue) + mock_issue_get.return_value = issue_instance + issue_instance.supplement = "Suppl" + issue_instance.number = "Number" + issue_instance.volume = "Volume" + + journal_instance = Mock(spec=Journal) + journal_instance.issn_electronic = "ISSN-ELEC" + journal_instance.issn_print = "ISSN-PRIN" + + mock_journal_get.return_value = journal_instance + + xmltree = etree.fromstring( + "
" + "ISSN-ELEC" + "ISSN-PRIN" + "Título do periódico" + "" + "" + "Volume" + "Number" + "Suppl" + "" + "
", + ) + xml_with_pre = controller.XMLWithPre("", xmltree) + xml_with_pre.filename = "zzz.zip" + result = controller._check_article_and_journal(xml_with_pre) + self.assertIsNone(result.get("error")) + self.assertIsNone(result.get("article")) + self.assertEqual(choices.PS_ENQUEUED_FOR_VALIDATION, result["package_status"]) + self.assertEqual(choices.PC_NEW_DOCUMENT, result["package_category"]) + + +# def _check_article_and_journal(xml_with_pre): +# # verifica se o XML está registrado no sistema +# response = pp.is_registered_xml_with_pre(xml_with_pre, xml_with_pre.filename) + +# # verifica se o XML é esperado +# article_previous_status = _check_package_is_expected(response) + +# # verifica se XML já está associado a um article +# try: +# article = response.pop("article") +# except KeyError: +# article = None + +# # caso encontrado erro, sair da função +# if response.get("error"): +# return _handle_error(response, article, article_previous_status) + +# xmltree = xml_with_pre.xmltree + +# # verifica se journal e issue estão registrados +# response = _check_xml_journal_and_xml_issue_are_registered( +# xml_with_pre.filename, xmltree, response +# ) +# # caso encontrado erro, sair da função +# if response.get("error"): +# return _handle_error(response, article, article_previous_status) + +# if article: +# # verifica a consistência dos dados de journal e issue +# # no XML e na base de dados +# _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) +# if response.get("error"): +# # inconsistências encontradas +# return _handle_error(response, article, article_previous_status) +# else: +# # sem problemas +# response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION +# response.update({"article": article}) +# return response +# # documento novo +# response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION +# return response \ No newline at end of file From 769db66bcf9e3224e7ffa85c7535d677012a6fbf Mon Sep 17 00:00:00 2001 From: 
Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 31 Mar 2024 14:17:44 -0300 Subject: [PATCH 19/25] Atualiza main_ingress com main (#425) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Corrige Institution.__str__, adiciona atributos de autocomplete e altera InstitutionHistory.panels de FieldPanel para Autocomplete (#401) * Faz correções na app journal: adiciona Journal.title, wagtail_hooks.JournalCreateView, etc (#402) * Adiciona Journal.title * Modifica os atributos de journal.models.Owner e Publisher * Cria journal.wagtail.JournalCreateView para adicionar o usuário como creator * Adiciona migrações de banco de dados relacionados a journal * Adiciona filtros de journal_acron e publication_year para migrar dados de artigos (#403) * Adiciona filtros de journal_acron e publication_year para migrar dados de artigos, criando uma amostragem de migração * Adiciona os parâmetros journal_acron e publication_year * Garante que no XML migrado (seja nativo ou gerado a partir do HTML) tenha o PID v2 e o order (article-id other) (#405) * Corrige ou adiciona ao XML o elemento pid-v2 usando como valor o pid do artigo do site clássico * Atualiza packtools versão 3.4.0 para ter XMLWithPre.order Corrige ou adiciona ao XML o elemento article-id (other/order) usando como valor os últimos 5 dígitos do pid do artigo do site clássico * Atualiza a versão da biblioteca scielo_classic_website para 1.6.4 para corrigir a obtenção de registros de artigos em serial xml * Evita guardar versões anteriores dos arquivos * Cria o procedimento de corrigir o valor do Pid v2 (#410) * Cria PidProviderXML.fix_pid_v2 * Cria FixPidV2 para controlar o que foi corrigido no upload e no core * Cria FixPidV2ModelAdmin * Adiciona PidProviderAPIClient.fix_pid_v2, fix_pid_v2_url. 
Refatora PidProviderAPIClient.enabled * Cria APIPidProviderFixPidV2Error * Cria provider.requester.PidRequester.fix_pid_v2 * Cria SPSPkg.fix_pid_v2 * Cria ArticleProc.fix_pid_v2 e adiciona a chamada no procedimento de generate_sps_package * Cria tarefas para corrigir o valor de pid v2 em PidProviderXML a partir de ArticleProc.pid * Cria provider.provider.PidProvider com os métodos fix_pid_v2, get_sps_pkg_name, get_xmltree * Adiciona a migração correspondente ao modelo FixPidV2 * Corrige ausência de pid v3 no xml submetido do upload para o core (#411) * Atualiza a versão de packtools 4.1.1 para usar XMLWithPre.data e .files * Modifica PidProviderXML.is_registered para atualizar os pids de xml_with_pre com os valores registrados, além disso, era necessário retornar se está registrado e igual ou registrado e diferente ou não registrado * Distingue status de demanda de registro e status do registro * Modifica PidProviderAPIClient._process_post_xml_response para atualizar ou não os valores dos pids de xml_with_pre com os valores fornecidos pelo Core * Adiciona registered_in_core como filtro de PidProviderXMLModelAdmin * Atualiza dependências base.txt e production.txt (#409) * Comenta app captcha * Atualiza dependências --------- Co-authored-by: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> * Modifica comportamento de Pid provider, que passa a aceitar mudanças de pids (#415) * Cria PidProviderXML.complete_pids, que completa pids com registrados ou inéditos * Cria PidProviderXML._check_pids, que valida pid do XML é inédito e/ou registrado e/ou pertencente a outro documento * Cria PidProviderXML.get_pids, que retorna todos os pids vigentes e outros * Corrige PidProviderXML._is_registered_pid, adicionando a verificação em OtherPid * Corrige PidProviderXML._get_unique_v3, que usa _is_registered_pid e agora não precisa verificar OtherPid * Ajusta PidProviderXML._add_other_pid * Remove PidProviderXML._complete_pids excedente * Corrige
PidProvider._add_pid_v3 e _add_pid_v2 * Corrige PidProviderXML.is_registered * Ajusta PidProviderXML._save, removendo _add_other_pid e removendo change_pids * Modifica PidProviderXML.register * Melhora XMLVersion.__str__, mostrando nome do arquivo + data no lugar de pid v3 * Melhora _process_post_xml_response * Para PidProvider.provide_pid_for_xml_with_pre, adiciona parâmetro caller, completa XML com pids registrados se ausentes no XML, adiciona xml_changed ao retorno * Adiciona comando para completar XML com pids registrados antes de solicitar pid para Core * Cria meio de configurar / habilitar / desabilitar fix_pid_v2 do Core (#416) * Cria a classe PidProviderEndpoint, inline de PidProviderConfig * Modifica o modo de obter fix_pid_v2_url * Adiciona modelo PidProviderEndpoint * Adiciona 'fixed_in_core': False ao retorno de fix_pid_v2 (#417) * Evita que SPSPkg armazene arquivos em excesso (#418) * Verifica se xml registrado e xml recebido são iguais, somente após completar XML com os pids registrados (#419) * Compara se xml_with_pre é igual ao registrado somente após adicionar os pids registrados se aplicável * Adiciona a funcionalidade de forçar o registro no Core mesmo que o registro está indicando que já está sincronizado * Melhora ordem dos itens do menu (#408) * Refatora a funcionalidade da ordem do menu * Reordena itens de menu padrão do wagtail e remove alguns deles * Insere função get_menu_order em menu_order * Altera a ordem dos apps * Move as operações anteriores de ArticleProc, IssueProc, JournalProc para um arquivo (#420) * Cria o modelo ArticleProcReport e ArticleProcReportModelAdmin * Cria o modelo ProcReport para armazenar processamentos anteriores, mantendo apenas o vigente nos respectivos ArticleProc, IssueProc, JournalProc * Adiciona as migrações de banco de dados * Melhora o registro das operações das tarefas relacionadas à migração e publicação (#422) * Melhora os rótulos, deixa todos os campos não editáveis, apresenta os eventos do mais recente
para o mais antigo * Adiciona Article.data, Issue.data, Journal.data * Adiciona retorno às funções que criam instâncias de Article, Issue e Journal * Adiciona Article.data, Issue.data, Journal.data nos detalhes das operações de entrada de dados * Aplica black * Adiciona * Adiciona mais detalhes ao registro da tarefa de gerar o XML a partir do HTML * Adiciona mais detalhes ao registro da tarefa de gerar o pacote SPS * Corrige o valor de 'completed' dos resultados das operações de solicitação de pid v3 * Adiciona o parâmetro compression em ZipFile * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Modifica o sps_pkg_status para DONE se o pacote não tem todos os texts * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Corrige ausência de importação de ZIP_DEFLATED * Adiciona o atributo order para a listagem dos itens na área administrativa * Adiciona as migrações de banco de dados * Adiciona detalhes do processamento da adição de arquivos no minio * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#398) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 2 - Adiciona funções em upload.controller para avaliar o pacote recém-recebido (#400) * Cria os
upload.choices.VE_UNEXPECTED_ERROR e VE_FORBIDDEN_UPDATE_ERROR * Cria/Edita Package.get, create_or_update, _add_validation_result * Cria funções para avaliar o XML recém-recebido (é esperado? os dados de journal e issue estão corretos?) * Cria testes para upload.controller.* * Adiciona a migração de banco de dados por criar novos valores de choices * Corrige ausência de definição de variáveis * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes --------- Co-authored-by: Samuel Veiga Rangel <82840278+samuelveigarangel@users.noreply.github.com> From 79ef29ede226c31896c9ef19a6c6be868c2ae533 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 31 Mar 2024 15:23:59 -0300 Subject: [PATCH 20/25] =?UTF-8?q?Modifica=20a=20entrada=20do=20pacote=20pe?= =?UTF-8?q?lo=20upload=20e=20adiciona=20novas=20valida=C3=A7=C3=B5es=20do?= =?UTF-8?q?=20XML=20(#426)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Corrige Institution.__str__, adiciona atributos de autocomplete e altera InstitutionHistory.panels de FieldPanel para Autocomplete (#401) * Faz correções na app journal: adiciona Journal.title, wagtail_hooks.JournalCreateView, etc (#402) * Adiciona Journal.title * Modifica os atributos de journal.models.Owner e Publisher * Cria journal.wagtail.JournalCreateView para adicionar o usuário como creator * Adiciona migrações de banco de dados relacionados a journal * Adiciona filtros de journal_acron e publication_year para migrar dados de artigos (#403) * Adiciona filtros de journal_acron e publication_year para migrar 
dados de artigos, criando uma amostragem de migração * Adiciona os parâmetros journal_acron e publication_year * Garante que no XML migrado (seja nativo ou gerado a partir do HTML) tenha o PID v2 e o order (article-id other) (#405) * Corrige ou adiciona ao XML o elemento pid-v2 usando como valor o pid do artigo do site clássico * Atualiza packtools versão 3.4.0 para ter XMLWithPre.order Corrige ou adiciona ao XML o elemento article-id (other/order) usando como valor os últimos 5 dígitos do pid do artigo do site clássico * Atualiza a versão da biblioteca scielo_classic_website para 1.6.4 para corrigir a obtenção de registros de artigos em serial xml * Evita guardar versões anteriores dos arquivos * Cria o procedimento de corrigir o valor do Pid v2 (#410) * Cria PidProviderXML.fix_pid_v2 * Cria FixPidV2 para controlar o que foi corrigido no upload e no core * Cria FixPidV2ModelAdmin * Adiciona PidProviderAPIClient.fix_pid_v2, fix_pid_v2_url. Refatora PidProviderAPIClient.enabled * Cria APIPidProviderFixPidV2Error * Cria provider.requester.PidRequester.fix_pid_v2 * Cria SPSPkg.fix_pid_v2 * Cria ArticleProc.fix_pid_v2 e adiciona a chamada no procedimento de generate_sps_package * Cria tarefas para corrigir o valor de pid v2 em PidProviderXML a partir de ArticleProc.pid * Cria provider.provider.PidProvider com os métodos fix_pid_v2, get_sps_pkg_name, get_xmltree * Adiciona a migração correspondente ao modelo FixPidV2 * Corrige ausência de pid v3 no xml submetido do upload para o core (#411) * Atualiza a versão de packtools 4.1.1 para usar XMLWithPre.data e .files * Modifica PidProviderXML.is_registered para atualizar os pids de xml_with_pre com os valores registrados, além disso, era necessário retornar se está registrado e igual ou registrado e diferente ou não registrado * Distingue status de demanda de registro e status do registro * Modifica PidProviderAPIClient._process_post_xml_response para atualizar ou não os valores dos pids de xml_with_pre com os valores
fornecidos pelo Core * Adiciona registered_in_core como filtro de PidProviderXMLModelAdmin * Atualiza dependências base.txt e production.txt (#409) * Comenta app captcha * Atualiza dependências --------- Co-authored-by: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> * Modifica comportamento de Pid provider, que passa a aceitar mudanças de pids (#415) * Cria PidProviderXML.complete_pids, que completa pids com registrados ou inéditos * Cria PidProviderXML._check_pids, que valida se o pid do XML é inédito e/ou registrado e/ou pertencente a outro documento * Cria PidProviderXML.get_pids, que retorna todos os pids vigentes e outros * Corrige PidProviderXML._is_registered_pid, adicionando a verificação em OtherPid * Corrige PidProviderXML._get_unique_v3, que usa _is_registered_pid e agora não precisa verificar OtherPid * Ajusta PidProviderXML._add_other_pid * Remove PidProviderXML._complete_pids excedente * Corrige PidProvider._add_pid_v3 e _add_pid_v2 * Corrige PidProviderXML.is_registered * Ajusta PidProviderXML._save, removendo _add_other_pid e removendo change_pids * Modifica PidProviderXML.register * Melhora XMLVersion.__str__, mostrando nome do arquivo + data no lugar de pid v3 * Melhora _process_post_xml_response * Para PidProvider.provide_pid_for_xml_with_pre, adiciona parâmetro caller, completa XML com pids registrados se ausentes no XML, adiciona xml_changed ao retorno * Adiciona comando para completar XML com pids registrados antes de solicitar pid para Core * Cria meio de configurar / habilitar / desabilitar fix_pid_v2 do Core (#416) * Cria a classe PidProviderEndpoint, inline de PidProviderConfig * Modifica o modo de obter fix_pid_v2_url * Adiciona modelo PidProviderEndpoint * Adiciona 'fixed_in_core': False ao retorno de fix_pid_v2 (#417) * Evita que SPSPkg armazene arquivos em excesso (#418) * Verifica se xml registrado e xml recebido são iguais, somente após completar XML com os pids registrados (#419) * Compara se xml_with_pre é igual ao
registrado somente após adicionar os pids registrados se aplicável * Adiciona a funcionalidade de forçar o registro no Core mesmo que o registro esteja indicando que já está sincronizado * Melhora ordem dos itens do menu (#408) * Refatora a funcionalidade da ordem do menu * Reordena os itens de menu padrão do wagtail e remove alguns deles * Insere função get_menu_order em menu_order * Altera a ordem dos apps * Move as operações anteriores de ArticleProc, IssueProc, JournalProc para um arquivo (#420) * Cria o modelo ArticleProcReport e ArticleProcReportModelAdmin * Cria o modelo ProcReport para armazenar processamentos anteriores, mantendo apenas o vigente nos respectivos ArticleProc, IssueProc, JournalProc * Adiciona as migrações de banco de dados * Melhora o registro das operações das tarefas relacionadas à migração e publicação (#422) * Melhora os rótulos, deixa todos os campos não editáveis, apresenta os eventos do mais recente para o mais antigo * Adiciona Article.data, Issue.data, Journal.data * Adiciona retorno às funções que criam instâncias de Article, Issue e Journal * Adiciona Article.data, Issue.data, Journal.data nos detalhes das operações de entrada de dados * Aplica black * Adiciona * Adiciona mais detalhes ao registro da tarefa de gerar o XML a partir do HTML * Adiciona mais detalhes ao registro da tarefa de gerar o pacote SPS * Corrige o valor de 'completed' dos resultados das operações de solicitação de pid v3 * Adiciona o parâmetro compression em ZipFile * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Modifica o sps_pkg_status para DONE se o pacote não tem todos os texts * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Corrige ausência de importação de ZIP_DEFLATED * Adiciona o atributo order para a listagem dos itens na área administrativa * Adiciona as migrações de banco de dados * Adiciona detalhes do processamento da adição de arquivos no minio * Refatora upload parte 3 - agrupa em uma tarefa as
validações: assets, renditions, conteúdo do XML (#398) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 2 - Adiciona funções em upload.controller para avaliar o pacote recém recebido (#400) * Cria os upload.choices.VE_UNEXPECTED_ERROR e VE_FORBIDDEN_UPDATE_ERROR * Cria/Edita Package.get, create_or_update, _add_validation_result * Cria funções para avaliar o XML recém-recebido (é esperado? os dados de journal e issue estão corretos?) 
* Cria testes para upload.controller.* * Adiciona a migração de banco de dados por criar novos valores de choices * Corrige ausência de definição de variáveis * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Aplica black * Cria função para associar os tipos de erros com os relatórios e faz ajustes nos tipos de erros * Associa por inferência o tipo de impacto de cada tipo de erro * Refatora Package.check_opinions e check_resolutions; Remove article e issue do formulário * Corrige defeitos das validações iniciais à recepção do pacote e ajusta a validação do conteúdo do XML * Remove a verificação de article e issue no formulário * Troca a tarefa que executará as validações --------- Co-authored-by: Samuel Veiga Rangel <82840278+samuelveigarangel@users.noreply.github.com> --- upload/choices.py | 61 ++++++---------- upload/controller.py | 154 +++++++++++++++++++++++++++++++++++---- upload/forms.py | 8 +- upload/models.py | 60 ++++++++------- upload/tasks.py | 79 ++++++++++---------- upload/tests.py | 69 +++++++++++++----- upload/wagtail_hooks.py | 97 ++++++++++-------------- upload/xml_validation.py | 55 ++++++++------ 8 files changed, 358 insertions(+), 225 deletions(-) diff --git a/upload/choices.py b/upload/choices.py index 0a6ce5b4..edc669e3 100644 --- a/upload/choices.py +++ b/upload/choices.py @@ -47,15 +47,17 @@ VE_PACKAGE_FILE_ERROR = "package-file-error" VE_UNEXPECTED_ERROR = "unexpected-error" VE_FORBIDDEN_UPDATE_ERROR = "forbidden-update-error" -VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR = "article-journal-incompatibility-error" +VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR = 
"journal-incompatibility-error" VE_ARTICLE_IS_NOT_NEW_ERROR = "article-is-not-new-error" VE_XML_FORMAT_ERROR = "xml-format-error" +VE_XML_CONTENT_ERROR = "xml-content-error" VE_BIBLIOMETRICS_DATA_ERROR = "bibliometrics-data-error" VE_SERVICES_DATA_ERROR = "services-data-error" VE_DATA_CONSISTENCY_ERROR = "data-consistency-error" VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error" VE_ASSET_ERROR = "asset-error" VE_RENDITION_ERROR = "rendition-error" +VE_GROUP_DATA_ERROR = "group-error" VALIDATION_ERROR_CATEGORY = ( (VE_UNEXPECTED_ERROR, "UNEXPECTED_ERROR"), @@ -63,6 +65,8 @@ (VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, "ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR"), (VE_ARTICLE_IS_NOT_NEW_ERROR, "ARTICLE_IS_NOT_NEW_ERROR"), (VE_XML_FORMAT_ERROR, "XML_FORMAT_ERROR"), + (VE_XML_CONTENT_ERROR, "VE_XML_CONTENT_ERROR"), + (VE_GROUP_DATA_ERROR, "VE_GROUP_DATA_ERROR"), (VE_BIBLIOMETRICS_DATA_ERROR, "BIBLIOMETRICS_DATA_ERROR"), (VE_SERVICES_DATA_ERROR, "SERVICES_DATA_ERROR"), (VE_DATA_CONSISTENCY_ERROR, "DATA_CONSISTENCY_ERROR"), @@ -75,56 +79,39 @@ VR_XML_OR_DTD = "xml_or_dtd" VR_ASSET_AND_RENDITION = "asset_and_rendition" VR_INDIVIDUAL_CONTENT = "individual_content" -VR_GROUPED_CONTENT = "grouped_content" +VR_GROUP_CONTENT = "group_content" VR_STYLESHEET = "stylesheet" VR_PACKAGE_FILE = "package_file" -VALIDATION_REPORT_ITEMS = { - VR_XML_OR_DTD: set( - [ - VE_XML_FORMAT_ERROR, - ] - ), - VR_ASSET_AND_RENDITION: set( - [ - VE_ASSET_ERROR, - VE_RENDITION_ERROR, - ] - ), - VR_INDIVIDUAL_CONTENT: set( - [ - VE_ARTICLE_IS_NOT_NEW_ERROR, - VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, - VE_BIBLIOMETRICS_DATA_ERROR, - VE_DATA_CONSISTENCY_ERROR, - ] - ), - VR_GROUPED_CONTENT: set( - [ - VE_CRITERIA_ISSUES_ERROR, - VE_SERVICES_DATA_ERROR, - ] - ), - VR_PACKAGE_FILE: set( - [ - VE_PACKAGE_FILE_ERROR, - ] - ), -} - VALIDATION_DICT_ERROR_CATEGORY_TO_REPORT = { VE_XML_FORMAT_ERROR: VR_XML_OR_DTD, VE_ASSET_ERROR: VR_ASSET_AND_RENDITION, VE_RENDITION_ERROR: VR_ASSET_AND_RENDITION, 
VE_ARTICLE_IS_NOT_NEW_ERROR: VR_INDIVIDUAL_CONTENT, VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR: VR_INDIVIDUAL_CONTENT, + VE_XML_CONTENT_ERROR: VR_INDIVIDUAL_CONTENT, VE_BIBLIOMETRICS_DATA_ERROR: VR_INDIVIDUAL_CONTENT, VE_DATA_CONSISTENCY_ERROR: VR_INDIVIDUAL_CONTENT, - VE_CRITERIA_ISSUES_ERROR: VR_GROUPED_CONTENT, - VE_SERVICES_DATA_ERROR: VR_GROUPED_CONTENT, + VE_CRITERIA_ISSUES_ERROR: VR_INDIVIDUAL_CONTENT, + VE_SERVICES_DATA_ERROR: VR_INDIVIDUAL_CONTENT, + VE_GROUP_DATA_ERROR: VR_GROUP_CONTENT, VE_PACKAGE_FILE_ERROR: VR_PACKAGE_FILE, + VE_UNEXPECTED_ERROR: VR_PACKAGE_FILE, + VE_FORBIDDEN_UPDATE_ERROR: VR_PACKAGE_FILE, + } + +def _get_categories(): + d = {} + for k, v in VALIDATION_DICT_ERROR_CATEGORY_TO_REPORT.items(): + d.setdefault(v, []) + d[v].append(k) + return d + + +VALIDATION_REPORT_ITEMS = _get_categories() + # Model ValidationResult, Field status VS_CREATED = "created" VS_DISAPPROVED = "disapproved" diff --git a/upload/controller.py b/upload/controller.py index a6455da4..013e9d36 100644 --- a/upload/controller.py +++ b/upload/controller.py @@ -2,6 +2,7 @@ import sys from datetime import datetime +from django.utils.translation import gettext as _ from packtools.sps.models.journal_meta import Title, ISSN from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre, GetXMLItemsError from packtools.sps.models.front_articlemeta_issue import ArticleMetaIssue @@ -21,11 +22,19 @@ choices, ) from .utils import file_utils, package_utils, xml_utils + +from upload import xml_validation from pid_provider.requester import PidRequester from article.models import Article from issue.models import Issue from journal.models import OfficialJournal, Journal from tracker.models import UnexpectedEvent +from upload.xml_validation import ( + validate_xml_content, + add_app_data, + add_sps_data, + add_journal_data, +) pp = PidRequester() @@ -122,7 +131,7 @@ def receive_package(package): data={}, ) # falhou, retorna response - return package + return response # sucesso, retorna 
package package._add_validation_result( error_category=choices.VE_XML_FORMAT_ERROR, @@ -132,11 +141,10 @@ def receive_package(package): "xml_path": package.file.path, }, ) - return package + return response except GetXMLItemsError as exc: # identifica os erros do arquivo Zip / XML - _identify_file_error(package) - return package + return _identify_file_error(package) def _identify_file_error(package): @@ -145,13 +153,18 @@ def _identify_file_error(package): xml_path = None xml_str = file_utils.get_xml_content_from_zip(package.file.path, xml_path) xml_utils.get_etree_from_xml_content(xml_str) - except (file_utils.BadPackageFileError, file_utils.PackageWithoutXMLFileError) as exc: + return {} + except ( + file_utils.BadPackageFileError, + file_utils.PackageWithoutXMLFileError, + ) as exc: package._add_validation_result( error_category=choices.VE_PACKAGE_FILE_ERROR, message=exc.message, status=choices.VS_DISAPPROVED, data={"exception": str(exc), "exception_type": str(type(exc))}, ) + return {"error": str(exc), "error_type": choices.VE_PACKAGE_FILE_ERROR} except xml_utils.XMLFormatError as e: data = { @@ -166,6 +179,7 @@ def _identify_file_error(package): data=data, status=choices.VS_DISAPPROVED, ) + return {"error": str(e), "error_type": choices.VE_XML_FORMAT_ERROR} def _check_article_and_journal(xml_with_pre): @@ -198,7 +212,9 @@ def _check_article_and_journal(xml_with_pre): if article: # verifica a consistência dos dados de journal e issue # no XML e na base de dados - _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article( + article, response + ) if response.get("error"): # inconsistências encontradas return _handle_error(response, article, article_previous_status) @@ -241,7 +257,9 @@ def _get_article_previous_status(article, response): response["package_category"] = choices.PC_ERRATUM return article_previos_status else: - response["error"] = f"Unexpected 
package. Article has no need to be updated / corrected. Article status: {article_previos_status}" + response[ + "error" + ] = f"Unexpected package. Article has no need to be updated / corrected. Article status: {article_previos_status}" response["error_type"] = choices.VE_FORBIDDEN_UPDATE_ERROR response["package_category"] = choices.PC_UPDATE @@ -284,12 +302,12 @@ def _get_journal(journal_title, issn_electronic, issn_print): if not j and journal_title: try: - j = OfficialJournal.objects.get(journal_title=journal_title) + j = OfficialJournal.objects.get(title=journal_title) except OfficialJournal.DoesNotExist: pass if j: - return Journal.objects.get(official=j) + return Journal.objects.get(official_journal=j) raise Journal.DoesNotExist(f"{journal_title} {issn_electronic} {issn_print}") @@ -301,11 +319,11 @@ def _check_journal(origin, xmltree): xml = ISSN(xmltree) issn_electronic = xml.epub issn_print = xml.ppub - return dict(journal=_get_journal(journal_title, issn_electronic, issn_print)) - except Journal.DoesNotExist: + except Journal.DoesNotExist as exc: + logging.exception(exc) return dict( - error=f"Journal in XML is not registered in Upload: {journal_title} {issn_electronic} (electronic) {issn_print} (print)", + error=f"Journal in XML is not registered in Upload: {journal_title} (electronic: {issn_electronic}, print: {issn_print})", error_type=choices.VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, ) except Exception as e: @@ -347,7 +365,9 @@ def _check_issue(origin, xmltree, journal): return {"error": str(e), "error_type": choices.VE_UNEXPECTED_ERROR} -def _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response): +def _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article( + article, response +): issue = response["issue"] journal = response["journal"] if article.issue is issue and article.journal is journal: @@ -366,3 +386,111 @@ def _compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(articl 
error_type=choices.VE_DATA_CONSISTENCY_ERROR, ) ) + + +def validate_xml_content(package, journal, issue): + # VE_BIBLIOMETRICS_DATA_ERROR = "bibliometrics-data-error" + # VE_SERVICES_DATA_ERROR = "services-data-error" + # VE_DATA_CONSISTENCY_ERROR = "data-consistency-error" + # VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error" + + # TODO completar data + data = {} + # add_app_data(data, app_data) + # add_journal_data(data, journal, issue) + # add_sps_data(data, sps_data) + + try: + for xml_with_pre in XMLWithPre.create(path=package.file.path): + _validate_xml_content(package, xml_with_pre, data) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "upload.controller.validate_xml_content", + "detail": dict(file_path=package.file.path), + }, + ) + + +def _validate_xml_content(package, xml_with_pre, data): + # TODO completar data + data = {} + # xml_validation.add_app_data(data, app_data) + # xml_validation.add_journal_data(data, journal, issue) + # xml_validation.add_sps_data(data, sps_data) + + try: + results = xml_validation.validate_xml_content( + xml_with_pre.sps_pkg_name, xml_with_pre.xmltree, data + ) + for result in results: + _handle_xml_content_validation_result(package, xml_with_pre.sps_pkg_name, result) + try: + error = ValidationResult.objects.filter( + package=package, + status=choices.VS_DISAPPROVED, + category__in=choices.VALIDATION_REPORT_ITEMS[choices.VR_INDIVIDUAL_CONTENT], + )[0] + package.status = choices.PS_VALIDATED_WITH_ERRORS + except IndexError: + # nenhum erro + package.status = choices.PS_VALIDATED_WITHOUT_ERRORS + package.save() + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "upload.controller._validate_xml_content", + "detail": { + "file": package.file.path, + "item": 
xml_with_pre.sps_pkg_name, + "exception": str(e), + "exception_type": str(type(e)), + }, + }, + ) + + +def _handle_xml_content_validation_result(package, sps_pkg_name, result): + # ['xpath', 'advice', 'title', 'expected_value', 'got_value', 'message', 'validation_type', 'response'] + + try: + if result["response"] == "OK": + status = choices.VS_APPROVED + else: + status = choices.VS_DISAPPROVED + + # VE_BIBLIOMETRICS_DATA_ERROR, VE_SERVICES_DATA_ERROR, + # VE_DATA_CONSISTENCY_ERROR, VE_CRITERIA_ISSUES_ERROR, + error_category = result.get("error_category") or choices.VE_XML_CONTENT_ERROR + + message = result["message"] + advice = result["advice"] or "" + message = ". ".join([_(message), _(advice)]) + package._add_validation_result( + error_category=error_category, + status=status, + message=message, + data=result, + ) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "upload.controller._handle_xml_content_validation_result", + "detail": { + "file": package.file.path, + "item": sps_pkg_name, + "result": result, + "exception": str(e), + "exception_type": str(type(e)), + }, + }, + ) diff --git a/upload/forms.py b/upload/forms.py index 5cdd8f64..cf208e5c 100644 --- a/upload/forms.py +++ b/upload/forms.py @@ -3,18 +3,12 @@ class UploadPackageForm(WagtailAdminModelForm): - def save_all(self, user, article, issue): + def save_all(self, user): upload_package = super().save(commit=False) if self.instance.pk is None: upload_package.creator = user - if article is not None: - upload_package.article = article - - if issue is not None: - upload_package.issue = issue - self.save() return upload_package diff --git a/upload/models.py b/upload/models.py index 5bdbc5c3..f1365400 100644 --- a/upload/models.py +++ b/upload/models.py @@ -41,7 +41,10 @@ class Package(CommonControlField): default=choices.PS_ENQUEUED_FOR_VALIDATION, ) article = models.ForeignKey( - 
Article, blank=True, null=True, on_delete=models.SET_NULL, + Article, + blank=True, + null=True, + on_delete=models.SET_NULL, ) issue = models.ForeignKey(Issue, blank=True, null=True, on_delete=models.SET_NULL) assignee = models.ForeignKey(User, blank=True, null=True, on_delete=models.SET_NULL) @@ -54,9 +57,6 @@ def autocomplete_label(self): panels = [ FieldPanel("file"), - FieldPanel("category"), - AutocompletePanel("article"), - AutocompletePanel("issue"), ] def __str__(self): @@ -93,16 +93,13 @@ def add_validation_result( cls, package_id, error_category=None, status=None, message=None, data=None ): package = cls.objects.get(pk=package_id) - val_res = package._add_validation_result( - error_category, status, message, data) + val_res = package._add_validation_result(error_category, status, message, data) return val_res def _add_validation_result( self, error_category=None, status=None, message=None, data=None ): - val_res = ValidationResult.create( - error_category, self, status, message, data - ) + val_res = ValidationResult.create(error_category, self, status, message, data) self.update_status(val_res) return val_res @@ -145,26 +142,27 @@ def create_or_update(cls, user_id, file, article=None, category=None, status=Non user_id, file, article_id=article.id, category=category, status=status ) - def check_errors(self): - for vr in self.validationresult_set.filter(status=choices.VS_DISAPPROVED): - if vr.resolution.action in (choices.ER_ACTION_TO_FIX, ""): - self.status = choices.PS_PENDING_CORRECTION - self.save() - return self.status - - self.status = choices.PS_READY_TO_BE_FINISHED + def check_resolutions(self): + try: + item = self.validationresult_set.filter( + status=choices.VS_DISAPPROVED, + resolution__action__in=[choices.ER_ACTION_TO_FIX, ""], + )[0] + self.status = choices.PS_PENDING_CORRECTION + except IndexError: + self.status = choices.PS_READY_TO_BE_FINISHED self.save() return self.status def check_opinions(self): - for vr in 
self.validationresult_set.filter(status=choices.VS_DISAPPROVED): - opinion = vr.analysis.opinion - if opinion in (choices.ER_OPINION_FIX_DEMANDED, ""): - self.status = choices.PS_PENDING_CORRECTION - self.save() - return self.status - - self.status = choices.PS_ACCEPTED + try: + item = self.validationresult_set.filter( + status=choices.VS_DISAPPROVED, + analysis__opinion__in=[choices.ER_OPINION_FIX_DEMANDED, ""], + )[0] + self.status = choices.PS_PENDING_CORRECTION + except IndexError: + self.status = choices.PS_ACCEPTED self.save() return self.status @@ -186,7 +184,7 @@ class ValidationResult(models.Model): id = models.AutoField(primary_key=True) category = models.CharField( _("Error category"), - max_length=64, + max_length=32, choices=choices.VALIDATION_ERROR_CATEGORY, null=False, blank=False, @@ -247,9 +245,7 @@ class Meta: base_form_class = ValidationResultForm @classmethod - def create( - cls, error_category, package, status=None, message=None, data=None - ): + def create(cls, error_category, package, status=None, message=None, data=None): val_res = ValidationResult() val_res.category = error_category val_res.package = package @@ -270,8 +266,7 @@ def update(self, error_category, status=None, message=None, data=None): @classmethod def add_resolution(cls, user, data): - validation_result = cls.objects.get( - pk=data["validation_result_id"].value()) + validation_result = cls.objects.get(pk=data["validation_result_id"].value()) try: opinion = data["opinion"].value() @@ -302,6 +297,7 @@ class ErrorResolution(CommonControlField): _("Action"), max_length=32, choices=choices.ERROR_RESOLUTION_ACTION, + default=choices.ER_ACTION_TO_FIX, null=True, blank=True, ) @@ -333,6 +329,8 @@ def create_or_update(cls, user, validation_result, action, rationale): obj = cls.get(validation_result) obj.updated = datetime.now() obj.updated_by = user + obj.action = action + obj.rationale = rationale obj.save() except cls.DoesNotExist: obj = cls.create(user, validation_result, action, 
rationale) diff --git a/upload/tasks.py b/upload/tasks.py index 5ed8f808..01e7839a 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -1,4 +1,6 @@ import json +import sys +import logging from celery.result import AsyncResult from django.contrib.auth import get_user_model @@ -16,18 +18,20 @@ from article.models import Article from config import celery_app from issue.models import Issue +from journal.models import Journal from journal.controller import get_journal_dict_for_validation from libs.dsm.publication.documents import get_document, get_similar_documents +from tracker.models import UnexpectedEvent from . import choices, controller, exceptions from .utils import file_utils, package_utils, xml_utils from upload.models import Package -from upload.xml_validation import validate_xml_content, add_app_data, add_sps_data, add_journal_data User = get_user_model() +# TODO REMOVE def run_validations( filename, package_id, package_category, article_id=None, issue_id=None ): @@ -442,6 +446,7 @@ def task_validate_renditions(file_path, xml_path, package_id): return True +# TODO REMOVE @celery_app.task(name="Validate XML") def task_validate_content_xml(file_path, xml_path, package_id): xml_str = file_utils.get_xml_content_from_zip(file_path) @@ -544,17 +549,20 @@ def task_request_pid_for_accepted_packages(self, user_id): @celery_app.task(bind=True) -def task_validate_original_zip_file(self, package_id, file_path, journal_id, issue_id, article_id): +def task_validate_original_zip_file( + self, package_id, file_path, journal_id, issue_id, article_id +): - for xml_with_pre in XMLWithPre.create(file_path=file_path): + for xml_with_pre in XMLWithPre.create(path=file_path): xml_path = xml_with_pre.filename - break - if xml_path: + # FIXME nao usar o otimizado neste momento + optimised_filepath = task_optimise_package(file_path) + # Aciona validação de Assets task_validate_assets.apply_async( kwargs={ - "file_path": file_path, + "file_path": optimised_filepath, "xml_path": 
xml_path, "package_id": package_id, }, @@ -563,7 +571,7 @@ def task_validate_original_zip_file(self, package_id, file_path, journal_id, iss # Aciona validação de Renditions task_validate_renditions.apply_async( kwargs={ - "file_path": file_path, + "file_path": optimised_filepath, "xml_path": xml_path, "package_id": package_id, }, @@ -583,33 +591,30 @@ def task_validate_original_zip_file(self, package_id, file_path, journal_id, iss @celery_app.task(bind=True) -def task_validate_xml_content(self, file_path, xml_path, package_id, journal_id, issue_id, article_id): - # VE_BIBLIOMETRICS_DATA_ERROR = "bibliometrics-data-error" - # VE_SERVICES_DATA_ERROR = "services-data-error" - # VE_DATA_CONSISTENCY_ERROR = "data-consistency-error" - # VE_CRITERIA_ISSUES_ERROR = "criteria-issues-error" - - # TODO completar data - data = {} - # add_app_data(data, app_data) - # add_journal_data(data, journal, issue) - # add_sps_data(data, sps_data) - - package = Package.objects.get(pk=package_id) - for xml_with_pre in XMLWithPre.create(file_path=file_path): - results = validate_xml_content(xml_with_pre.sps_pkg_name, xml_with_pre.xmltree, data) - - for result in results: - # ['xpath', 'advice', 'title', 'expected_value', 'got_value', 'message', 'validation_type', 'response'] - if not result["response"] == "ERROR": - continue - - message = result["message"] - advice = result["advice"] or '' - message = ". 
".join(_(message), _(advice)) - package._add_validation_result( - error_category=choices.VE_DATA_CONSISTENCY_ERROR, - status=choices.VS_DISAPPROVED, - message=message, - data=result, - ) +def task_validate_xml_content( + self, file_path, xml_path, package_id, journal_id, issue_id, article_id +): + try: + package = Package.objects.get(pk=package_id) + if journal_id: + journal = Journal.objects.get(pk=journal_id) + else: + journal = None + + if issue_id: + issue = Issue.objects.get(pk=issue_id) + else: + issue = None + + controller.validate_xml_content(package, journal, issue) + + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail={ + "operation": "upload.tasks.task_validate_xml_content", + "detail": dict(file_path=file_path, xml_path=xml_path), + }, + ) diff --git a/upload/tests.py b/upload/tests.py index bfd1b3a2..eb798cb2 100644 --- a/upload/tests.py +++ b/upload/tests.py @@ -12,7 +12,9 @@ # Create your tests here. 
class ControllerTest(TestCase): - def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_and_issue_differ(self): + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_and_issue_differ( + self, + ): response = {"journal": "not journal", "issue": "not issue"} article = Mock(spec=Article) article.issue = "issue" @@ -23,11 +25,15 @@ def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_j "error": f"{article.journal} {article.issue} (registered) differs from {journal} {issue} (XML)", "error_type": choices.VE_DATA_CONSISTENCY_ERROR, } - controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article( + article, response + ) self.assertEqual(expected["error"], response["error"]) self.assertEqual(expected["error_type"], response["error_type"]) - def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_issue_differs(self): + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_issue_differs( + self, + ): response = {"journal": "Journal", "issue": "Not same issue"} article = Mock(spec=Article) article.issue = "Issue" @@ -38,11 +44,15 @@ def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_i "error": f"{article.journal} {article.issue} (registered) differs from {journal} {issue} (XML)", "error_type": choices.VE_DATA_CONSISTENCY_ERROR, } - controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article( + article, response + ) self.assertEqual(expected["error"], response["error"]) self.assertEqual(expected["error_type"], response["error_type"]) - def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_differs(self): + def 
test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_differs( + self, + ): response = {"journal": "not journal", "issue": "issue"} article = Mock(spec=Article) article.issue = "issue" @@ -53,11 +63,15 @@ def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_j "error": f"{article.journal} (registered) differs from {journal} (XML)", "error_type": choices.VE_ARTICLE_JOURNAL_INCOMPATIBILITY_ERROR, } - controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article( + article, response + ) self.assertEqual(expected["error"], response["error"]) self.assertEqual(expected["error_type"], response["error_type"]) - def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_and_issue_compatible(self): + def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_journal_and_issue_compatible( + self, + ): response = {"journal": "journal", "issue": "issue"} article = Mock(spec=Article) article.issue = "issue" @@ -67,7 +81,9 @@ def test__compare_journal_and_issue_from_xml_to_journal_and_issue_from_article_j expected = { "package_status": choices.PS_ENQUEUED_FOR_VALIDATION, } - controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article(article, response) + controller._compare_journal_and_issue_from_xml_to_journal_and_issue_from_article( + article, response + ) self.assertIsNone(response.get("error")) self.assertEqual(expected["package_status"], response["package_status"]) @@ -274,7 +290,9 @@ def test__get_journal_with_issn_print(self, mock_journal_get, mock_official_j_ge @patch("upload.controller.OfficialJournal.objects.get") @patch("upload.controller.Journal.objects.get") - def test__get_journal_with_journal_title(self, mock_journal_get, mock_official_j_get): + def test__get_journal_with_journal_title( + self, mock_journal_get, 
mock_official_j_get + ): journal = Journal() official_j = OfficialJournal() mock_journal_get.return_value = journal @@ -289,7 +307,9 @@ def test__get_journal_with_journal_title(self, mock_journal_get, mock_official_j @patch("upload.controller.OfficialJournal.objects.get") @patch("upload.controller.Journal.objects.get") - def test__get_journal_with_issn_print_after_raise_exception_does_not_exist_for_issn_electronic(self, mock_journal_get, mock_official_j_get): + def test__get_journal_with_issn_print_after_raise_exception_does_not_exist_for_issn_electronic( + self, mock_journal_get, mock_official_j_get + ): journal = Journal() official_j = OfficialJournal() mock_journal_get.return_value = journal @@ -307,13 +327,15 @@ def test__get_journal_with_issn_print_after_raise_exception_does_not_exist_for_i [ call(issn_electronic="EEEEEEE"), call(issn_print="XXXXXXX"), - ] + ], ) mock_journal_get.assert_called_with(official=official_j) @patch("upload.controller.OfficialJournal.objects.get") @patch("upload.controller.Journal.objects.get") - def test__get_journal_raises_multiple_object_returned(self, mock_journal_get, mock_official_j_get): + def test__get_journal_raises_multiple_object_returned( + self, mock_journal_get, mock_official_j_get + ): journal = Journal() official_j = OfficialJournal() mock_journal_get.return_value = journal @@ -328,14 +350,13 @@ def test__get_journal_raises_multiple_object_returned(self, mock_journal_get, mo mock_official_j_get.mock_calls, [ call(issn_electronic="EEEEEEE"), - ] + ], ) mock_journal_get.assert_not_called() @patch("upload.controller.Article") class GetArticlePreviousStatusTest(TestCase): - def test_get_article_previous_status_require_update(self, mock_article): response = {} article = Mock(spec=Article) @@ -354,7 +375,9 @@ def test_get_article_previous_status_required_erratum(self, mock_article): self.assertEqual(article.status, article_choices.AS_CHANGE_SUBMITTED) self.assertEqual(response["package_category"], choices.PC_ERRATUM) - def 
test_get_article_previous_status_not_required_erratum_and_not_require_update(self, mock_article): + def test_get_article_previous_status_not_required_erratum_and_not_require_update( + self, mock_article + ): response = {} article = Mock(spec=Article) article.status = "no matter what" @@ -362,7 +385,10 @@ def test_get_article_previous_status_not_required_erratum_and_not_require_update self.assertIsNone(result) self.assertEqual("no matter what", article.status) self.assertEqual(response["package_category"], choices.PC_UPDATE) - self.assertEqual(f"Unexpected package. Article has no need to be updated / corrected. Article status: no matter what", response["error"]) + self.assertEqual( + f"Unexpected package. Article has no need to be updated / corrected. Article status: no matter what", + response["error"], + ) self.assertEqual(choices.VE_FORBIDDEN_UPDATE_ERROR, response["error_type"]) @@ -371,8 +397,9 @@ def test_get_article_previous_status_not_required_erratum_and_not_require_update @patch("upload.controller.Article.objects.get") @patch("upload.controller.PidRequester.is_registered_xml_with_pre") class CheckArticleAndJournalTest(TestCase): - - def test__check_article_and_journal__registered_and_allowed_to_be_updated(self, mock_xml_with_pre, mock_article_get, mock_issue_get, mock_journal_get): + def test__check_article_and_journal__registered_and_allowed_to_be_updated( + self, mock_xml_with_pre, mock_article_get, mock_issue_get, mock_journal_get + ): mock_xml_with_pre.return_value = {"v3": "yjukillojhk"} @@ -417,7 +444,9 @@ def test__check_article_and_journal__registered_and_allowed_to_be_updated(self, self.assertEqual(choices.PS_ENQUEUED_FOR_VALIDATION, result["package_status"]) self.assertEqual(choices.PC_UPDATE, result["package_category"]) - def test__check_article_and_journal__new_document(self, mock_xml_with_pre, mock_article_get, mock_issue_get, mock_journal_get): + def test__check_article_and_journal__new_document( + self, mock_xml_with_pre, mock_article_get, 
mock_issue_get, mock_journal_get + ): mock_xml_with_pre.return_value = {} @@ -498,4 +527,4 @@ def test__check_article_and_journal__new_document(self, mock_xml_with_pre, mock_ # return response # # documento novo # response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION -# return response \ No newline at end of file +# return response diff --git a/upload/wagtail_hooks.py b/upload/wagtail_hooks.py index 87b3e750..dbc2e424 100644 --- a/upload/wagtail_hooks.py +++ b/upload/wagtail_hooks.py @@ -2,6 +2,7 @@ from django.contrib import messages from django.http import HttpResponseRedirect +from django.shortcuts import get_object_or_404, redirect, render from django.urls import include, path from django.utils.translation import gettext as _ from wagtail import hooks @@ -27,77 +28,55 @@ choices, ) from .permission_helper import UploadPermissionHelper -from .tasks import run_validations +from .controller import receive_package from .utils import package_utils +from upload.tasks import task_validate_original_zip_file class PackageCreateView(CreateView): - def get_instance(self): - package_obj = super().get_instance() - - pkg_category = self.request.GET.get("package_category") - if pkg_category: - package_obj.category = pkg_category - - article_id = self.request.GET.get("article_id") - if article_id: - try: - package_obj.article = Article.objects.get(pk=article_id) - except Article.DoesNotExist: - ... 
- - return package_obj def form_valid(self, form): - article_data = self.request.POST.get("article") - article_json = json.loads(article_data) or {} - article_id = article_json.get("pk") - try: - article = Article.objects.get(pk=article_id) - except (Article.DoesNotExist, ValueError): - article = None - issue_data = self.request.POST.get("issue") - issue_json = json.loads(issue_data) or {} - issue_id = issue_json.get("pk") - try: - issue = Issue.objects.get(pk=issue_id) - except (Issue.DoesNotExist, ValueError): - issue = None + package = form.save_all(self.request.user) - self.object = form.save_all(self.request.user, article, issue) + response = receive_package(package) - if self.object.category in (choices.PC_UPDATE, choices.PC_ERRATUM): - if self.object.article is None: - messages.error( - self.request, - _("It is necessary to select an Article."), - ) - return HttpResponseRedirect(self.request.META["HTTP_REFERER"]) - else: - messages.success( - self.request, - _("Package to change article has been successfully submitted."), - ) + if response.get("error_type") == choices.VE_PACKAGE_FILE_ERROR: + # error no arquivo + messages.error(self.request, response.get("error")) + return HttpResponseRedirect(self.request.META["HTTP_REFERER"]) - if self.object.category == choices.PC_NEW_DOCUMENT: - if self.object.issue is None: - messages.error(self.request, _("It is necessary to select an Issue.")) - return HttpResponseRedirect(self.request.META["HTTP_REFERER"]) - else: - messages.success( - self.request, - _("Package to create article has been successfully submitted."), - ) + if response.get("error"): + # error + messages.error(self.request, response.get("error")) + return redirect(f"/admin/upload/package/inspect/{package.id}") - run_validations( - self.object.file.name, - self.object.id, - self.object.category, - article_id, - issue_id, + messages.success( + self.request, + _("Package has been successfully submitted and will be analyzed"), ) + # dispara a tarefa que 
realiza as validações de + # assets, renditions, XML content etc + + try: + journal_id = response["journal"].id + except (KeyError, AttributeError): + journal_id = None + try: + issue_id = response["issue"].id + except (KeyError, AttributeError): + issue_id = None + + task_validate_original_zip_file.apply_async( + kwargs=dict( + package_id=package.id, + file_path=package.file.path, + journal_id=journal_id, + issue_id=issue_id, + article_id=package.article and package.article.id or None, + ) + ) return HttpResponseRedirect(self.get_success_url()) @@ -378,7 +357,7 @@ class UploadModelAdminGroup(ModelAdminGroup): menu_order = get_menu_order("upload") -# modeladmin_register(UploadModelAdminGroup) +modeladmin_register(UploadModelAdminGroup) @hooks.register("register_admin_urls") diff --git a/upload/xml_validation.py b/upload/xml_validation.py index 304d0b63..0af4c99d 100644 --- a/upload/xml_validation.py +++ b/upload/xml_validation.py @@ -22,6 +22,9 @@ from packtools.sps.validation.journal_meta import JournalMetaValidation from packtools.sps.validation.preprint import PreprintValidation from packtools.sps.validation.related_articles import RelatedArticlesValidation + +from upload import choices +from upload.models import ValidationResult from tracker.models import UnexpectedEvent @@ -84,28 +87,38 @@ def add_sps_data(data, sps_data): def validate_xml_content(sps_pkg_name, xmltree, data): - - functions = ( - validate_affiliations, - validate_languages, - validate_article_attributes, - validate_article_id_other, - validate_subjects, - validate_article_type, - validate_authors, - validate_data_availability, - validate_doi, - validate_article_languages, - validate_licenses, - validate_toc_sections, - validate_xref, - validate_dates, - validate_journal, - validate_preprint, - validate_related_articles, + # TODO adicionar error_category + # VE_XML_CONTENT_ERROR: generic usage + # VE_BIBLIOMETRICS_DATA_ERROR: used in metrics + # VE_SERVICES_DATA_ERROR: used in reports + # 
VE_DATA_CONSISTENCY_ERROR: data consistency + # VE_CRITERIA_ISSUES_ERROR: required by the criteria document + + error_category_and_function_items = ( + (choices.VE_BIBLIOMETRICS_DATA_ERROR, validate_affiliations), + (choices.VE_BIBLIOMETRICS_DATA_ERROR, validate_authors), + (choices.VE_BIBLIOMETRICS_DATA_ERROR, validate_languages), + (choices.VE_CRITERIA_ISSUES_ERROR, validate_article_attributes), + (choices.VE_CRITERIA_ISSUES_ERROR, validate_data_availability), + (choices.VE_CRITERIA_ISSUES_ERROR, validate_licenses), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_article_id_other), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_article_languages), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_article_type), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_dates), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_doi), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_journal), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_preprint), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_related_articles), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_subjects), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_toc_sections), + (choices.VE_DATA_CONSISTENCY_ERROR, validate_xref), ) - for f in functions: - yield from f(sps_pkg_name, xmltree, data) + for error_category, f in error_category_and_function_items: + for item in f(sps_pkg_name, xmltree, data): + if item["validation_type"] in ("value in list", "value", "match"): + error_category = choices.VE_DATA_CONSISTENCY_ERROR + item["error_category"] = item.get("error_category") or error_category + yield item def validate_affiliations(sps_pkg_name, xmltree, data): From 07eb66c3bb54818dcf5235299bbfef816df2d662 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:04:24 -0300 Subject: [PATCH 21/25] =?UTF-8?q?Refatora=20upload=20parte=203=20-=20agrup?= =?UTF-8?q?a=20em=20uma=20tarefa=20as=20valida=C3=A7=C3=B5es:=20assets,=20?= 
=?UTF-8?q?renditions,=20conte=C3=BAdo=20do=20XML=20(#398)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes --- upload/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/upload/tasks.py b/upload/tasks.py index 01e7839a..4a0eac0b 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -27,6 +27,7 @@ from .utils import file_utils, package_utils, xml_utils from upload.models import Package +from upload.xml_validation import validate_xml_content, add_app_data, add_sps_data, add_journal_data User = get_user_model() From afd5cef869e3d9a7910fd60a881a849976b40cf8 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:13:29 -0300 Subject: [PATCH 22/25] =?UTF-8?q?Refatora=20upload=20parte=203=20-=20agrup?= =?UTF-8?q?a=20em=20uma=20tarefa=20as=20valida=C3=A7=C3=B5es:=20assets,=20?= =?UTF-8?q?renditions,=20conte=C3=BAdo=20do=20XML=20(#399)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes From 29fa764070510ef919d4438fd50bf6a8c4d7ff93 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 10 Mar 2024 13:50:36 -0300 Subject: [PATCH 23/25] =?UTF-8?q?Refatora=20upload=20parte=202=20-=20Adici?= 
=?UTF-8?q?ona=20fun=C3=A7=C3=B5es=20em=20upload.controller=20para=20avali?= =?UTF-8?q?ar=20o=20pacote=20rec=C3=A9m=20recebido=20(#400)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cria os upload.choices.VE_UNEXPECTED_ERROR e VE_FORBIDDEN_UPDATE_ERROR * Cria/Edita Package.get, create_or_update, _add_validation_result * Cria funções para avaliar o XML recém-recebido (é esperado? os dados de journal e issue estão corretos?) * Cria testes para upload.controller.* * Adiciona a migração de banco de dados por criar novos valores de choices * Corrige ausência de definição de variáveis --- upload/controller.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/upload/controller.py b/upload/controller.py index 013e9d36..a6f8ea89 100644 --- a/upload/controller.py +++ b/upload/controller.py @@ -144,7 +144,8 @@ def receive_package(package): return response except GetXMLItemsError as exc: # identifica os erros do arquivo Zip / XML - return _identify_file_error(package) + _identify_file_error(package) + return package def _identify_file_error(package): From 614776be8249c5e84093b3436e584e4fa31e5387 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 31 Mar 2024 14:17:44 -0300 Subject: [PATCH 24/25] Atualiza main_ingress com main (#425) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Corrige Institution.__str__, adiciona atributos de autocomplete e altera InstitutionHistory.panels de FieldPanel para Autocomplete (#401) * Faz correções na app journal: adiciona Journal.title, wagtail_hooks.JournalCreateView, etc (#402) * Adiciona Journal.title * Modifica os atributos de journal.models.Owner e Publisher * Cria journal.wagtail.JournalCreateView para adicionar o usuário como creator * Adiciona migrações de banco de dados relacionados a journal * Adiciona filtros de journal_acron e publication_year para migrar dados de 
artigos (#403) * Adiciona filtros de journal_acron e publication_year para migrar dados de artigos, criando uma amostragem de migração * Adiciona os parâmetros journal_acron e publication_year * Garante que no XML migrado (seja nativo ou gerado a partir do HTML) tenha o PID v2 e o order (article-id other) (#405) * Corrige ou adiciona ao XML o elemento pid-v2 usando como valor o pid do artigo do site clássico * Atualiza packtools versão 3.4.0 para ter XMLWithPre.order Corrige ou adiciona ao XML o elemento article-id (other/order) usando como valor os últimos 5 dígitos do pid do artigo do site clássico * Atualiza a versão da biblioteca scielo_classic_website para 1.6.4 para corrigir a obtenção de registros de artigos em serial xml * Evita guardar versões anteriores dos arquivos * Cria o procedimento de corrigir o valor do Pid v2 (#410) * Cria PidProviderXML.fix_pid_v2 * Cria FixPidV2 para controlar o que foi corrigido no upload e no core * Cria FixPidV2ModelAdmin * Adiciona PidProviderAPIClient.fix_pid_v2, fix_pid_v2_url. 
Refatora PidProviderAPIClient.enabled * Cria APIPidProviderFixPidV2Error * Cria provider.requester.PidRequester.fix_pid_v2 * Cria SPSPkg.fix_pid_v2 * Cria ArticleProc.fix_pid_v2 e adiciona a chamada no procedimento de generate_sps_package * Cria tarefas para corriger o valor de pid v2 em PidProviderXML a partir de ArticleProc.pid * Cria provider.provider.PidProvider com os métodos fix_pid_v2, get_sps_pkg_name, get_xmltree * Adiciona a migração correspondente ao modelo FixPidV2 * Corrige ausencia de pid v3 no xml submetido do upload para o core (#411) * Atualiza a versão de packtools 4.1.1 para usar XMLWithPre.data e .files * Modifica PidProviderXML.is_registered para atualizar os pids de xml_with_pre com os valores registrados, além disso, era necessário retornar se está registrado e igual ou registrado e diferente ou não registrado * Distingue status de demanda de registro e status do registro * Modifica PidProviderAPIClient._process_post_xml_response para atualizar ou não os valores dos pids de xml_with_pre com os valores fornecidos pelo Core * Adiciona registered_in_core como filtro de PidProviderXMLModelAdmin * Atualiza dependencias base.txt e production.txt (#409) * Comenta app captcha * Atualiza dependencias --------- Co-authored-by: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> * Modifica comportamento de Pid provider, que passa a aceitar mudanças de pids (#415) * Cria PidProviderXML.complete_pids, que completa pids com registrados ou inéditos * Cria PidProviderXML._check_pids, que valida pid do XML é inédito e/ou registrado e/ou pertencente a outro documento * Cria PidProviderXML.get_pids, que retorna todos os pids vigentes e outros * Corrige PidProviderXML._is_registered_pid, adicionando a verificação em OtherPid * Corrige PidProviderXML._get_unique_v3, que usa _is_registered_pid e agora não precisa verificar OtherPid * Ajusta PidProviderXML._add_other_pid * Remove PidProviderXML._complete_pids excedente * Corrige 
PidProvider._add_pid_v3 e _add_pid_v2 * Corrige PidProviderXML.is_registered * Ajusta PidProviderXML._save, removendo _add_other_pid e removendo change_pids * Modifica PidProviderXML.register * Melhora XMLVersion.__str__, mostrando nome do arquivo + data no lugar de pid v3 * Melhora _process_post_xml_response * Para PidProvider.provide_pid_for_xml_with_pre, adiciona parâmetro caller, completa XML com pids registrados se ausentes no XML, adiciona xml_changed ao retorno * Adiciona comando para completar XML com pids registrados antes de solicitar pid para Core * Cria meio de configurar / habilitar / desabilitar fix_pid_v2 do Core (#416) * Cria a classe PidProviderEndpoint, inline de PidProviderConfig * Modifica o modo de obter fix_pid_v2_url * Adiciona modelo PidProviderEndpoint * Adiciona 'fixed_in_core': False ao retorno de fix_pid_v2 (#417) * Evita que SPSPkg armazene arquivos em excesso (#418) * Verifica se xml registrado e xml recebido são iguais, somente após completar XML com os pids registrados (#419) * Compara se xml_with_pre é igual ao registrado somente após adicionar os pids registrados se aplicável * Adiciona a funcionalidade de forçar o registro no Core mesmo que o registro está indicando que já está sincronizado * Melhora ordem dos itens do menu (#408) * Refatora a funcionalidade da ordem do menu * Reordena menu itens padrao do wagtail e remove algum deless * Insere funcao get_menu_order em menu_order * Altera a ordem dos app * Move as operações anteriores de ArticleProc, IssueProc, JournalProc para um arquivo (#420) * Cria o modelo ArticleProcReport e ArticleProcReportModelAdmin * Cria o modelo ProcReport para armazenar processamentos anteriores, mantendo apenas o vigente nos respectivos ArticleProc, IssueProc, JournalProc * Adiciona as migrações de banco de dados * Melhora o registro das operações das tarefas relacionadas à migração e publicação (#422) * Melhora os rótulos, deixa todos os campos não editáveis, apresenta os eventos do mais recente 
para o mais antigo * Adiciona Article.data, Issue.data, Journal.data * Adiciona retorno às função que criam instâncias de Article, Issue e Journal * Adiciona Article.data, Issue.data, Journal.data nos detalhes das operações de entrada de dados * Aplica black * Adiciona * Adiciona mais detalhes ao registro da tarefa de gerar o XML a partir do HTML * Adiciona mais detalhes ao registro da tarefa de gerar o pacote SPS * Corrige o valor de 'completed' dos resultados das operações de solicitação de pid v3 * Adiciona o parâmetro compression em ZipFile * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Modifica o sps_pkg_status para DONE se o pacote não tem todos os texts * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Corrige ausência de importação de ZIP_DEFLATED * Adiciona o atributo order para a listagem dos itens na área administrativa * Adiciona as migrações de banco de dados * Adiciona detalhes do processamento da adição de arquivos no minio * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#398) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 2 - Adiciona funções em upload.controller para avaliar o pacote recém recebido (#400) * Cria os 
upload.choices.VE_UNEXPECTED_ERROR e VE_FORBIDDEN_UPDATE_ERROR * Cria/Edita Package.get, create_or_update, _add_validation_result * Cria funções para avaliar o XML recém-recebido (é esperado? os dados de journal e issue estão corretos?) * Cria testes para upload.controller.* * Adiciona a migração de banco de dados por criar novos valores de choices * Corrige ausência de definição de variáveis * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes --------- Co-authored-by: Samuel Veiga Rangel <82840278+samuelveigarangel@users.noreply.github.com> From 4ed97ec08f3eab192a0ba435e4d98816d0e21089 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> Date: Sun, 31 Mar 2024 15:23:59 -0300 Subject: [PATCH 25/25] =?UTF-8?q?Modifica=20a=20entrada=20do=20pacote=20pe?= =?UTF-8?q?lo=20upload=20e=20adiciona=20novas=20valida=C3=A7=C3=B5es=20do?= =?UTF-8?q?=20XML=20(#426)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Corrige Institution.__str__, adiciona atributos de autocomplete e altera InstitutionHistory.panels de FieldPanel para Autocomplete (#401) * Faz correções na app journal: adiciona Journal.title, wagtail_hooks.JournalCreateView, etc (#402) * Adiciona Journal.title * Modifica os atributos de journal.models.Owner e Publisher * Cria journal.wagtail.JournalCreateView para adicionar o usuário como creator * Adiciona migrações de banco de dados relacionados a journal * Adiciona filtros de journal_acron e publication_year para migrar dados de artigos (#403) * Adiciona filtros de journal_acron e publication_year para migrar 
dados de artigos, criando uma amostragem de migração * Adiciona os parâmetros journal_acron e publication_year * Garante que no XML migrado (seja nativo ou gerado a partir do HTML) tenha o PID v2 e o order (article-id other) (#405) * Corrige ou adiciona ao XML o elemento pid-v2 usando como valor o pid do artigo do site clássico * Atualiza packtools versão 3.4.0 para ter XMLWithPre.order Corrige ou adiciona ao XML o elemento article-id (other/order) usando como valor os últimos 5 dígitos do pid do artigo do site clássico * Atualiza a versão da biblioteca scielo_classic_website para 1.6.4 para corrigir a obtenção de registros de artigos em serial xml * Evita guardar versões anteriores dos arquivos * Cria o procedimento de corrigir o valor do Pid v2 (#410) * Cria PidProviderXML.fix_pid_v2 * Cria FixPidV2 para controlar o que foi corrigido no upload e no core * Cria FixPidV2ModelAdmin * Adiciona PidProviderAPIClient.fix_pid_v2, fix_pid_v2_url. Refatora PidProviderAPIClient.enabled * Cria APIPidProviderFixPidV2Error * Cria provider.requester.PidRequester.fix_pid_v2 * Cria SPSPkg.fix_pid_v2 * Cria ArticleProc.fix_pid_v2 e adiciona a chamada no procedimento de generate_sps_package * Cria tarefas para corriger o valor de pid v2 em PidProviderXML a partir de ArticleProc.pid * Cria provider.provider.PidProvider com os métodos fix_pid_v2, get_sps_pkg_name, get_xmltree * Adiciona a migração correspondente ao modelo FixPidV2 * Corrige ausencia de pid v3 no xml submetido do upload para o core (#411) * Atualiza a versão de packtools 4.1.1 para usar XMLWithPre.data e .files * Modifica PidProviderXML.is_registered para atualizar os pids de xml_with_pre com os valores registrados, além disso, era necessário retornar se está registrado e igual ou registrado e diferente ou não registrado * Distingue status de demanda de registro e status do registro * Modifica PidProviderAPIClient._process_post_xml_response para atualizar ou não os valores dos pids de xml_with_pre com os valores 
fornecidos pelo Core * Adiciona registered_in_core como filtro de PidProviderXMLModelAdmin * Atualiza dependencias base.txt e production.txt (#409) * Comenta app captcha * Atualiza dependencias --------- Co-authored-by: Roberta Takenaka <505143+robertatakenaka@users.noreply.github.com> * Modifica comportamento de Pid provider, que passa a aceitar mudanças de pids (#415) * Cria PidProviderXML.complete_pids, que completa pids com registrados ou inéditos * Cria PidProviderXML._check_pids, que valida pid do XML é inédito e/ou registrado e/ou pertencente a outro documento * Cria PidProviderXML.get_pids, que retorna todos os pids vigentes e outros * Corrige PidProviderXML._is_registered_pid, adicionando a verificação em OtherPid * Corrige PidProviderXML._get_unique_v3, que usa _is_registered_pid e agora não precisa verificar OtherPid * Ajusta PidProviderXML._add_other_pid * Remove PidProviderXML._complete_pids excedente * Corrige PidProvider._add_pid_v3 e _add_pid_v2 * Corrige PidProviderXML.is_registered * Ajusta PidProviderXML._save, removendo _add_other_pid e removendo change_pids * Modifica PidProviderXML.register * Melhora XMLVersion.__str__, mostrando nome do arquivo + data no lugar de pid v3 * Melhora _process_post_xml_response * Para PidProvider.provide_pid_for_xml_with_pre, adiciona parâmetro caller, completa XML com pids registrados se ausentes no XML, adiciona xml_changed ao retorno * Adiciona comando para completar XML com pids registrados antes de solicitar pid para Core * Cria meio de configurar / habilitar / desabilitar fix_pid_v2 do Core (#416) * Cria a classe PidProviderEndpoint, inline de PidProviderConfig * Modifica o modo de obter fix_pid_v2_url * Adiciona modelo PidProviderEndpoint * Adiciona 'fixed_in_core': False ao retorno de fix_pid_v2 (#417) * Evita que SPSPkg armazene arquivos em excesso (#418) * Verifica se xml registrado e xml recebido são iguais, somente após completar XML com os pids registrados (#419) * Compara se xml_with_pre é igual ao 
registrado somente após adicionar os pids registrados se aplicável * Adiciona a funcionalidade de forçar o registro no Core mesmo que o registro esteja indicando que já está sincronizado * Melhora ordem dos itens do menu (#408) * Refatora a funcionalidade da ordem do menu * Reordena os itens de menu padrão do wagtail e remove alguns deles * Insere função get_menu_order em menu_order * Altera a ordem dos apps * Move as operações anteriores de ArticleProc, IssueProc, JournalProc para um arquivo (#420) * Cria o modelo ArticleProcReport e ArticleProcReportModelAdmin * Cria o modelo ProcReport para armazenar processamentos anteriores, mantendo apenas o vigente nos respectivos ArticleProc, IssueProc, JournalProc * Adiciona as migrações de banco de dados * Melhora o registro das operações das tarefas relacionadas à migração e publicação (#422) * Melhora os rótulos, deixa todos os campos não editáveis, apresenta os eventos do mais recente para o mais antigo * Adiciona Article.data, Issue.data, Journal.data * Adiciona retorno às funções que criam instâncias de Article, Issue e Journal * Adiciona Article.data, Issue.data, Journal.data nos detalhes das operações de entrada de dados * Aplica black * Adiciona * Adiciona mais detalhes ao registro da tarefa de gerar o XML a partir do HTML * Adiciona mais detalhes ao registro da tarefa de gerar o pacote SPS * Corrige o valor de 'completed' dos resultados das operações de solicitação de pid v3 * Adiciona o parâmetro compression em ZipFile * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Modifica o sps_pkg_status para DONE se o pacote não tem todos os texts * Modifica o sps_pkg_status para PENDING se o pacote não tem todos os texts * Corrige ausência de importação de ZIP_DEFLATED * Adiciona o atributo order para a listagem dos itens na área administrativa * Adiciona as migrações de banco de dados * Adiciona detalhes do processamento da adição de arquivos no minio * Refatora upload parte 3 - agrupa em uma tarefa as 
validações: assets, renditions, conteúdo do XML (#398) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Refatora upload parte 2 - Adiciona funções em upload.controller para avaliar o pacote recém recebido (#400) * Cria os upload.choices.VE_UNEXPECTED_ERROR e VE_FORBIDDEN_UPDATE_ERROR * Cria/Edita Package.get, create_or_update, _add_validation_result * Cria funções para avaliar o XML recém-recebido (é esperado? os dados de journal e issue estão corretos?) 
* Cria testes para upload.controller.* * Adiciona a migração de banco de dados por criar novos valores de choices * Corrige ausência de definição de variáveis * Refatora upload parte 3 - agrupa em uma tarefa as validações: assets, renditions, conteúdo do XML (#399) * Cria a tarefa upload.tasks.task_validate_original_zip_file * Cria upload.tasks.task_validate_xml_content * Cria upload.xml_validation * Anota TODO para inserir parâmetros para as validações * Atualiza packtools para a versão 3.3.4 que contempla mais validações * Remove package.tasks * Adiciona importações faltantes * Aplica black * Cria função para associar os tipos de erros com os relatórios e faz ajustes nos tipos de erros * Associa por inferência o tipo de impacto de cada tipo de erro * Refatora Package.check_opinions e check_resolutions; Remove article e issue do formulário * Corrige defeitos das validações iniciais à recepção do pacote e ajusta a validação do conteúdo do XML * Remove a verificação de article e issue no formulário * Troca a tarefa que executará as validações --------- Co-authored-by: Samuel Veiga Rangel <82840278+samuelveigarangel@users.noreply.github.com> --- upload/controller.py | 3 +-- upload/tasks.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/upload/controller.py b/upload/controller.py index a6f8ea89..013e9d36 100644 --- a/upload/controller.py +++ b/upload/controller.py @@ -144,8 +144,7 @@ def receive_package(package): return response except GetXMLItemsError as exc: # identifica os erros do arquivo Zip / XML - _identify_file_error(package) - return package + return _identify_file_error(package) def _identify_file_error(package): diff --git a/upload/tasks.py b/upload/tasks.py index 4a0eac0b..01e7839a 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -27,7 +27,6 @@ from .utils import file_utils, package_utils, xml_utils from upload.models import Package -from upload.xml_validation import validate_xml_content, add_app_data, add_sps_data, 
add_journal_data User = get_user_model()