From ddf1957d49627fe83266f459d86b95f3a494a90b Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 29 Oct 2024 08:54:28 -0300 Subject: [PATCH 01/11] =?UTF-8?q?Corrige=20a=20verifica=C3=A7=C3=A3o=20do?= =?UTF-8?q?=20status=20de=20execu=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tracker/choices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/choices.py b/tracker/choices.py index 3909406a..63b891b3 100644 --- a/tracker/choices.py +++ b/tracker/choices.py @@ -46,4 +46,4 @@ def allowed_to_run(status, force_update): - return force_update and status in PROGRESS_STATUS_FORCE_UPDATE or status in PROGRESS_STATUS_TODO + return force_update and status in PROGRESS_STATUS_FORCE_UPDATE or status in PROGRESS_STATUS_REGULAR_TODO From bc5ecead38a29bc012cabe0ca84627e6baa4df35 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 29 Oct 2024 08:54:36 -0300 Subject: [PATCH 02/11] Corrige: troca Article.order por Article.position --- article/wagtail_hooks.py | 1 - publication/api/document.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/article/wagtail_hooks.py b/article/wagtail_hooks.py index 02045317..9b4383f6 100644 --- a/article/wagtail_hooks.py +++ b/article/wagtail_hooks.py @@ -43,7 +43,6 @@ class ArticleModelAdmin(ModelAdmin): "pid_v3", "status", "display_sections", - "order", "fpage", "position", "first_publication_date", diff --git a/publication/api/document.py b/publication/api/document.py index dcd96cc5..8da45628 100644 --- a/publication/api/document.py +++ b/publication/api/document.py @@ -24,7 +24,7 @@ def publish_article(article_proc, api_data, journal_pid=None): raise ValueError( "publication.api.document.publish_article requires journal_pid") - order = article_proc.article.order + order = article_proc.article.position pub_date = article_proc.article.first_publication_date or datetime.utcnow() build_article(builder, article_proc.article, journal_pid, order, pub_date) From 08eea328a0cbd1fcde6915bf51acd2856b529d92 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 29 Oct 2024 08:54:43 -0300 Subject: [PATCH 03/11] =?UTF-8?q?Atualiza=20a=20biblioteca=20scielo=5Fmigr?= =?UTF-8?q?ation=201.7.5=20para=20lidar=20com=20caracteres=20especiais=20e?= =?UTF-8?q?m=20refer=C3=AAncias?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements/base.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/base.txt b/requirements/base.txt index cf5ba4bc..a028ef06 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -72,7 +72,7 @@ mongoengine==0.28.2 aiohttp==3.9.1 # DSM Migration # ------------------------------------------------------------------------------ --e git+https://github.com/scieloorg/scielo_migration.git@1.7.4#egg=scielo_classic_website +-e git+https://github.com/scieloorg/scielo_migration.git@1.7.5#egg=scielo_classic_website python-dateutil==2.8.2 tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability From f98cfc08d4ad33f968e09db98c512791f1c5b674 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 29 Oct 2024 08:54:50 -0300 Subject: [PATCH 04/11] =?UTF-8?q?Corrige=20conte=C3=BAdo=20padr=C3=A3o=20d?= =?UTF-8?q?e=20xml=20body=20e=20back=20quando=20ausentes=20e=20melhora=20'?= =?UTF-8?q?log'=20da=20convers=C3=A3o=20de=20html=20para=20xml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- htmlxml/models.py | 85 +++++++++++++++++++++++++++++++++++++---------- proc/models.py | 9 +++-- 2 files changed, 75 insertions(+), 19 deletions(-) diff --git a/htmlxml/models.py b/htmlxml/models.py index bd018293..3162d7d9 100644 --- a/htmlxml/models.py +++ b/htmlxml/models.py @@ -1,5 +1,6 @@ import logging import os +import sys from django.core.files.base import ContentFile from django.db import models @@ -503,6 +504,17 @@ class Meta: models.Index(fields=["migrated_article"]), ] + @property + def data(self): + return { + "html2xml_status": self.html2xml_status, + "n_paragraphs": self.n_paragraphs, + "n_references": self.n_references, + "record_types": self.record_types, + "html_translation_langs": self.html_translation_langs, + "pdf_langs": self.pdf_langs, + } + @property def directory_path(self): return f"classic_website/{self.migrated_article.collection.acron}/html2xml/{self.migrated_article.path}" @@ -566,6 +578,7 @@ def html_to_xml( body_and_back_xml, ): try: + op = article_proc.start(user, "html_to_xml") self.html2xml_status = tracker_choices.PROGRESS_STATUS_DOING self.html_translation_langs = "-".join( sorted(article_proc.translations.keys()) @@ -580,6 +593,7 @@ def html_to_xml( ) self.save() + detail = {} document = Document(article_proc.migrated_data.data) document._translated_html_by_lang = article_proc.translations @@ -588,18 +602,37 @@ def html_to_xml( ) xml_content = self._generate_xml_from_html(user, article_proc, document) - if xml_content and body_and_back: + detail = {"xml_content": bool(xml_content), "body_and_back": bool(body_and_back)} + completed = bool(xml_content and body_and_back) + if completed: self.html2xml_status = tracker_choices.PROGRESS_STATUS_DONE - elif xml_content: - self.html2xml_status = tracker_choices.PROGRESS_STATUS_PENDING else: - self.html2xml_status = tracker_choices.PROGRESS_STATUS_BLOCKED + self.html2xml_status = tracker_choices.PROGRESS_STATUS_PENDING self.save() + + op.finish( + user, + completed=completed, + exception=None, + message_type=None, + message=None, + exc_traceback=None, + detail=detail, + ) except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + self.html2xml_status = tracker_choices.PROGRESS_STATUS_BLOCKED self.save() - raise e - self.generate_report(user, article_proc) + op.finish( + user, + completed=False, + exception=e, + message_type=None, + message=None, + exc_traceback=exc_traceback, + detail=detail, + ) return xml_content @property @@ -617,8 +650,9 @@ def latest_bb_file(self): return "" def generate_report(self, user, article_proc): - op = article_proc.start(user, "generate html xml report") + op = article_proc.start(user, "html_to_xml: generate report") try: + detail = {} html = _fromstring(self.first_bb_file) for xml_with_pre in XMLWithPre.create(path=self.file.path): @@ -641,28 +675,36 @@ def generate_report(self, user, article_proc): }, ) except Exception as e: - op.finish(user, completed=False, detail={"error": str(e)}) + exc_type, exc_value, exc_traceback = sys.exc_info() + op.finish( + user, + completed=False, + exception=e, + message_type=None, + message=None, + exc_traceback=exc_traceback, + detail=detail, + ) def _generate_xml_body_and_back(self, user, article_proc, document): """ Generate XML body and back from html_translation_langs and p records """ done = False - operation = article_proc.start(user, "generate xml body and back") + operation = article_proc.start(user, "html_to_xml: generate xml body + back") languages = document._translated_html_by_lang detail = {} detail.update(languages) + try: document.generate_body_and_back_from_html(languages) done = True + # guarda cada versão de body/back except GenerateBodyAndBackFromHTMLError as e: - # cria xml_body_and_back padrão - document.xml_body_and_back = ["
"] - detail = {"warning": str(e)} + document.xml_body_and_back = ["
"] done = False - # guarda cada versão de body/back if document.xml_body_and_back: for i, xml_body_and_back in enumerate(document.xml_body_and_back, start=1): BodyAndBackFile.create_or_update( @@ -677,7 +719,7 @@ def _generate_xml_body_and_back(self, user, article_proc, document): return done def _generate_xml_from_html(self, user, article_proc, document): - operation = article_proc.start(user, "_generate_xml_from_html") + operation = article_proc.start(user, "html_to_xml: merge front + body + back") xml_content = None detail = {} try: @@ -685,10 +727,19 @@ def _generate_xml_from_html(self, user, article_proc, document): xml_file = article_proc.pkg_name + ".xml" self.save_file(xml_file, xml_content) detail["xml"] = xml_file + operation.finish(user, bool(xml_content), detail=detail) + return xml_content except Exception as e: - detail = {"error": str(e)} - operation.finish(user, bool(xml_content), detail=detail) - return xml_content + exc_type, exc_value, exc_traceback = sys.exc_info() + operation.finish( + user, + completed=False, + exception=e, + message_type=None, + message=None, + exc_traceback=exc_traceback, + detail=detail, + ) def save_report(self, content): # content = json.dumps(data) diff --git a/proc/models.py b/proc/models.py index 67628a22..5386c209 100644 --- a/proc/models.py +++ b/proc/models.py @@ -1423,6 +1423,9 @@ def get_xml(self, user, body_and_back_xml): self.migrated_data.file_type = self.migrated_data.document.file_type self.migrated_data.save() + detail = {} + detail["file_type"] = self.migrated_data.file_type + if self.migrated_data.file_type == "html": migrated_data = self.migrated_data classic_ws_doc = migrated_data.document @@ -1433,17 +1436,19 @@ def get_xml(self, user, body_and_back_xml): record_types="|".join(classic_ws_doc.record_types or []), ) htmlxml.html_to_xml(user, self, body_and_back_xml) + htmlxml.generate_report(user, self) + detail.update(htmlxml.data) xml = get_migrated_xml_with_pre(self) - if xml: self.xml_status = tracker_choices.PROGRESS_STATUS_DONE + detail.update(xml.data) else: self.xml_status = tracker_choices.PROGRESS_STATUS_REPROC self.save() completed = self.xml_status == tracker_choices.PROGRESS_STATUS_DONE - operation.finish(user, completed=completed, detail=xml and xml.data) + operation.finish(user, completed=completed, detail=detail) return completed except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() From 3cd76ac92875e8723f57eaa19f50b3a5b5b53d77 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Tue, 29 Oct 2024 08:54:58 -0300 Subject: [PATCH 05/11] =?UTF-8?q?Melhora=20a=20identifica=C3=A7=C3=A3o=20d?= =?UTF-8?q?o=20artigo=20na=20=C3=A1rea=20administrativa?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/wagtail_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/proc/wagtail_hooks.py b/proc/wagtail_hooks.py index 1d5e18d4..592662bf 100644 --- a/proc/wagtail_hooks.py +++ b/proc/wagtail_hooks.py @@ -198,6 +198,7 @@ class ArticleProcModelAdmin(ModelAdmin): edit_view_class = ProcEditView list_per_page = 10 list_display = ( + "__str__", "pkg_name", "issue_proc", "xml_status", From e1d6069be4d2d200b4f2fb14c5dd889f2c1b0969 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sat, 9 Nov 2024 11:07:35 -0300 Subject: [PATCH 06/11] =?UTF-8?q?Corrige=20aus=C3=AAncia=20de=20declara?= =?UTF-8?q?=C3=A7=C3=A3o=20de=20vari=C3=A1veis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- htmlxml/models.py | 5 ++++- proc/controller.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/htmlxml/models.py b/htmlxml/models.py index 3162d7d9..731b2695 100644 --- a/htmlxml/models.py +++ b/htmlxml/models.py @@ -578,6 +578,7 @@ def html_to_xml( body_and_back_xml, ): try: + detail = {} op = article_proc.start(user, "html_to_xml") self.html2xml_status = tracker_choices.PROGRESS_STATUS_DOING self.html_translation_langs = "-".join( @@ -619,6 +620,8 @@ def html_to_xml( exc_traceback=None, detail=detail, ) + return xml_content + except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -633,7 +636,7 @@ def html_to_xml( exc_traceback=exc_traceback, detail=detail, ) - return xml_content + @property def first_bb_file(self): diff --git a/proc/controller.py b/proc/controller.py index ed02f40b..6042a8ac 100644 --- a/proc/controller.py +++ b/proc/controller.py @@ -329,7 +329,7 @@ def migrate_journal( "task": "proc.controller.migrate_journal", "user_id": user.id, "username": user.username, - "collection": collection.acron, + "collection": journal_proc.collection.acron, "pid": journal_proc.pid, "issue_filter": issue_filter, "force_update": force_update, From 9d68ce89535a82083d2ff3bbb556a14172243e32 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sat, 9 Nov 2024 11:07:35 -0300 Subject: [PATCH 07/11] =?UTF-8?q?Corrige=20aus=C3=AAncia=20de=20declara?= =?UTF-8?q?=C3=A7=C3=A3o=20de=20vari=C3=A1veis=20e=20evita=20publicar=20ar?= =?UTF-8?q?tigo=20se=20n=C3=A3o=20criou=20registro=20Article?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/tasks.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/proc/tasks.py b/proc/tasks.py index 5a344c9d..1c0590dd 100644 --- a/proc/tasks.py +++ b/proc/tasks.py @@ -191,10 +191,10 @@ def task_migrate_and_publish( content_type="article", collection=collection, force_update=force_update, - params=issue_filter, + params=article_filter, ) api_data = get_api_data(collection, "article", website_kind) - logging.info(f"publish_articles: {issue_filter} {items.count()}") + logging.info(f"publish_articles: {article_filter} {items.count()}") for article_proc in items: task_publish_article.apply_async( kwargs=dict( @@ -525,6 +525,7 @@ def task_migrate_and_publish_issues( "collection": collection.acron, "pid": issue_proc.pid, "force_update": force_update, + "force_migrate_document_records": force_migrate_document_records, }, ) @@ -542,7 +543,7 @@ def task_migrate_and_publish_issues( "publication_year": publication_year, "issue_folder": issue_folder, "force_update": force_update, - "force_import": force_import, + "force_migrate_document_records": force_migrate_document_records, }, ) @@ -602,8 +603,8 @@ def task_publish_issues( exc_traceback=exc_traceback, detail={ "task": "proc.tasks.publish_issues", - "user_id": user.id, - "username": user.username, + "user_id": user_id, + "username": username, "collection": collection.acron, "pid": issue_proc.pid, "force_update": force_update, @@ -698,7 +699,9 @@ def task_migrate_and_publish_articles( logging.info(list(ArticleProc.items_to_process_info(items))) for article_proc in items: - article_proc.migrate_article(user, force_update) + article = article_proc.migrate_article(user, force_update) + if not article: + continue task_publish_article.apply_async( kwargs=dict( From 100181d91efcec25ad86c5f6fd1f15e6ada8d373 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sat, 9 Nov 2024 11:14:16 -0300 Subject: [PATCH 08/11] Aumenta JournalSection.text.max_length de 100 para 200 --- core/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/models.py b/core/models.py index 9c5e53f5..7659b7a4 100644 --- a/core/models.py +++ b/core/models.py @@ -105,7 +105,7 @@ def data(self): class TextModel(BaseTextModel): - text = models.CharField(max_length=100, null=False, blank=False) + text = models.CharField(max_length=200, null=False, blank=False) panels = [FieldPanel("text"), FieldPanel("language")] From abf510006bc694319b032976e8850af989dd1cb2 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sat, 9 Nov 2024 11:20:35 -0300 Subject: [PATCH 09/11] Adiciona journal.migrations.0006_alter_journalsection_text --- .../0006_alter_journalsection_text.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 journal/migrations/0006_alter_journalsection_text.py diff --git a/journal/migrations/0006_alter_journalsection_text.py b/journal/migrations/0006_alter_journalsection_text.py new file mode 100644 index 00000000..2fe76762 --- /dev/null +++ b/journal/migrations/0006_alter_journalsection_text.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.3 on 2024-11-09 14:19 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("journal", "0005_officialjournal_next_journal_title_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="journalsection", + name="text", + field=models.CharField(max_length=200), + ), + ] From 3d5f32526f210a6f9cf437cfd5e239a201b22c39 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sat, 9 Nov 2024 11:23:44 -0300 Subject: [PATCH 10/11] Cria State.fix_values para atribuir corretamente name e acronym de State --- location/models.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/location/models.py b/location/models.py index 66e2c4c2..b6cae5a1 100644 --- a/location/models.py +++ b/location/models.py @@ -85,8 +85,26 @@ def get(cls, name=None, acronym=None): return cls.objects.filter(name__iexact=name, acronym=acronym).first() raise ValueError(f"State.get missing params {dict(name__iexact=name, acronym=acronym)}") + @staticmethod + def add(data, value): + if value: + if len(value) == 2 and value.upper() == value: + data["acronym"] = value + else: + data["name"] = value + + @staticmethod + def fix_values(name, acronym): + params = {} + State.add(params, name) + State.add(params, acronym) + return params + @classmethod def create(cls, user, name=None, acronym=None): + fixed = State.fix_values(name, acronym) + name = fixed.get("name") + acronym = fixed.get("acronym") if name or acronym: try: obj = cls() @@ -101,6 +119,9 @@ def create(cls, user, name=None, acronym=None): @classmethod def get_or_create(cls, user, name=None, acronym=None): + fixed = State.fix_values(name, acronym) + name = fixed.get("name") + acronym = fixed.get("acronym") try: return cls.get(name, acronym) except cls.DoesNotExist: From f2d8cfb2b99295ae277464501fc2e1180f0ea768 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Sat, 9 Nov 2024 11:30:18 -0300 Subject: [PATCH 11/11] =?UTF-8?q?Adiciona=20mais=20detalhes=20ao=20log=20d?= =?UTF-8?q?e=20cria=C3=A7=C3=A3o=20zip=20otimizado=20do=20SPSPkg?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package/models.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/package/models.py b/package/models.py index 60d028e1..d54ce23a 100644 --- a/package/models.py +++ b/package/models.py @@ -508,7 +508,7 @@ def create_or_update( obj.texts = texts obj.save() - obj.save_pkg_zip_file(user, sps_pkg_zip_path) + obj.save_pkg_zip_file(user, sps_pkg_zip_path, article_proc) obj.upload_package_to_the_cloud(user, original_pkg_components, article_proc) obj.validate(True) @@ -550,7 +550,10 @@ def is_complete(self): @property def data(self): return dict( + is_complete=self.is_complete, registered_in_core=self.registered_in_core, + valid_texts=self.valid_texts, + valid_components=self.valid_components, texts=self.texts, components=[item.data for item in self.components.all()], ) @@ -622,7 +625,8 @@ def add_pid_v3_to_zip(cls, user, zip_xml_file_path, is_public, article_proc): f"Unable to add pid v3 to {zip_xml_file_path}, got {response}. Exception {type(e)} {e}" ) - def save_pkg_zip_file(self, user, zip_file_path): + def save_pkg_zip_file(self, user, zip_file_path, article_proc): + operation = article_proc.start(user, "save_pkg_zip_file") filename = self.sps_pkg_name + ".zip" try: with TemporaryDirectory() as targetdir: @@ -634,10 +638,29 @@ def save_pkg_zip_file(self, user, zip_file_path): # saved optimised with open(target, "rb") as fp: self.save_file(filename, fp.read()) + operation.finish( + user, + completed=True, + detail={"source": target, "optimized": True}, + ) except Exception as e: # saved original - with open(zip_file_path, "rb") as fp: - self.save_file(filename, fp.read()) + try: + with open(zip_file_path, "rb") as fp: + self.save_file(filename, fp.read()) + operation.finish( + user, + completed=True, + detail={"source": zip_file_path, "optimized": False, "message": str(e)}, + ) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + operation.finish( + user, + exc_traceback=exc_traceback, + exception=e, + detail={"source": zip_file_path, "optimized": False}, + ) def save_file(self, name, content): try: @@ -884,4 +907,4 @@ def get_zip_filename_and_content(self): with open(self.file.path, "rb") as fp: d["content"] = fp.read() d["filename"] = self.sps_pkg_name + ".zip" - return d + return d \ No newline at end of file