Skip to content

Commit

Permalink
Merge pull request #564 from robertatakenaka/rc_merge_main-2024-11-09
Browse files Browse the repository at this point in the history
Mantém o branch rc compatível com main
  • Loading branch information
robertatakenaka authored Nov 9, 2024
2 parents 069c4f6 + 21a186c commit c3c6976
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 34 deletions.
2 changes: 1 addition & 1 deletion core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def data(self):


class TextModel(BaseTextModel):
text = models.CharField(max_length=100, null=False, blank=False)
text = models.CharField(max_length=200, null=False, blank=False)

panels = [FieldPanel("text"), FieldPanel("language")]

Expand Down
90 changes: 72 additions & 18 deletions htmlxml/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import sys

from django.core.files.base import ContentFile
from django.db import models
Expand Down Expand Up @@ -503,6 +504,17 @@ class Meta:
models.Index(fields=["migrated_article"]),
]

@property
def data(self):
    """
    Return a snapshot of this record's HTML-to-XML conversion fields.

    The returned dict maps each attribute name to its current value:
    conversion status, paragraph and reference counters, record types,
    and the translation / PDF language fields.
    """
    return {
        "html2xml_status": self.html2xml_status,
        "n_paragraphs": self.n_paragraphs,
        "n_references": self.n_references,
        "record_types": self.record_types,
        "html_translation_langs": self.html_translation_langs,
        "pdf_langs": self.pdf_langs,
    }

@property
def directory_path(self):
return f"classic_website/{self.migrated_article.collection.acron}/html2xml/{self.migrated_article.path}"
Expand Down Expand Up @@ -566,6 +578,8 @@ def html_to_xml(
body_and_back_xml,
):
try:
detail = {}
op = article_proc.start(user, "html_to_xml")
self.html2xml_status = tracker_choices.PROGRESS_STATUS_DOING
self.html_translation_langs = "-".join(
sorted(article_proc.translations.keys())
Expand All @@ -580,6 +594,7 @@ def html_to_xml(
)
self.save()

detail = {}
document = Document(article_proc.migrated_data.data)
document._translated_html_by_lang = article_proc.translations

Expand All @@ -588,19 +603,40 @@ def html_to_xml(
)
xml_content = self._generate_xml_from_html(user, article_proc, document)

if xml_content and body_and_back:
detail = {"xml_content": bool(xml_content), "body_and_back": bool(body_and_back)}
completed = bool(xml_content and body_and_back)
if completed:
self.html2xml_status = tracker_choices.PROGRESS_STATUS_DONE
elif xml_content:
self.html2xml_status = tracker_choices.PROGRESS_STATUS_PENDING
else:
self.html2xml_status = tracker_choices.PROGRESS_STATUS_BLOCKED
self.html2xml_status = tracker_choices.PROGRESS_STATUS_PENDING
self.save()

op.finish(
user,
completed=completed,
exception=None,
message_type=None,
message=None,
exc_traceback=None,
detail=detail,
)
return xml_content

except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()

self.html2xml_status = tracker_choices.PROGRESS_STATUS_BLOCKED
self.save()
raise e
self.generate_report(user, article_proc)
return xml_content
op.finish(
user,
completed=False,
exception=e,
message_type=None,
message=None,
exc_traceback=exc_traceback,
detail=detail,
)


@property
def first_bb_file(self):
Expand All @@ -617,8 +653,9 @@ def latest_bb_file(self):
return ""

def generate_report(self, user, article_proc):
op = article_proc.start(user, "generate html xml report")
op = article_proc.start(user, "html_to_xml: generate report")
try:
detail = {}
html = _fromstring(self.first_bb_file)

for xml_with_pre in XMLWithPre.create(path=self.file.path):
Expand All @@ -641,28 +678,36 @@ def generate_report(self, user, article_proc):
},
)
except Exception as e:
op.finish(user, completed=False, detail={"error": str(e)})
exc_type, exc_value, exc_traceback = sys.exc_info()
op.finish(
user,
completed=False,
exception=e,
message_type=None,
message=None,
exc_traceback=exc_traceback,
detail=detail,
)

def _generate_xml_body_and_back(self, user, article_proc, document):
"""
Generate XML body and back from html_translation_langs and p records
"""
done = False
operation = article_proc.start(user, "generate xml body and back")
operation = article_proc.start(user, "html_to_xml: generate xml body + back")

languages = document._translated_html_by_lang
detail = {}
detail.update(languages)

try:
document.generate_body_and_back_from_html(languages)
done = True
# guarda cada versão de body/back
except GenerateBodyAndBackFromHTMLError as e:
# cria xml_body_and_back padrão
document.xml_body_and_back = ["<article/>"]
detail = {"warning": str(e)}
document.xml_body_and_back = ["<article><body/><back/></article>"]
done = False

# guarda cada versão de body/back
if document.xml_body_and_back:
for i, xml_body_and_back in enumerate(document.xml_body_and_back, start=1):
BodyAndBackFile.create_or_update(
Expand All @@ -677,18 +722,27 @@ def _generate_xml_body_and_back(self, user, article_proc, document):
return done

def _generate_xml_from_html(self, user, article_proc, document):
operation = article_proc.start(user, "_generate_xml_from_html")
operation = article_proc.start(user, "html_to_xml: merge front + body + back")
xml_content = None
detail = {}
try:
xml_content = document.generate_full_xml(None).decode("utf-8")
xml_file = article_proc.pkg_name + ".xml"
self.save_file(xml_file, xml_content)
detail["xml"] = xml_file
operation.finish(user, bool(xml_content), detail=detail)
return xml_content
except Exception as e:
detail = {"error": str(e)}
operation.finish(user, bool(xml_content), detail=detail)
return xml_content
exc_type, exc_value, exc_traceback = sys.exc_info()
operation.finish(
user,
completed=False,
exception=e,
message_type=None,
message=None,
exc_traceback=exc_traceback,
detail=detail,
)

def save_report(self, content):
# content = json.dumps(data)
Expand Down
17 changes: 17 additions & 0 deletions journal/migrations/0006_alter_journalsection_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 5.0.3 on 2024-11-09 14:19

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``journalsection.text`` to a ``CharField`` with ``max_length=200``."""

    # Must be applied after the previous migration of the ``journal`` app.
    dependencies = [
        ("journal", "0005_officialjournal_next_journal_title_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="journalsection",
            name="text",
            field=models.CharField(max_length=200),
        ),
    ]
21 changes: 21 additions & 0 deletions location/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,26 @@ def get(cls, name=None, acronym=None):
return cls.objects.filter(name__iexact=name, acronym=acronym).first()
raise ValueError(f"State.get missing params {dict(name__iexact=name, acronym=acronym)}")

@staticmethod
def add(data, value):
    """
    Store ``value`` in ``data`` as either a state acronym or a state name.

    A two-character value that equals its own upper-case form is stored
    under ``"acronym"``; any other non-empty value is stored under
    ``"name"``. Falsy values (``None``, ``""``) leave ``data`` untouched.

    Args:
        data: dict updated in place.
        value: candidate state name or acronym.
    """
    if not value:
        return
    if len(value) == 2 and value.upper() == value:
        data["acronym"] = value
    else:
        data["name"] = value

@staticmethod
def fix_values(name, acronym):
    """
    Return a dict with the given values filed under ``"name"`` / ``"acronym"``.

    Each argument is classified by ``State.add`` according to its own
    content, not its position, so a name and an acronym passed in swapped
    positions end up under the right keys. ``name`` is processed first, so
    when both values fall in the same category the one passed as
    ``acronym`` wins.

    Args:
        name: value received as the state name (may be ``None``).
        acronym: value received as the state acronym (may be ``None``).

    Returns:
        dict containing zero, one or both of the keys ``"name"`` and
        ``"acronym"``.
    """
    params = {}
    for value in (name, acronym):
        State.add(params, value)
    return params

@classmethod
def create(cls, user, name=None, acronym=None):
fixed = State.fix_values(name, acronym)
name = fixed.get("name")
acronym = fixed.get("acronym")
if name or acronym:
try:
obj = cls()
Expand All @@ -101,6 +119,9 @@ def create(cls, user, name=None, acronym=None):

@classmethod
def get_or_create(cls, user, name=None, acronym=None):
fixed = State.fix_values(name, acronym)
name = fixed.get("name")
acronym = fixed.get("acronym")
try:
return cls.get(name, acronym)
except cls.DoesNotExist:
Expand Down
30 changes: 25 additions & 5 deletions package/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ def create_or_update(
obj.texts = texts
obj.save()

obj.save_pkg_zip_file(user, sps_pkg_zip_path)
obj.save_pkg_zip_file(user, sps_pkg_zip_path, article_proc)

obj.upload_package_to_the_cloud(user, original_pkg_components, article_proc)
obj.validate(True)
Expand Down Expand Up @@ -625,7 +625,8 @@ def add_pid_v3_to_zip(cls, user, zip_xml_file_path, is_public, article_proc):
f"Unable to add pid v3 to {zip_xml_file_path}, got {response}. Exception {type(e)} {e}"
)

def save_pkg_zip_file(self, user, zip_file_path):
def save_pkg_zip_file(self, user, zip_file_path, article_proc):
operation = article_proc.start(user, "save_pkg_zip_file")
filename = self.sps_pkg_name + ".zip"
try:
with TemporaryDirectory() as targetdir:
Expand All @@ -637,10 +638,29 @@ def save_pkg_zip_file(self, user, zip_file_path):
# saved optimised
with open(target, "rb") as fp:
self.save_file(filename, fp.read())
operation.finish(
user,
completed=True,
detail={"source": target, "optimized": True},
)
except Exception as e:
# saved original
with open(zip_file_path, "rb") as fp:
self.save_file(filename, fp.read())
try:
with open(zip_file_path, "rb") as fp:
self.save_file(filename, fp.read())
operation.finish(
user,
completed=True,
detail={"source": zip_file_path, "optimized": False, "message": str(e)},
)
except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
operation.finish(
user,
exc_traceback=exc_traceback,
exception=e,
detail={"source": zip_file_path, "optimized": False},
)

def save_file(self, name, content):
try:
Expand Down Expand Up @@ -887,4 +907,4 @@ def get_zip_filename_and_content(self):
with open(self.file.path, "rb") as fp:
d["content"] = fp.read()
d["filename"] = self.sps_pkg_name + ".zip"
return d
return d
9 changes: 7 additions & 2 deletions proc/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,9 @@ def get_xml(self, user, body_and_back_xml):
self.migrated_data.file_type = self.migrated_data.document.file_type
self.migrated_data.save()

detail = {}
detail["file_type"] = self.migrated_data.file_type

if self.migrated_data.file_type == "html":
migrated_data = self.migrated_data
classic_ws_doc = migrated_data.document
Expand All @@ -1439,17 +1442,19 @@ def get_xml(self, user, body_and_back_xml):
record_types="|".join(classic_ws_doc.record_types or []),
)
htmlxml.html_to_xml(user, self, body_and_back_xml)
htmlxml.generate_report(user, self)
detail.update(htmlxml.data)

xml = get_migrated_xml_with_pre(self)

if xml:
self.xml_status = tracker_choices.PROGRESS_STATUS_DONE
detail.update(xml.data)
else:
self.xml_status = tracker_choices.PROGRESS_STATUS_REPROC
self.save()

completed = self.xml_status == tracker_choices.PROGRESS_STATUS_DONE
operation.finish(user, completed=completed, detail=xml and xml.data)
operation.finish(user, completed=completed, detail=detail)
return completed
except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
Expand Down
15 changes: 9 additions & 6 deletions proc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ def task_migrate_and_publish(
content_type="article",
collection=collection,
force_update=force_update,
params=issue_filter,
params=article_filter,
)
api_data = get_api_data(collection, "article", website_kind)
logging.info(f"publish_articles: {issue_filter} {items.count()}")
logging.info(f"publish_articles: {article_filter} {items.count()}")
for article_proc in items:
task_publish_article.apply_async(
kwargs=dict(
Expand Down Expand Up @@ -525,6 +525,7 @@ def task_migrate_and_publish_issues(
"collection": collection.acron,
"pid": issue_proc.pid,
"force_update": force_update,
"force_migrate_document_records": force_migrate_document_records,
},
)

Expand All @@ -542,7 +543,7 @@ def task_migrate_and_publish_issues(
"publication_year": publication_year,
"issue_folder": issue_folder,
"force_update": force_update,
"force_import": force_import,
"force_migrate_document_records": force_migrate_document_records,
},
)

Expand Down Expand Up @@ -603,8 +604,8 @@ def task_publish_issues(
exc_traceback=exc_traceback,
detail={
"task": "proc.tasks.publish_issues",
"user_id": user.id,
"username": user.username,
"user_id": user_id,
"username": username,
"collection": collection.acron,
"pid": issue_proc.pid,
"force_update": force_update,
Expand Down Expand Up @@ -699,7 +700,9 @@ def task_migrate_and_publish_articles(
logging.info(list(ArticleProc.items_to_process_info(items)))

for article_proc in items:
article_proc.migrate_article(user, force_update)
article = article_proc.migrate_article(user, force_update)
if not article:
continue

task_publish_article.apply_async(
kwargs=dict(
Expand Down
1 change: 1 addition & 0 deletions proc/wagtail_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ class ArticleProcModelAdmin(ModelAdmin):
edit_view_class = ProcEditView
list_per_page = 10
list_display = (
"__str__",
"pkg_name",
"issue_proc",
"xml_status",
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ mongoengine==0.28.2
aiohttp==3.9.1
# DSM Migration
# ------------------------------------------------------------------------------
-e git+https://github.com/scieloorg/[email protected].4#egg=scielo_classic_website
-e git+https://github.com/scieloorg/[email protected].5#egg=scielo_classic_website
python-dateutil==2.8.2
tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability

Expand Down
Loading

0 comments on commit c3c6976

Please sign in to comment.