Skip to content

Commit

Permalink
Merge pull request #321 from gitnnolabs/tk315
Browse files Browse the repository at this point in the history
Adiciona a capacidade de armazenar o material suplementar.
  • Loading branch information
gitnnolabs authored Feb 21, 2022
2 parents 66c6e7e + 474f105 commit 0e0eb3a
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 4 deletions.
70 changes: 68 additions & 2 deletions airflow/dags/operations/sync_kernel_to_website_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def ArticleFactory(
document_xml_url: str,
repeated_doc_pids=None,
fetch_document_xml: callable = None,
fetch_documents_manifest: callable = None,
) -> models.Article:
"""Cria uma instância de artigo a partir dos dados de entrada.
Expand All @@ -97,6 +98,7 @@ def ArticleFactory(
document_order (int): Posição do artigo.
document_xml_url (str): URL do XML do artigo
fetch_document_xml (callable): Função para obter o XML do Kernel caso
necessário.
fetch_documents_manifest (callable): Função para obter o JSON Manifest do Kernel caso
necessário.
Returns:
Expand Down Expand Up @@ -514,6 +516,63 @@ def _get_related_articles(xml):
for related_dict in sps_package.related_articles:
_update_related_articles(article, related_dict)

def _update_suppl_material(document_id, filename, url):
    """Build the supplementary-material list for a document.

    Wraps ``filename`` and ``url`` in a ``models.MatSuppl`` entity, whose
    data looks like::

        {
            "url" : "https://minio.scielo.br/documentstore/2237-9622/d6DyD7CHXbpTJbLq7NQQNdq/5d88e2211c5357e2a9d8caeac2170f4f3d1305d1.pdf"
            "filename": "suppl01.pdf"
        }

    Args:
        document_id: ``_id`` used to look up an already stored
            ``models.Article``.
        filename: Name of the supplementary material asset.
        url: Location of the supplementary material.

    Returns:
        list: Always a list of ``models.MatSuppl``. When no article with
        ``document_id`` is stored yet, the list holds only the new entity.
        Otherwise it is the stored article's ``mat_suppl``, with the new
        entity appended if it was not already present.
    """
    mat_suppl_entity = models.MatSuppl(url=url, filename=filename)

    try:
        # Finding a stored article means this is an update.
        _article = models.Article.objects.get(_id=document_id)
    except models.Article.DoesNotExist:
        # First registration. The result ends up assigned to
        # ``article.mat_suppl``, which is handled as a list below, so return
        # a one-element list rather than a bare entity.
        return [mat_suppl_entity]

    # Update: keep the supplementary material entries unique.
    if mat_suppl_entity not in _article.mat_suppl:
        _article.mat_suppl += [mat_suppl_entity]
    return _article.mat_suppl

def _get_suppl_material(article, json):
    """Collect every supplementary material listed in a Kernel manifest.

    In the XML, supplementary material is tagged with either
    ``inline-supplementary-material`` or ``supplementary-material``::

        <inline-supplementary-material xlink:href="1678-8060-mioc-116-e210259-s.pdf">Supplementary data
        </inline-supplementary-material>
        <supplementary-material id="suppl01" mimetype="application" mime-subtype="pdf" xlink:href="1234-5678-rctb-45-05-0110-suppl01.pdf"/>

    Here the material is found through the manifest instead: every asset of
    ``versions[0]`` whose name contains ``"suppl"`` is treated as
    supplementary material.

    Args:
        article: Value forwarded to ``_update_suppl_material`` as the
            document identifier (the visible caller passes ``document_id``).
        json: Manifest data of the document, as returned by the Kernel.

    Returns:
        list: The ``models.MatSuppl`` entries for all supplementary assets,
        without duplicates; an empty list when the manifest has none.
    """
    logging.info("Checking if exists supplementary material....")

    # NOTE(review): what _nestget returns for a missing path is not visible
    # here; any falsy fallback is treated as "no assets" instead of failing
    # on ``.keys()``.
    assets = _nestget(json, "versions", 0, "assets") or {}
    suppls = [name for name in assets if 'suppl' in name]

    if not suppls:
        return []

    logging.info("Exists supplementary material: %s", ' '.join(suppls))

    materials = []
    for name in suppls:
        # NOTE(review): presumably each asset is a list of (timestamp, url)
        # pairs and [0][1] is a URL - confirm against the manifest schema.
        updated = _update_suppl_material(
            article, filename=name, url=_nestget(assets[name], 0, 1))

        # Tolerate a helper that hands back a single entity instead of a list.
        if not isinstance(updated, (list, tuple)):
            updated = [updated]

        # Merge instead of returning on the first match, so that every
        # supplementary asset is registered exactly once.
        for item in updated:
            if item not in materials:
                materials.append(item)

    return materials

article.authors = list(_get_article_authors(data))
article.authors_meta = _get_article_authors_meta(data)
article.languages = list(_get_languages(data))
Expand Down Expand Up @@ -564,6 +623,11 @@ def _get_related_articles(xml):
article.order = _get_order(document_order, article.pid)
article.xml = document_xml_url

# Cadastra o material suplementar
if fetch_documents_manifest:
json = fetch_documents_manifest(document_id)
article.mat_suppl = _get_suppl_material(document_id, json)

# Se for uma errata ou retratação ou adendo ou comentário de artigo.
if article.type in ["correction", "retraction", "addendum", "article-commentary"]:
# Obtém o XML da errada no kernel
Expand All @@ -584,7 +648,8 @@ def try_register_documents(
get_relation_data: callable,
fetch_document_front: callable,
article_factory: callable,
fetch_document_xml: callable,
fetch_document_xml: callable = None,
fetch_documents_manifest: callable = None,
) -> List[str]:
"""Registra documentos do Kernel na base de dados do `OPAC`.
Expand Down Expand Up @@ -637,7 +702,8 @@ def try_register_documents(
item.get("order"),
document_xml_url,
repeated_doc_pids,
fetch_document_xml
fetch_document_xml,
fetch_documents_manifest
)
document.save()
logging.info("ARTICLE saved %s %s" % (document_id, issue_id))
Expand Down
10 changes: 9 additions & 1 deletion airflow/dags/sync_kernel_to_website.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ def fetch_documents_xml(document_id):
return fetch_data("/documents/%s" % (document_id), json=False)


def fetch_documents_manifest(document_id):
    """Fetch a document's manifest from the Kernel.

    Requests the ``/documents/<document_id>/manifest`` endpoint through
    ``fetch_data`` with ``json=True`` and returns whatever it returns.
    """
    endpoint = "/documents/%s/manifest" % (document_id)
    return fetch_data(endpoint, json=True)


def _get_relation_data_from_kernel_bundle(document_id, front_data=None):
"""
Obtém os dados do documento no bundle
Expand Down Expand Up @@ -747,7 +754,7 @@ def _get_known_documents(**kwargs) -> Dict[str, List[str]]:
)

orphans = try_register_documents(
documents_to_get, _get_relation_data, fetch_documents_front, ArticleFactory, fetch_documents_xml,
documents_to_get, _get_relation_data, fetch_documents_front, ArticleFactory, fetch_documents_xml, fetch_documents_manifest,
)

Variable.set("orphan_documents", orphans, serialize_json=True)
Expand Down Expand Up @@ -916,6 +923,7 @@ def register_last_issues(ds, **kwargs):
except AttributeError:
logging.info("No issues are registered to models.Journal: %s " % journal)


def must_send_email(ds, **kwargs):
"""If IS_SPORADIC == True return False to avoid send e-mail,
but if IS_SPORADIC == False, return True to send e-mail.
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ deepdiff[murmur]==4.0.7
feedparser==5.2.1
beautifulsoup4==4.9.0
git+https://github.com/scieloorg/[email protected]#egg=xylose
git+https://github.com/scieloorg/opac_schema.git@v2.60#egg=opac_schema
git+https://github.com/scieloorg/opac_schema.git@v2.65#egg=opac_schema
git+https://github.com/scieloorg/[email protected]#egg=packtools
aiohttp==3.6.2

0 comments on commit 0e0eb3a

Please sign in to comment.