Skip to content

Commit

Permalink
A fix for encountering missing PubmedData fields (#178)
Browse files: browse the repository at this point in the history
Fix #173
  • Loading branch information
caufieldjh authored Aug 17, 2023
2 parents b43762b + fc9b8f7 commit 63ddc0c
Showing 1 changed file with 14 additions and 11 deletions.
25 changes: 14 additions & 11 deletions src/ontogpt/clients/pubmed_client.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -464,17 +464,20 @@ def parse_pmxml(self, xml: str, raw: bool, autoformat: bool, pubmedcentral: bool
pmid = pa.find("PMID").text
pmc_id = ""
has_pmc_id = False
if (
pa.find("PubmedData").find("ArticleIdList").find("ArticleId", {"IdType": "pmc"})
and pubmedcentral
):
pmc_id = (
pa.find("PubmedData")
.find("ArticleIdList")
.find("ArticleId", {"IdType": "pmc"})
.text
)
has_pmc_id = True
try: # There's a chance that this entry is missing one or more fields below
if (
pa.find("PubmedData").find("ArticleIdList").find("ArticleId", {"IdType": "pmc"})
and pubmedcentral
):
pmc_id = (
pa.find("PubmedData")
.find("ArticleIdList")
.find("ArticleId", {"IdType": "pmc"})
.text
)
has_pmc_id = True
except AttributeError:
logging.info(f"PubMed entry {pmid} is missing the expected PubMedData fields.")
if autoformat and not raw and not has_pmc_id: # No PMC ID - just use title+abstract
ti = ""
if pa.find("ArticleTitle"):
Expand Down

0 comments on commit 63ddc0c

Please sign in to comment.