diff --git a/CHANGELOG.md b/CHANGELOG.md index 2073b82de19..1d36e856eb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - 'Get full text' now also checks the file url. [#568](https://github.com/koppor/jabref/issues/568) - We refined the 'main directory not found' error message. [#9625](https://github.com/JabRef/jabref/pull/9625) - We modified the `Add Group` dialog to use the most recently selected group hierarchical context [#9141](https://github.com/JabRef/jabref/issues/9141) +- We improved the Medline importer to correctly import ISO dates for `revised`. [#9536](https://github.com/JabRef/jabref/issues/9536) diff --git a/build.gradle b/build.gradle index 474c551e287..30c74c3e46d 100644 --- a/build.gradle +++ b/build.gradle @@ -259,7 +259,6 @@ processResources { task generateSource(dependsOn: ["generateBstGrammarSource", "generateSearchGrammarSource", - "generateMedlineSource", "generateBibtexmlSource", "generateEndnoteSource", "generateModsSource", @@ -290,15 +289,6 @@ tasks.register("generateSearchGrammarSource", JavaExec) { args = ["-o","src-gen/main/java/org/jabref/search" , "-visitor", "-no-listener", "-package", "org.jabref.search", "$projectDir/src/main/antlr4/org/jabref/search/Search.g4"] } -task generateMedlineSource(type: XjcTask) { - group = 'JabRef' - description = "Generates java files for the medline importer." - - schemaFile = "src/main/resources/xjc/medline/medline.xsd" - outputDirectory = "src-gen/main/java" - javaPackage = "org.jabref.logic.importer.fileformat.medline" -} - task generateBibtexmlSource(type: XjcTask) { group = 'JabRef' description = "Generates java files for the bibtexml importer." 
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java index b9e9eac39d4..4522c34429f 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java @@ -4,7 +4,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; @@ -15,62 +14,24 @@ import java.util.Objects; import java.util.Optional; +import javax.xml.XMLConstants; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import javax.xml.stream.events.XMLEvent; import org.jabref.logic.importer.Importer; import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.ParserResult; -import org.jabref.logic.importer.fileformat.medline.Abstract; -import org.jabref.logic.importer.fileformat.medline.AbstractText; -import org.jabref.logic.importer.fileformat.medline.AffiliationInfo; import org.jabref.logic.importer.fileformat.medline.ArticleId; -import org.jabref.logic.importer.fileformat.medline.ArticleIdList; -import org.jabref.logic.importer.fileformat.medline.ArticleTitle; -import org.jabref.logic.importer.fileformat.medline.Author; -import org.jabref.logic.importer.fileformat.medline.AuthorList; -import org.jabref.logic.importer.fileformat.medline.Book; -import org.jabref.logic.importer.fileformat.medline.BookDocument; -import org.jabref.logic.importer.fileformat.medline.BookTitle; -import org.jabref.logic.importer.fileformat.medline.Chemical; -import org.jabref.logic.importer.fileformat.medline.ContributionDate; -import org.jabref.logic.importer.fileformat.medline.DateCompleted; -import 
org.jabref.logic.importer.fileformat.medline.DateCreated; -import org.jabref.logic.importer.fileformat.medline.DateRevised; -import org.jabref.logic.importer.fileformat.medline.ELocationID; -import org.jabref.logic.importer.fileformat.medline.GeneSymbolList; -import org.jabref.logic.importer.fileformat.medline.GeneralNote; -import org.jabref.logic.importer.fileformat.medline.ISSN; import org.jabref.logic.importer.fileformat.medline.Investigator; -import org.jabref.logic.importer.fileformat.medline.InvestigatorList; -import org.jabref.logic.importer.fileformat.medline.Journal; -import org.jabref.logic.importer.fileformat.medline.JournalIssue; -import org.jabref.logic.importer.fileformat.medline.Keyword; -import org.jabref.logic.importer.fileformat.medline.KeywordList; -import org.jabref.logic.importer.fileformat.medline.MedlineCitation; -import org.jabref.logic.importer.fileformat.medline.MedlineJournalInfo; import org.jabref.logic.importer.fileformat.medline.MeshHeading; -import org.jabref.logic.importer.fileformat.medline.MeshHeadingList; -import org.jabref.logic.importer.fileformat.medline.OtherID; -import org.jabref.logic.importer.fileformat.medline.Pagination; +import org.jabref.logic.importer.fileformat.medline.OtherId; import org.jabref.logic.importer.fileformat.medline.PersonalNameSubject; -import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectList; -import org.jabref.logic.importer.fileformat.medline.PubDate; -import org.jabref.logic.importer.fileformat.medline.PublicationType; -import org.jabref.logic.importer.fileformat.medline.Publisher; -import org.jabref.logic.importer.fileformat.medline.PubmedArticle; -import org.jabref.logic.importer.fileformat.medline.PubmedArticleSet; -import org.jabref.logic.importer.fileformat.medline.PubmedBookArticle; -import org.jabref.logic.importer.fileformat.medline.PubmedBookArticleSet; -import org.jabref.logic.importer.fileformat.medline.PubmedBookData; -import 
org.jabref.logic.importer.fileformat.medline.QualifierName; -import org.jabref.logic.importer.fileformat.medline.Section; -import org.jabref.logic.importer.fileformat.medline.Sections; -import org.jabref.logic.importer.fileformat.medline.Text; import org.jabref.logic.util.StandardFileType; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.Date; import org.jabref.model.entry.Month; import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.FieldFactory; @@ -80,10 +41,6 @@ import org.jabref.model.strings.StringUtil; import com.google.common.base.Joiner; -import jakarta.xml.bind.JAXBContext; -import jakarta.xml.bind.JAXBElement; -import jakarta.xml.bind.JAXBException; -import jakarta.xml.bind.Unmarshaller; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,7 +55,6 @@ public class MedlineImporter extends Importer implements Parser { private static final String KEYWORD_SEPARATOR = "; "; private static final Locale ENGLISH = Locale.ENGLISH; - private Unmarshaller unmarshaller; private static String join(List list, String string) { return Joiner.on(string).join(list); @@ -140,130 +96,294 @@ public boolean isRecognizedFormat(BufferedReader reader) throws IOException { } @Override - public ParserResult importDatabase(BufferedReader reader) throws IOException { - Objects.requireNonNull(reader); + public ParserResult importDatabase(BufferedReader input) throws IOException { + Objects.requireNonNull(input); List bibItems = new ArrayList<>(); try { - Object unmarshalledObject = unmarshallRoot(reader); - - // check whether we have an article set, an article, a book article or a book article set - if (unmarshalledObject instanceof PubmedArticleSet) { - PubmedArticleSet articleSet = (PubmedArticleSet) unmarshalledObject; - for (Object article : articleSet.getPubmedArticleOrPubmedBookArticle()) { - if (article instanceof PubmedArticle) { - PubmedArticle currentArticle = (PubmedArticle) article; - parseArticle(currentArticle, 
bibItems); - } - if (article instanceof PubmedBookArticle) { - PubmedBookArticle currentArticle = (PubmedBookArticle) article; - parseBookArticle(currentArticle, bibItems); + XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); + + // prevent xxe (https://rules.sonarsource.com/java/RSPEC-2755) + xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + // required for reading Unicode characters such as ö + xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, true); + + XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(input); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "PubmedArticle" -> { + parseArticle(reader, bibItems, elementName); + } + case "PubmedBookArticle" -> { + parseBookArticle(reader, bibItems, elementName); + } } } - } else if (unmarshalledObject instanceof PubmedArticle) { - PubmedArticle article = (PubmedArticle) unmarshalledObject; - parseArticle(article, bibItems); - } else if (unmarshalledObject instanceof PubmedBookArticle) { - PubmedBookArticle currentArticle = (PubmedBookArticle) unmarshalledObject; - parseBookArticle(currentArticle, bibItems); - } else { - PubmedBookArticleSet bookArticleSet = (PubmedBookArticleSet) unmarshalledObject; - for (PubmedBookArticle bookArticle : bookArticleSet.getPubmedBookArticle()) { - parseBookArticle(bookArticle, bibItems); - } } - } catch (JAXBException | XMLStreamException e) { + } catch (XMLStreamException e) { LOGGER.debug("could not parse document", e); return ParserResult.fromError(e); } + return new ParserResult(bibItems); } - private Object unmarshallRoot(BufferedReader reader) throws JAXBException, XMLStreamException { - initUmarshaller(); + private void parseBookArticle(XMLStreamReader reader, List bibItems, String startElement) + throws XMLStreamException { + Map fields = new HashMap<>(); - XMLInputFactory xmlInputFactory = 
XMLInputFactory.newFactory(); - XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(reader); + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "BookDocument" -> { + parseBookDocument(reader, fields, elementName); + } + case "PublicationStatus" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.PUBSTATE, reader.getText()); + } + } + } + } - // go to the root element - while (!xmlStreamReader.isStartElement()) { - xmlStreamReader.next(); + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } } - return unmarshaller.unmarshal(xmlStreamReader); - } + BibEntry entry = new BibEntry(StandardEntryType.Article); + entry.setField(fields); - private void initUmarshaller() throws JAXBException { - if (unmarshaller == null) { - // Lazy init because this is expensive - JAXBContext context = JAXBContext.newInstance("org.jabref.logic.importer.fileformat.medline"); - unmarshaller = context.createUnmarshaller(); - } + bibItems.add(entry); } - private void parseBookArticle(PubmedBookArticle currentArticle, List bibItems) { - Map fields = new HashMap<>(); - if (currentArticle.getBookDocument() != null) { - BookDocument bookDocument = currentArticle.getBookDocument(); - fields.put(StandardField.PMID, bookDocument.getPMID().getContent()); - if (bookDocument.getDateRevised() != null) { - DateRevised dateRevised = bookDocument.getDateRevised(); - addDateRevised(fields, dateRevised); - } - if (bookDocument.getAbstract() != null) { - Abstract abs = bookDocument.getAbstract(); - addAbstract(fields, abs); - } - if (bookDocument.getPagination() != null) { - Pagination pagination = bookDocument.getPagination(); - addPagination(fields, pagination); - } - if (bookDocument.getSections() != null) { - ArrayList result = new ArrayList<>(); - Sections sections = 
bookDocument.getSections(); - for (Section section : sections.getSection()) { - for (Serializable content : section.getSectionTitle().getContent()) { - if (content instanceof String) { - result.add((String) content); + private void parseBookDocument(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + // multiple occurrences of the following fields can be present + List sectionTitleList = new ArrayList<>(); + List keywordList = new ArrayList<>(); + List publicationTypeList = new ArrayList<>(); + List articleTitleList = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "PMID" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + fields.put(StandardField.PMID, reader.getText()); } } + case "DateRevised", "ContributionDate" -> { + parseDate(reader, fields, elementName); + } + case "Abstract" -> { + addAbstract(reader, fields, elementName); + } + case "Pagination" -> { + addPagination(reader, fields, elementName); + } + case "Section" -> { + parseSections(reader, sectionTitleList); + } + case "Keyword" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + keywordList.add(reader.getText()); + } + } + case "PublicationType" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + publicationTypeList.add(reader.getText()); + } + } + case "ArticleTitle" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + articleTitleList.add(reader.getText()); + } + } + case "Book" -> { + parseBookInformation(reader, fields, elementName); + } } - fields.put(new UnknownField("sections"), join(result, "; ")); } - if (bookDocument.getKeywordList() != null) { - addKeyWords(fields, bookDocument.getKeywordList()); - } - if (bookDocument.getContributionDate() != null) { - addContributionDate(fields, bookDocument.getContributionDate()); + + if (isEndXMLEvent(reader) && 
reader.getName().getLocalPart().equals(startElement)) { + break; } - if (bookDocument.getPublicationType() != null) { - List result = new ArrayList<>(); - for (PublicationType type : bookDocument.getPublicationType()) { - if (type.getContent() != null) { - result.add(type.getContent()); + } + + // populate multiple occurrence fields + if (!sectionTitleList.isEmpty()) { + fields.put(new UnknownField("sections"), join(sectionTitleList, "; ")); + } + addKeywords(fields, keywordList); + if (!publicationTypeList.isEmpty()) { + fields.put(new UnknownField("pubtype"), join(publicationTypeList, ", ")); + } + if (!articleTitleList.isEmpty()) { + fields.put(new UnknownField("article"), join(articleTitleList, ", ")); + } + } + + private void parseBookInformation(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + List isbnList = new ArrayList<>(); + List titleList = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "PublisherName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.PUBLISHER, reader.getText()); + } + } + case "PublisherLocation" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("publocation"), reader.getText()); + } + } + case "BookTitle" -> { + handleTextElement(reader, titleList, elementName); + } + case "PubDate" -> { + addPubDate(reader, fields, elementName); + } + case "AuthorList" -> { + handleAuthorList(reader, fields, elementName); + } + case "Volume" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.VOLUME, reader.getText()); + } + } + case "Edition" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.EDITION, reader.getText()); + } + } + case "Medium" -> { + reader.next(); + if 
(isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("medium"), reader.getText()); + } + } + case "ReportNumber" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("reportnumber"), reader.getText()); + } + } + case "ELocationID" -> { + String eidType = reader.getAttributeValue(null, "EIdType"); + reader.next(); + if (isCharacterXMLEvent(reader)) { + handleElocationId(fields, reader, eidType); + } + } + case "Isbn" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + isbnList.add(reader.getText()); + } } } - fields.put(new UnknownField("pubtype"), join(result, ", ")); } - if (bookDocument.getArticleTitle() != null) { - ArticleTitle articleTitle = bookDocument.getArticleTitle(); - ArrayList titles = new ArrayList<>(); - for (Serializable content : articleTitle.getContent()) { - if (content instanceof String) { - titles.add((String) content); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } + } + + if (!isbnList.isEmpty()) { + fields.put(StandardField.ISBN, join(isbnList, ", ")); + } + + if (!titleList.isEmpty()) { + putIfValueNotNull(fields, StandardField.TITLE, join(titleList, " ")); + } + } + + private void handleElocationId(Map fields, XMLStreamReader reader, String eidType) { + if ("doi".equals(eidType)) { // constant-first: eidType is null when the EIdType attribute is absent + fields.put(StandardField.DOI, reader.getText()); + } + if ("pii".equals(eidType)) { + fields.put(new UnknownField("pii"), reader.getText()); + } + } + + private void parseSections(XMLStreamReader reader, List sectionTitleList) throws XMLStreamException { + int sectionLevel = 0; + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "SectionTitle" -> { + reader.next(); + if (isCharacterXMLEvent(reader) && sectionLevel == 0) { + // we only collect SectionTitles from root level Sections +
sectionTitleList.add(reader.getText()); + } + } + case "Section" -> { + sectionLevel++; } } - fields.put(new UnknownField("article"), join(titles, ", ")); } - if (bookDocument.getBook() != null) { - addBookInformation(fields, bookDocument.getBook()); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Section")) { + if (sectionLevel == 0) { + break; + } else { + sectionLevel--; + } } } + } + + private void parseArticle(XMLStreamReader reader, List bibItems, String startElement) + throws XMLStreamException { + Map fields = new HashMap<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "MedlineCitation" -> { + parseMedlineCitation(reader, fields, elementName); + } + case "PubmedData" -> { + parsePubmedData(reader, fields, elementName); + } + } + } - if (currentArticle.getPubmedBookData() != null) { - PubmedBookData bookData = currentArticle.getPubmedBookData(); - putIfValueNotNull(fields, StandardField.PUBSTATE, bookData.getPublicationStatus()); + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } } BibEntry entry = new BibEntry(StandardEntryType.Article); @@ -272,373 +392,752 @@ private void parseBookArticle(PubmedBookArticle currentArticle, List b bibItems.add(entry); } - private void addBookInformation(Map fields, Book book) { - if (book.getPublisher() != null) { - Publisher publisher = book.getPublisher(); - putIfValueNotNull(fields, new UnknownField("publocation"), publisher.getPublisherLocation()); - putStringFromSerializableList(fields, StandardField.PUBLISHER, publisher.getPublisherName().getContent()); - } - if (book.getBookTitle() != null) { - BookTitle title = book.getBookTitle(); - putStringFromSerializableList(fields, StandardField.TITLE, title.getContent()); - } - if (book.getPubDate() != null) { - addPubDate(fields, book.getPubDate()); - } - if 
(book.getAuthorList() != null) { - List authorLists = book.getAuthorList(); - // authorLists size should be one - if (authorLists.size() == 1) { - for (AuthorList authorList : authorLists) { - handleAuthors(fields, authorList); + private void parsePubmedData(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + String publicationStatus = ""; + List articleIdList = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "PublicationStatus" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + publicationStatus = reader.getText(); + } + } + case "ArticleId" -> { + String idType = reader.getAttributeValue(null, "IdType"); + reader.next(); + if (isCharacterXMLEvent(reader)) { + articleIdList.add(new ArticleId(idType, reader.getText())); + } + } } - } else { - LOGGER.info(String.format("Size of authorlist was %s", authorLists.size())); } - } - putIfValueNotNull(fields, StandardField.VOLUME, book.getVolume()); - putIfValueNotNull(fields, StandardField.EDITION, book.getEdition()); - putIfValueNotNull(fields, new UnknownField("medium"), book.getMedium()); - putIfValueNotNull(fields, new UnknownField("reportnumber"), book.getReportNumber()); - - if (book.getELocationID() != null) { - for (ELocationID id : book.getELocationID()) { - addElocationID(fields, id); + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } - if (book.getIsbn() != null) { - fields.put(StandardField.ISBN, join(book.getIsbn(), ", ")); + + if (fields.get(new UnknownField("revised")) != null) { + putIfValueNotNull(fields, StandardField.PUBSTATE, publicationStatus); + if (!articleIdList.isEmpty()) { + addArticleIdList(fields, articleIdList); + } } } - private void putStringFromSerializableList(Map fields, Field field, List contentList) { - StringBuilder result = new StringBuilder(); - for 
(Serializable content : contentList) { - if (content instanceof String) { - result.append((String) content); + private void parseMedlineCitation(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + // multiple occurrences of the following fields can be present + List citationSubsets = new ArrayList<>(); + List meshHeadingList = new ArrayList<>(); + List personalNameSubjectList = new ArrayList<>(); + List otherIdList = new ArrayList<>(); + List keywordList = new ArrayList<>(); + List spaceFlightMissionList = new ArrayList<>(); + List investigatorList = new ArrayList<>(); + List generalNoteList = new ArrayList<>(); + + String status = reader.getAttributeValue(null, "Status"); + String owner = reader.getAttributeValue(null, "Owner"); + int latestVersion = 0; + putIfValueNotNull(fields, new UnknownField("status"), status); // attribute value is null when Status is absent + putIfValueNotNull(fields, StandardField.OWNER, owner); // attribute value is null when Owner is absent + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "DateCreated", "DateCompleted", "DateRevised" -> { + parseDate(reader, fields, elementName); + } + case "Article" -> { + parseArticleInformation(reader, fields); + } + case "PMID" -> { + String versionStr = reader.getAttributeValue(null, "Version"); + reader.next(); + if (versionStr != null) { + int version = Integer.parseInt(versionStr); + if (isCharacterXMLEvent(reader) && version > latestVersion) { + latestVersion = version; + fields.put(StandardField.PMID, reader.getText()); + } + } + } + case "MedlineJournalInfo" -> { + parseMedlineJournalInfo(reader, fields, elementName); + } + case "ChemicalList" -> { + parseChemicalList(reader, fields, elementName); + } + case "CitationSubset" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + citationSubsets.add(reader.getText()); + } + } + case "GeneSymbolList" -> { + parseGeneSymbolList(reader, fields, elementName); + } + case "MeshHeading" -> { +
parseMeshHeading(reader, meshHeadingList, elementName); + } + case "NumberOfReferences" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("references"), reader.getText()); + } + } + case "PersonalNameSubject" -> { + parsePersonalNameSubject(reader, personalNameSubjectList, elementName); + } + case "OtherID" -> { + String otherIdSource = reader.getAttributeValue(null, "Source"); + reader.next(); + if (isCharacterXMLEvent(reader)) { + String content = reader.getText(); + otherIdList.add(new OtherId(otherIdSource, content)); + } + } + case "Keyword" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + keywordList.add(reader.getText()); + } + } + case "SpaceFlightMission" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + spaceFlightMissionList.add(reader.getText()); + } + } + case "Investigator" -> { + parseInvestigator(reader, investigatorList, elementName); + } + case "GeneralNote" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + generalNoteList.add(reader.getText()); + } + } + } + } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } - if (result.length() > 0) { - fields.put(field, result.toString()); + + // populate multiple occurrence fields + if (!citationSubsets.isEmpty()) { + fields.put(new UnknownField("citation-subset"), join(citationSubsets, ", ")); + } + addMeshHeading(fields, meshHeadingList); + addPersonalNames(fields, personalNameSubjectList); + addOtherId(fields, otherIdList); + addKeywords(fields, keywordList); + if (!spaceFlightMissionList.isEmpty()) { + fields.put(new UnknownField("space-flight-mission"), join(spaceFlightMissionList, ", ")); } + addInvestigators(fields, investigatorList); + addNotes(fields, generalNoteList); } - private void addContributionDate(Map fields, ContributionDate contributionDate) { - if ((contributionDate.getDay() != null) && (contributionDate.getMonth() != null) - && 
(contributionDate.getYear() != null)) { - String result = convertToDateFormat(contributionDate.getYear(), contributionDate.getMonth(), - contributionDate.getDay()); - fields.put(new UnknownField("contribution"), result); + private void parseInvestigator(XMLStreamReader reader, List investigatorList, String startElement) + throws XMLStreamException { + String lastName = ""; + String foreName = ""; + List affiliationList = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "LastName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + lastName = reader.getText(); + } + } + case "ForeName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + foreName = reader.getText(); + } + } + case "Affiliation" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + affiliationList.add(reader.getText()); + } + } + } + } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } } - } - private String convertToDateFormat(String year, String month, String day) { - return String.format("%s-%s-%s", year, month, day); + investigatorList.add(new Investigator(lastName, foreName, affiliationList)); } - private void parseArticle(PubmedArticle article, List bibItems) { - Map fields = new HashMap<>(); + private void parsePersonalNameSubject(XMLStreamReader reader, List personalNameSubjectList, String startElement) + throws XMLStreamException { + String lastName = ""; + String foreName = ""; + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "LastName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + lastName = reader.getText(); + } + } + case "ForeName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + foreName = reader.getText(); + } + } + } + } + + if 
(isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } + } - if (article.getPubmedData() != null) { - if (article.getMedlineCitation().getDateRevised() != null) { - DateRevised dateRevised = article.getMedlineCitation().getDateRevised(); - addDateRevised(fields, dateRevised); - putIfValueNotNull(fields, StandardField.PUBSTATE, article.getPubmedData().getPublicationStatus()); - if (article.getPubmedData().getArticleIdList() != null) { - ArticleIdList articleIdList = article.getPubmedData().getArticleIdList(); - addArticleIdList(fields, articleIdList); + personalNameSubjectList.add(new PersonalNameSubject(lastName, foreName)); + } + + private void parseMeshHeading(XMLStreamReader reader, List meshHeadingList, String startElement) + throws XMLStreamException { + String descriptorName = ""; + List qualifierNames = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "DescriptorName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + descriptorName = reader.getText(); + } + } + case "QualifierName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + qualifierNames.add(reader.getText()); + } + } } } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } } - if (article.getMedlineCitation() != null) { - MedlineCitation medlineCitation = article.getMedlineCitation(); - fields.put(new UnknownField("status"), medlineCitation.getStatus()); - DateCreated dateCreated = medlineCitation.getDateCreated(); - if (medlineCitation.getDateCreated() != null) { - fields.put(new UnknownField("created"), - convertToDateFormat(dateCreated.getYear(), dateCreated.getMonth(), dateCreated.getDay())); + meshHeadingList.add(new MeshHeading(descriptorName, qualifierNames)); + } + + private void parseGeneSymbolList(XMLStreamReader reader, Map fields, String 
startElement) + throws XMLStreamException { + List geneSymbols = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + if (elementName.equals("GeneSymbol")) { + reader.next(); + if (isCharacterXMLEvent(reader)) { + geneSymbols.add(reader.getText()); + } + } } - fields.put(new UnknownField("pubmodel"), medlineCitation.getArticle().getPubModel()); - if (medlineCitation.getDateCompleted() != null) { - DateCompleted dateCompleted = medlineCitation.getDateCompleted(); - fields.put(new UnknownField("completed"), - convertToDateFormat(dateCompleted.getYear(), dateCompleted.getMonth(), dateCompleted.getDay())); + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } + } - fields.put(StandardField.PMID, medlineCitation.getPMID().getContent()); - fields.put(StandardField.OWNER, medlineCitation.getOwner()); + if (!geneSymbols.isEmpty()) { + fields.put(new UnknownField("gene-symbols"), join(geneSymbols, ", ")); + } + } - addArticleInformation(fields, medlineCitation.getArticle().getContent()); + private void parseChemicalList(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + List chemicalNames = new ArrayList<>(); - MedlineJournalInfo medlineJournalInfo = medlineCitation.getMedlineJournalInfo(); - putIfValueNotNull(fields, new UnknownField("country"), medlineJournalInfo.getCountry()); - putIfValueNotNull(fields, new UnknownField("journal-abbreviation"), medlineJournalInfo.getMedlineTA()); - putIfValueNotNull(fields, new UnknownField("nlm-id"), medlineJournalInfo.getNlmUniqueID()); - putIfValueNotNull(fields, new UnknownField("issn-linking"), medlineJournalInfo.getISSNLinking()); - if (medlineCitation.getChemicalList() != null) { - if (medlineCitation.getChemicalList().getChemical() != null) { - addChemicals(fields, medlineCitation.getChemicalList().getChemical()); + while (reader.hasNext()) { + 
reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + if (elementName.equals("NameOfSubstance")) { + reader.next(); + if (isCharacterXMLEvent(reader)) { + chemicalNames.add(reader.getText()); + } } } - if (medlineCitation.getCitationSubset() != null) { - fields.put(new UnknownField("citation-subset"), join(medlineCitation.getCitationSubset(), ", ")); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } - if (medlineCitation.getGeneSymbolList() != null) { - addGeneSymbols(fields, medlineCitation.getGeneSymbolList()); + } + + fields.put(new UnknownField("chemicals"), join(chemicalNames, ", ")); + } + + private void parseMedlineJournalInfo(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "Country" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("country"), reader.getText()); + } + } + case "MedlineTA" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("journal-abbreviation"), reader.getText()); + } + } + case "NlmUniqueID" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("nlm-id"), reader.getText()); + } + } + case "ISSNLinking" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("issn-linking"), reader.getText()); + } + } + } } - if (medlineCitation.getMeshHeadingList() != null) { - addMeashHeading(fields, medlineCitation.getMeshHeadingList()); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } - putIfValueNotNull(fields, new UnknownField("references"), medlineCitation.getNumberOfReferences()); - if 
(medlineCitation.getPersonalNameSubjectList() != null) { - addPersonalNames(fields, medlineCitation.getPersonalNameSubjectList()); + } + } + + private void parseArticleInformation(XMLStreamReader reader, Map fields) throws XMLStreamException { + List titleList = new ArrayList<>(); + String pubmodel = reader.getAttributeValue(null, "PubModel"); + fields.put(new UnknownField("pubmodel"), pubmodel); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "Journal" -> { + parseJournal(reader, fields); + } + case "ArticleTitle" -> { + handleTextElement(reader, titleList, elementName); + } + case "Pagination" -> { + addPagination(reader, fields, elementName); + } + case "ELocationID" -> { + String eidType = reader.getAttributeValue(null, "EIdType"); + String validYN = reader.getAttributeValue(null, "ValidYN"); + reader.next(); + if (isCharacterXMLEvent(reader) && "Y".equals(validYN)) { + handleElocationId(fields, reader, eidType); + } + } + case "Abstract" -> { + addAbstract(reader, fields, elementName); + } + case "AuthorList" -> { + handleAuthorList(reader, fields, elementName); + } + } } - if (medlineCitation.getOtherID() != null) { - addOtherId(fields, medlineCitation.getOtherID()); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Article")) { + break; } - if (medlineCitation.getKeywordList() != null) { - addKeyWords(fields, medlineCitation.getKeywordList()); + } + + if (!titleList.isEmpty()) { + fields.put(StandardField.TITLE, StringUtil.stripBrackets(join(titleList, " "))); + } + } + + private void parseJournal(XMLStreamReader reader, Map fields) throws XMLStreamException { + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "Title" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, 
StandardField.JOURNAL, reader.getText()); + } + } + case "ISSN" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.ISSN, reader.getText()); + } + } + case "Volume" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.VOLUME, reader.getText()); + } + } + case "Issue" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.ISSUE, reader.getText()); + } + } + case "PubDate" -> { + addPubDate(reader, fields, elementName); + } + } } - if (medlineCitation.getSpaceFlightMission() != null) { - fields.put(new UnknownField("space-flight-mission"), join(medlineCitation.getSpaceFlightMission(), ", ")); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Journal")) { + break; } - if (medlineCitation.getInvestigatorList() != null) { - addInvestigators(fields, medlineCitation.getInvestigatorList()); + } + } + + private void parseDate(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + Optional year = Optional.empty(); + Optional month = Optional.empty(); + Optional day = Optional.empty(); + + // mapping from date XML element to field name + Map dateFieldMap = Map.of( + "DateCreated", "created", + "DateCompleted", "completed", + "DateRevised", "revised", + "ContributionDate", "contribution", + "PubDate", "" + ); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "Year" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + year = Optional.of(reader.getText()); + } + } + case "Month" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + month = Optional.of(reader.getText()); + } + } + case "Day" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + day = Optional.of(reader.getText()); + } + } + } } - if (medlineCitation.getGeneralNote() != null) { - 
addNotes(fields, medlineCitation.getGeneralNote()); + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } - BibEntry entry = new BibEntry(StandardEntryType.Article); - entry.setField(fields); - - bibItems.add(entry); + Optional date = Date.parse(year, month, day); + date.ifPresent(dateValue -> + fields.put(new UnknownField(dateFieldMap.get(startElement)), dateValue.getNormalized())); } - private void addArticleIdList(Map fields, ArticleIdList articleIdList) { - for (ArticleId id : articleIdList.getArticleId()) { - if (id.getIdType() != null) { - if ("pubmed".equals(id.getIdType())) { - fields.put(StandardField.PMID, id.getContent()); + private void addArticleIdList(Map fields, List articleIdList) { + for (ArticleId id : articleIdList) { + if (!id.idType().isBlank()) { + if ("pubmed".equals(id.idType())) { + fields.computeIfAbsent(StandardField.PMID, k -> id.content()); } else { - fields.put(FieldFactory.parseField(StandardEntryType.Article, id.getIdType()), id.getContent()); + fields.computeIfAbsent(FieldFactory.parseField(StandardEntryType.Article, id.idType()), k -> id.content()); } } } } - private void addNotes(Map fields, List generalNote) { + private void addNotes(Map fields, List generalNoteList) { List notes = new ArrayList<>(); - for (GeneralNote note : generalNote) { - if (note != null) { - notes.add(note.getContent()); + + for (String note : generalNoteList) { + if (!note.isBlank()) { + notes.add(note); } } - fields.put(StandardField.NOTE, join(notes, ", ")); + + if (!notes.isEmpty()) { + fields.put(StandardField.NOTE, join(notes, ", ")); + } } - private void addInvestigators(Map fields, InvestigatorList investigatorList) { + private void addInvestigators(Map fields, List investigatorList) { List investigatorNames = new ArrayList<>(); List affiliationInfos = new ArrayList<>(); - String name; + // add the investigators like the authors - if (investigatorList.getInvestigator() != null) { - List investigators = 
investigatorList.getInvestigator(); - for (Investigator investigator : investigators) { - name = investigator.getLastName(); - if (investigator.getForeName() != null) { - name += ", " + investigator.getForeName(); + if (!investigatorList.isEmpty()) { + for (Investigator investigator : investigatorList) { + StringBuilder result = new StringBuilder(investigator.lastName()); + if (!investigator.foreName().isBlank()) { + result.append(", ").append(investigator.foreName()); } - investigatorNames.add(name); + investigatorNames.add(result.toString()); // now add the affiliation info - if (investigator.getAffiliationInfo() != null) { - for (AffiliationInfo info : investigator.getAffiliationInfo()) { - for (Serializable affiliation : info.getAffiliation().getContent()) { - if (affiliation instanceof String) { - affiliationInfos.add((String) affiliation); - } - } - } - fields.put(new UnknownField("affiliation"), join(affiliationInfos, ", ")); + if (!investigator.affiliationList().isEmpty()) { + affiliationInfos.addAll(investigator.affiliationList()); } } + + if (!affiliationInfos.isEmpty()) { + fields.put(new UnknownField("affiliation"), join(affiliationInfos, ", ")); + } + fields.put(new UnknownField("investigator"), join(investigatorNames, " and ")); } } - private void addKeyWords(Map fields, List allKeywordLists) { - List keywordStrings = new ArrayList<>(); - // add keywords to the list - for (KeywordList keywordList : allKeywordLists) { - for (Keyword keyword : keywordList.getKeyword()) { - for (Serializable content : keyword.getContent()) { - if (content instanceof String) { - keywordStrings.add((String) content); - } - } - } - } - // Check whether MeshHeadingList exist or not + private void addKeywords(Map fields, List keywordList) { + // Check whether MeshHeadingList exists or not if (fields.get(StandardField.KEYWORDS) == null) { - fields.put(StandardField.KEYWORDS, join(keywordStrings, KEYWORD_SEPARATOR)); + fields.put(StandardField.KEYWORDS, join(keywordList, 
KEYWORD_SEPARATOR)); } else { - if (keywordStrings.size() > 0) { + if (!keywordList.isEmpty()) { // if it exists, combine the MeshHeading with the keywords - String result = join(keywordStrings, "; "); + String result = join(keywordList, "; "); result = fields.get(StandardField.KEYWORDS) + KEYWORD_SEPARATOR + result; fields.put(StandardField.KEYWORDS, result); } } } - private void addOtherId(Map fields, List otherID) { - for (OtherID id : otherID) { - if ((id.getSource() != null) && (id.getContent() != null)) { - fields.put(FieldFactory.parseField(StandardEntryType.Article, id.getSource()), id.getContent()); + private void addOtherId(Map fields, List otherIdList) { + for (OtherId id : otherIdList) { + if (!id.source().isBlank() && !id.content().isBlank()) { + fields.put(FieldFactory.parseField(StandardEntryType.Article, id.source()), id.content()); } } } - private void addPersonalNames(Map fields, PersonalNameSubjectList personalNameSubjectList) { + private void addPersonalNames(Map fields, List personalNameSubjectList) { if (fields.get(StandardField.AUTHOR) == null) { // if no authors appear, then add the personal names as authors List personalNames = new ArrayList<>(); - if (personalNameSubjectList.getPersonalNameSubject() != null) { - List personalNameSubject = personalNameSubjectList.getPersonalNameSubject(); - for (PersonalNameSubject personalName : personalNameSubject) { - String name = personalName.getLastName(); - if (personalName.getForeName() != null) { - name += ", " + personalName.getForeName(); - } - personalNames.add(name); + + if (!personalNameSubjectList.isEmpty()) { + for (PersonalNameSubject personalNameSubject : personalNameSubjectList) { + StringBuilder result = new StringBuilder(personalNameSubject.lastName()); + if (!personalNameSubject.foreName().isBlank()) { + result.append(", ").append(personalNameSubject.foreName()); + } + personalNames.add(result.toString()); } + fields.put(StandardField.AUTHOR, join(personalNames, " and ")); } } } - 
private void addMeashHeading(Map fields, MeshHeadingList meshHeadingList) { - ArrayList keywords = new ArrayList<>(); - for (MeshHeading keyword : meshHeadingList.getMeshHeading()) { - StringBuilder result = new StringBuilder(keyword.getDescriptorName().getContent()); - if (keyword.getQualifierName() != null) { - for (QualifierName qualifier : keyword.getQualifierName()) { - result.append(", ").append(qualifier.getContent()); + private void addMeshHeading(Map fields, List meshHeadingList) { + List keywords = new ArrayList<>(); + + if (!meshHeadingList.isEmpty()) { + for (MeshHeading meshHeading : meshHeadingList) { + StringBuilder result = new StringBuilder(meshHeading.descriptorName()); + if (meshHeading.qualifierNames() != null) { + for (String qualifierName : meshHeading.qualifierNames()) { + result.append(", ").append(qualifierName); + } } + keywords.add(result.toString()); } - keywords.add(result.toString()); + + fields.put(StandardField.KEYWORDS, join(keywords, KEYWORD_SEPARATOR)); } - fields.put(StandardField.KEYWORDS, join(keywords, KEYWORD_SEPARATOR)); } - private void addGeneSymbols(Map fields, GeneSymbolList geneSymbolList) { - List geneSymbols = geneSymbolList.getGeneSymbol(); - fields.put(new UnknownField("gene-symbols"), join(geneSymbols, ", ")); - } + private void addPubDate(XMLStreamReader reader, Map fields, String startElement) throws XMLStreamException { + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "MedlineDate" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + fields.put(StandardField.YEAR, extractYear(reader.getText())); + } + } + case "Year" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + fields.put(StandardField.YEAR, reader.getText()); + } + } + case "Month" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + Optional month = Month.parse(reader.getText()); + month.ifPresent(monthValue -> 
fields.put(StandardField.MONTH, monthValue.getJabRefFormat())); + } + } + case "Season" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + fields.put(new UnknownField("season"), reader.getText()); + } + } + } + } - private void addChemicals(Map fields, List chemicals) { - List chemicalNames = new ArrayList<>(); - for (Chemical chemical : chemicals) { - if (chemical != null) { - chemicalNames.add(chemical.getNameOfSubstance().getContent()); + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } - fields.put(new UnknownField("chemicals"), join(chemicalNames, ", ")); } - private void addArticleInformation(Map fields, List content) { - for (Object object : content) { - if (object instanceof Journal) { - Journal journal = (Journal) object; - putIfValueNotNull(fields, StandardField.JOURNAL, journal.getTitle()); - - ISSN issn = journal.getISSN(); - if (issn != null) { - putIfValueNotNull(fields, StandardField.ISSN, issn.getContent()); + private void addAbstract(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { + List abstractTextList = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "CopyrightInformation" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, new UnknownField("copyright"), reader.getText()); + } + } + case "AbstractText" -> { + handleTextElement(reader, abstractTextList, elementName); + } } + } - JournalIssue journalIssue = journal.getJournalIssue(); - putIfValueNotNull(fields, StandardField.VOLUME, journalIssue.getVolume()); - putIfValueNotNull(fields, StandardField.ISSUE, journalIssue.getIssue()); - - addPubDate(fields, journalIssue.getPubDate()); - } else if (object instanceof ArticleTitle) { - ArticleTitle articleTitle = (ArticleTitle) object; - fields.put(StandardField.TITLE, 
StringUtil.stripBrackets(articleTitle.getContent().toString())); - } else if (object instanceof Pagination) { - Pagination pagination = (Pagination) object; - addPagination(fields, pagination); - } else if (object instanceof ELocationID) { - ELocationID eLocationID = (ELocationID) object; - addElocationID(fields, eLocationID); - } else if (object instanceof Abstract) { - Abstract abs = (Abstract) object; - addAbstract(fields, abs); - } else if (object instanceof AuthorList) { - AuthorList authors = (AuthorList) object; - handleAuthors(fields, authors); + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } - } - private void addElocationID(Map fields, ELocationID eLocationID) { - if (eLocationID.getEIdType().equals("doi")) { - fields.put(StandardField.DOI, eLocationID.getContent()); - } - if (eLocationID.getEIdType().equals("pii")) { - fields.put(new UnknownField("pii"), eLocationID.getContent()); + if (!abstractTextList.isEmpty()) { + fields.put(StandardField.ABSTRACT, join(abstractTextList, " ")); } } - private void addPubDate(Map fields, PubDate pubDate) { - if (pubDate.getYear() == null) { - // if year of the pubdate is null, the medlineDate shouldn't be null - fields.put(StandardField.YEAR, extractYear(pubDate.getMedlineDate())); - } else { - fields.put(StandardField.YEAR, pubDate.getYear()); - if (pubDate.getMonth() != null) { - Optional month = Month.parse(pubDate.getMonth()); - if (month.isPresent()) { - fields.put(StandardField.MONTH, month.get().getJabRefFormat()); + /** + * Handles text entities that can have inner tags such as {@literal <}i{@literal >}, {@literal <}b{@literal >} etc. + * We ignore the tags and return only the characters present in the enclosing parent element. 
+ * + */ + private void handleTextElement(XMLStreamReader reader, List textList, String startElement) + throws XMLStreamException { + StringBuilder result = new StringBuilder(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "sup", "sub" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + result.append("(").append(reader.getText()).append(")"); + } + } } - } else if (pubDate.getSeason() != null) { - fields.put(new UnknownField("season"), pubDate.getSeason()); + } else if (isCharacterXMLEvent(reader)) { + result.append(reader.getText().trim()).append(" "); } - } - } - private void addAbstract(Map fields, Abstract abs) { - putIfValueNotNull(fields, new UnknownField("copyright"), abs.getCopyrightInformation()); - List abstractText = new ArrayList<>(); - for (AbstractText text : abs.getAbstractText()) { - for (Serializable textContent : text.getContent()) { - if (textContent instanceof String) { - abstractText.add((String) textContent); - } + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } - fields.put(StandardField.ABSTRACT, join(abstractText, " ")); + + textList.add(result.toString().trim()); } - private void addPagination(Map fields, Pagination pagination) { + private void addPagination(XMLStreamReader reader, Map fields, String startElement) + throws XMLStreamException { String startPage = ""; String endPage = ""; - for (JAXBElement element : pagination.getContent()) { - if ("MedlinePgn".equals(element.getName().getLocalPart())) { - putIfValueNotNull(fields, StandardField.PAGES, fixPageRange(element.getValue())); - } else if ("StartPage".equals(element.getName().getLocalPart())) { - // it could happen, that the article has only a start page - startPage = element.getValue() + endPage; - putIfValueNotNull(fields, StandardField.PAGES, startPage); - } else if 
("EndPage".equals(element.getName().getLocalPart())) { - endPage = element.getValue(); - // but it should not happen, that a endpage appears without startpage - fields.put(StandardField.PAGES, fixPageRange(startPage + "-" + endPage)); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "MedlinePgn" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.PAGES, fixPageRange(reader.getText())); + } + } + case "StartPage" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + // it could happen, that the article has only a start page + startPage = reader.getText() + endPage; + putIfValueNotNull(fields, StandardField.PAGES, startPage); + } + } + case "EndPage" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + endPage = reader.getText(); + // but it should not happen, that a endpage appears without startpage + fields.put(StandardField.PAGES, fixPageRange(startPage + "-" + endPage)); + } + } + } + } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } } @@ -648,31 +1147,68 @@ private String extractYear(String medlineDate) { return medlineDate.substring(0, 4); } - private void handleAuthors(Map fields, AuthorList authors) { + private void handleAuthorList(XMLStreamReader reader, Map fields, String startElement) throws XMLStreamException { List authorNames = new ArrayList<>(); - for (Author author : authors.getAuthor()) { - if (author.getCollectiveName() != null) { - Text collectiveNames = author.getCollectiveName(); - for (Serializable content : collectiveNames.getContent()) { - if (content instanceof String) { - authorNames.add((String) content); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "Author" -> { + parseAuthor(reader, 
authorNames); } } - } else { - String authorName = author.getLastName(); - if (author.getForeName() != null) { - authorName += ", " + author.getForeName(); - } - authorNames.add(authorName); + } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; } } + fields.put(StandardField.AUTHOR, join(authorNames, " and ")); } - private void addDateRevised(Map fields, DateRevised dateRevised) { - if ((dateRevised.getDay() != null) && (dateRevised.getMonth() != null) && (dateRevised.getYear() != null)) { - fields.put(new UnknownField("revised"), - convertToDateFormat(dateRevised.getYear(), dateRevised.getMonth(), dateRevised.getDay())); + private void parseAuthor(XMLStreamReader reader, List authorNames) throws XMLStreamException { + StringBuilder authorName = new StringBuilder(); + List collectiveNames = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "CollectiveName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + collectiveNames.add(reader.getText()); + } + } + case "LastName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + authorName = new StringBuilder(reader.getText()); + } + } + case "ForeName" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + authorName.append(", ").append(reader.getText()); + } + } + } + } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Author")) { + break; + } + } + + if (collectiveNames.size() > 0) { + authorNames.addAll(collectiveNames); + } + if (!authorName.toString().isBlank()) { + authorNames.add(authorName.toString()); } } @@ -683,7 +1219,8 @@ private void putIfValueNotNull(Map fields, Field field, String va } /** - * Convert medline page ranges from short form to full form. Medline reports page ranges in a shorthand format. 
The last page is reported using only the digits which differ from the first page. i.e. 12345-51 refers to the actual range 12345-12351 + * Convert medline page ranges from short form to full form. Medline reports page ranges in a shorthand format. + * The last page is reported using only the digits which differ from the first page. i.e. 12345-51 refers to the actual range 12345-12351 */ private String fixPageRange(String pageRange) { int minusPos = pageRange.indexOf('-'); @@ -700,6 +1237,18 @@ private String fixPageRange(String pageRange) { return startPage + "--" + endPage; } + private boolean isCharacterXMLEvent(XMLStreamReader reader) { + return reader.getEventType() == XMLEvent.CHARACTERS; + } + + private boolean isStartXMLEvent(XMLStreamReader reader) { + return reader.getEventType() == XMLEvent.START_ELEMENT; + } + + private boolean isEndXMLEvent(XMLStreamReader reader) { + return reader.getEventType() == XMLEvent.END_ELEMENT; + } + @Override public List parseEntries(InputStream inputStream) throws ParseException { try { diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleId.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleId.java new file mode 100644 index 00000000000..3a8be1b9b63 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleId.java @@ -0,0 +1,7 @@ +package org.jabref.logic.importer.fileformat.medline; + +public record ArticleId( + String idType, + String content +) { +} diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/Investigator.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/Investigator.java new file mode 100644 index 00000000000..64ea31e6206 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/Investigator.java @@ -0,0 +1,10 @@ +package org.jabref.logic.importer.fileformat.medline; + +import java.util.List; + +public record Investigator( + String lastName, + String foreName, + List 
affiliationList +) { +} diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeading.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeading.java new file mode 100644 index 00000000000..a78f65f9727 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeading.java @@ -0,0 +1,9 @@ +package org.jabref.logic.importer.fileformat.medline; + +import java.util.List; + +public record MeshHeading( + String descriptorName, + List qualifierNames +) { +} diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherId.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherId.java new file mode 100644 index 00000000000..4429436c332 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherId.java @@ -0,0 +1,7 @@ +package org.jabref.logic.importer.fileformat.medline; + +public record OtherId( + String source, + String content +) { +} diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubject.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubject.java new file mode 100644 index 00000000000..bda9c6aefff --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubject.java @@ -0,0 +1,7 @@ +package org.jabref.logic.importer.fileformat.medline; + +public record PersonalNameSubject( + String lastName, + String foreName +) { +} diff --git a/src/main/resources/xjc/medline/medline.xsd b/src/main/resources/xjc/medline/medline.xsd deleted file mode 100644 index 9c2f73a5bdd..00000000000 --- a/src/main/resources/xjc/medline/medline.xsd +++ /dev/null @@ -1,314 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib new file mode 100644 index 00000000000..f64ae9d0921 --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib @@ -0,0 +1,26 @@ +@Article{, + author = {Moreno-Grau, Sonia and Hernández, Isabel and Heilmann-Heimbach, Stefanie and Ruiz, Susana and Rosende-Roca, Maitée and Mauleón, Ana and Vargas, Liliana and Rodríguez-Gómez, Octavio and Alegret, Montserrat and Espinosa, Ana and Ortega, Gemma and Aguilera, Nuria and Abdelnour, Carla and Neuroimaging Initiative, Alzheimer's Disease and Gil, Silvia and Maier, Wolfgang and Sotolongo-Grau, Oscar and Tárraga, Lluís and Ramirez, Alfredo and López-Arrrieta, Jesús and Antúnez, Carmen and Serrano-Ríos, Manuel and Boada, Mercè and Ruiz, Agustín}, + journal = {Oncotarget}, + title = {Genome-wide significant risk factors on chromosome 19 and the APOE locus.}, + year = {2018}, + issn = {1949-2553}, + month = may, + pages = {24590--24600}, + volume = {9}, + abstract = {The apolipoprotein E ( APOE ) gene on chromosome 19q13.32, was the first, and remains the strongest, genetic risk factor for Alzheimer's disease (AD). Additional signals associated with AD have been located in chromosome 19, including ABCA7 (19p13.3) and CD33 ( 19q13.41). The ABCA7 gene has been replicated in most populations. However, the contribution to AD of other signals close to APOE gene remains controversial. Possible explanations for inconsistency between reports include long range linkage disequilibrium (LRLD). 
We analysed the contribution of ABCA7 and CD33 loci to AD risk and explore LRLD patterns across APOE region. To evaluate AD risk conferred by ABCA7 rs4147929:G>A and CD33 rs3865444:C>A, we used a large Spanish population (1796 AD cases, 2642 controls). The ABCA7 rs4147929:G>A SNP effect was nominally replicated in the Spanish cohort and reached genome-wide significance after meta-analysis (odds ratio (OR)=1.15, 95% confidence interval (95% CI)=1.12-1.19; P = 1.60 x 10 (-19)). CD33 rs3865444:C>A was not associated with AD in the dataset. The meta-analysis was also negative (OR=0.98, 95% CI=0.93-1.04; P =0.48). After exploring LRLD patterns between APOE and CD33 in several datasets, we found significant LD (D' >0.20; P <0.030) between APOE -Ɛ2 and CD33 rs3865444C>A in two of five datasets, suggesting the presence of a non-universal long range interaction between these loci affecting to some populations. In conclusion, we provide here evidence of genetic association of the ABCA7 locus in the Spanish population and also propose a plausible explanation for the controversy on the contribution of CD33 to AD susceptibility.}, + country = {United States}, + doi = {10.18632/oncotarget.25083}, + issn-linking = {1949-2553}, + issue = {37}, + journal-abbreviation = {Oncotarget}, + keywords = {ABCA7; APOE; CD33; Gerotarget; late onset Alzheimer’s disease; linkage disequilibrium}, + nlm-id = {101532965}, + owner = {NLM}, + pii = {25083}, + pmc = {PMC5973862}, + pmid = {29872490}, + pubmodel = {Electronic-eCollection}, + pubstate = {epublish}, + revised = {2019-11-20}, + status = {PubMed-not-MEDLINE}, +} diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml new file mode 100644 index 00000000000..fa4c03e3f4d --- /dev/null +++ b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml @@ -0,0 +1,652 
@@ + + + + 29872490 + + 2019 + 11 + 20 + +
+ + 1949-2553 + + 9 + 37 + + 2018 + May + 15 + + + Oncotarget + Oncotarget + + Genome-wide significant risk factors on chromosome 19 and the + APOE locus. + + + 24590 + 24600 + 24590-24600 + + 10.18632/oncotarget.25083 + + The apolipoprotein E ( + APOE) gene on chromosome 19q13.32, was the first, and remains the strongest, genetic risk factor for Alzheimer's disease (AD). Additional signals associated with AD have been located in chromosome 19, including + ABCA7 (19p13.3) and + CD33 (19q13.41). The + ABCA7 gene has been replicated in most populations. However, the contribution to AD of other signals close to + APOE gene remains controversial. Possible explanations for inconsistency between reports include long range linkage disequilibrium (LRLD). We analysed the contribution of + ABCA7 and + CD33 loci to AD risk and explore LRLD patterns across + APOE region. To evaluate AD risk conferred by + ABCA7 rs4147929:G>A and + CD33 rs3865444:C>A, we used a large Spanish population (1796 AD cases, 2642 controls). The + ABCA7 rs4147929:G>A SNP effect was nominally replicated in the Spanish cohort and reached genome-wide significance after meta-analysis (odds ratio (OR)=1.15, 95% confidence interval (95% CI)=1.12-1.19; + P = 1.60 x 10 + -19). + CD33 rs3865444:C>A was not associated with AD in the dataset. The meta-analysis was also negative (OR=0.98, 95% CI=0.93-1.04; + P=0.48). After exploring LRLD patterns between + APOE and + CD33 in several datasets, we found significant LD (D' >0.20; + P <0.030) between + APOE-Ɛ2 and + CD33 rs3865444C>A in two of five datasets, suggesting the presence of a non-universal long range interaction between these loci affecting to some populations. In conclusion, we provide here evidence of genetic association of the + ABCA7 locus in the Spanish population and also propose a plausible explanation for the controversy on the contribution of + CD33 to AD susceptibility. 
+ + + + + Moreno-Grau + Sonia + S + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Hernández + Isabel + I + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Heilmann-Heimbach + Stefanie + S + + Institute of Human Genetics, University of Bonn, Bonn, Germany. + + + Department of Genomics, Life & Brain Center, University of Bonn, Bonn, Germany. + + + + Ruiz + Susana + S + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Rosende-Roca + Maitée + M + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Mauleón + Ana + A + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Vargas + Liliana + L + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Rodríguez-Gómez + Octavio + O + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Alegret + Montserrat + M + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Espinosa + Ana + A + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. 
+ + + + Ortega + Gemma + G + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Aguilera + Nuria + N + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Abdelnour + Carla + C + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Neuroimaging Initiative + Alzheimer's Disease + AD + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + Institute of Human Genetics, University of Bonn, Bonn, Germany. + + + Department of Genomics, Life & Brain Center, University of Bonn, Bonn, Germany. + + + Department of Psychiatry and Psychotherapy, University of Bonn, Bonn, Germany. + + + German Center for Neurodegenerative Diseases, DZNE, Bonn, Germany. + + + Department of Psychiatry and Psychotherapy, University of Cologne, Cologne, Germany. + + + Memory Unit, University Hospital La Paz-Cantoblanco, Madrid, Spain. + + + Dementia Unit, University Hospital Virgen de la Arrixaca, Murcia, Spain. + + + Centro de Investigación Biomédica en Red de Diabetes y Enfermedades Metabólicas Asociadas, CIBERDEM, Spain, Hospital Clínico San Carlos, Madrid, Spain. + + + + Gil + Silvia + S + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Maier + Wolfgang + W + + Department of Psychiatry and Psychotherapy, University of Bonn, Bonn, Germany. + + + German Center for Neurodegenerative Diseases, DZNE, Bonn, Germany. 
+ + + + Sotolongo-Grau + Oscar + O + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Tárraga + Lluís + L + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Ramirez + Alfredo + A + + Institute of Human Genetics, University of Bonn, Bonn, Germany. + + + Department of Psychiatry and Psychotherapy, University of Bonn, Bonn, Germany. + + + Department of Psychiatry and Psychotherapy, University of Cologne, Cologne, Germany. + + + + López-Arrrieta + Jesús + J + + Memory Unit, University Hospital La Paz-Cantoblanco, Madrid, Spain. + + + + Antúnez + Carmen + C + + Dementia Unit, University Hospital Virgen de la Arrixaca, Murcia, Spain. + + + + Serrano-Ríos + Manuel + M + + Centro de Investigación Biomédica en Red de Diabetes y Enfermedades Metabólicas Asociadas, CIBERDEM, Spain, Hospital Clínico San Carlos, Madrid, Spain. + + + + Boada + Mercè + M + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + Ruiz + Agustín + A + + Research Center and Memory Clinic of Fundació ACE, Institut Català de Neurociències Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain. + + + + eng + + + U01 AG024904 + AG + NIA NIH HHS + United States + + + + Journal Article + + + 2018 + 05 + 15 + +
+ + United States + Oncotarget + 101532965 + 1949-2553 + + + ABCA7 + APOE + CD33 + Gerotarget + late onset Alzheimer’s disease + linkage disequilibrium + + CONFLICTS OF INTEREST None. The authors declare that they have no competing interest. +
+ + + + 2017 + 12 + 13 + + + 2018 + 3 + 22 + + + 2018 + 6 + 7 + 6 + 0 + + + 2018 + 6 + 7 + 6 + 0 + + + 2018 + 6 + 7 + 6 + 1 + + + epublish + + 29872490 + PMC5973862 + 10.18632/oncotarget.25083 + 25083 + + + + Moreno-Grau S, Ruiz A. Genome research in pre-dementia stages of Alzheimer’s disease. Expert Rev Mol Med. 2016;18:e11. + + 27237222 + + + + Corder E, Saunders A. Gene dose of apolipoprotein E type 4 allele and the risk of Alzheimer’s disease in late onset families. Science. 1993;8:41–3. + + 8346443 + + + + Corder EH, Saunders AM, Risch NJ, Strittmatter WJ, Schmechel DE, Gaskell PC, Rimmler JB, Locke PA, Conneally PM, Schmader KE. Protective effect of apolipoprotein E type 2 allele for late onset Alzheimer disease. Nat Genet. 1994;7:180–4. + + 7920638 + + + + Roses AD, Lutz MW, Amrine-Madsen H, Saunders AM, Crenshaw DG, Sundseth SS, Huentelman MJ, Welsh-Bohmer KA, Reiman EM. A TOMM40 variable-length polymorphism predicts the age of late-onset Alzheimer’s disease. Pharmacogenomics J. 2010;10:375–84. + + PMC2946560 + 20029386 + + + + Seshadri S, Fitzpatrick AL, Ikram MA, DeStefano AL, Gudnason V, Boada M, Bis JC, Smith AV, Carassquillo MM, Lambert JC, Harold D, Schrijvers EM, Ramirez-Lorca R, et al. Genome-wide analysis of genetic loci associated with Alzheimer disease. JAMA. 2010;303:1832–40. + + PMC2989531 + 20460622 + + + + Hollingworth P, Harold D, Sims R, Gerrish A, Lambert JC, Carrasquillo MM, Abraham R, Hamshere ML, Pahwa JS, Moskvina V, Dowzell K, Jones N, Stretton A, et al. Common variants at ABCA7, MS4A6A/MS4A4E, EPHA1, CD33 and CD2AP are associated with Alzheimer’s disease. Nat Genet. 2011;43:429–35. + + PMC3084173 + 21460840 + + + + Cruchaga C, Karch CM, Jin SC, Benitez BA, Cai Y, Guerreiro R, Harari O, Norton J, Budde J, Bertelsen S, Jeng AT, Cooper B, Skorupa T, et al. Rare coding variants in the phospholipase D3 gene confer risk for Alzheimer ’ s disease. Nature. 2014;505:550–4. 
+ + PMC4050701 + 24336208 + + + + Naj AC, Jun G, Beecham GW, Wang LS, Vardarajan BN, Buros J, Gallins PJ, Buxbaum JD, Jarvik GP, Crane PK, Larson EB, Bird TD, Boeve BF, et al. Common variants at MS4A4/MS4A6E, CD2AP, CD33 and EPHA1 are associated with late-onset Alzheimer’s disease. Nat Genet. 2011;43:436–41. + + PMC3090745 + 21460841 + + + + Lambert JC, Ibrahim-Verbaas CA, Harold D, Naj AC, Sims R, Bellenguez C, DeStafano AL, Bis JC, Beecham GW, Grenier-Boley B, Russo G, Thorton-Wells TA, Jones N, et al. Meta-analysis of 74,046 individuals identifies 11 new susceptibility loci for Alzheimer’s disease. Nat Genet. 2013;45:1452–8. + + PMC3896259 + 24162737 + + + + Reitz C, Jun G, Naj A, Rajbhandary R, Vardarajan BN, Wang LS, Valladares O, Lin CF, Larson EB, Graff-Radford NR, Evans D, De Jager PL, Crane PK, et al. Variants in the ATP-binding cassette transporter (ABCA7), apolipoprotein E ε4,and the risk of late-onset Alzheimer disease in African Americans. JAMA. 2013;309:1483–92. + + PMC3667653 + 23571587 + + + + Chouraki V, Seshadri S. Genetics of Alzheimer’s disease. Adv Genet. 2014;87:245–94. + + 25311924 + + + + Heilmann S, Drichel D, Clarimon J, Fernández V, Lacour A, Wagner H, Thelen M, Hernández I, Fortea J, Alegret M, Blesa R, Mauleón A, Roca MR, et al. PLD3 in non-familial Alzheimer’s disease. Nature. 2015;520:E3–5. + + 25832411 + + + + Carrasquillo MM, Belbin O, Hunter TA, Ma L, Bisceglio GD, Zou F, Crook JE, Pankratz VS, Sando SB, Aasly JO, Barcikowska M, Wszolek ZK, Dickson DW, et al. Replication of EPHA1 and CD33 associations with late-onset Alzheimer’s disease: a multi-centre case-control study. Mol Neurodegener. 2011;6:54. + + PMC3157442 + 21798052 + + + + Sakae N, Liu CC, Shinohara M, Frisch-Daiello J, Ma L, Yamazaki Y, Tachibana M, Younkin L, Kurti A, Carrasquillo MM, Zou F, Sevlever D, Bisceglio G, et al. ABCA7 Deficiency Accelerates Amyloid-β Generation and Alzheimer’s Neuronal Pathology. J Neurosci. 2016;36:3848–59. 
+ + PMC4812140 + 27030769 + + + + Jehle AW, Gardai SJ, Li S, Linsel-Nitschke P, Morimoto K, Janssen WJ, Vandivier RW, Wang N, Greenberg S, Dale BM, Qin C, Henson PM, Tall AR. ATP-binding cassette transporter A7 enhances phagocytosis of apoptotic cells and associated ERK signaling in macrophages. J Cell Biol. 2006;174:547–56. + + PMC2064260 + 16908670 + + + + Kim WS, Li H, Ruberu K, Chan S, Elliott DA, Low JK, Cheng D, Karl T, Garner B. Deletion of Abca7 increases cerebral amyloid-β accumulation in the J20 mouse model of Alzheimer’s disease. J Neurosci. 2013;33:4387–94. + + PMC6704948 + 23467355 + + + + Bradshaw EM, Chibnik LB, Keenan BT, Ottoboni L, Raj T, Tang A, Rosenkrantz LL, Imboywa S, Lee M, Von Korff A, Morris MC, Evans DA, Johnson K, et al. CD33 Alzheimer’s disease locus: altered monocyte function and amyloid biology. Nat Neurosci. 2013;16:848–50. + + PMC3703870 + 23708142 + + + + Cruchaga C, Nowotny P, Kauwe JSK, Ridge PG, Mayo K, Bertelsen S, Hinrichs A, Fagan AM, Holtzman DM, Morris JC, Goate AM. Association and expression analyses with single-nucleotide polymorphisms in TOMM40 in Alzheimer disease. Arch Neurol. 2011;68:1013–9. + + PMC3204798 + 21825236 + + + + Mueller JC. Linkage disequilibrium for different scales and applications. Brief Bioinform. 2004;5:355–64. + + 15606972 + + + + Ardlie KG, Kruglyak L, Seielstad M. Patterns of Linkage Disequilibrium in the Human Genome. Nat Rev Genet. 2002;3:299–309. + + 11967554 + + + + Campbell CD, Ogburn EL, Lunetta KL, Lyon HN, Freedman ML, Groop LC, Altshuler D, Ardlie KG, Hirschhorn JN. Demonstrating stratification in a European American population. Nat Genet. 2005;37:868–72. + + 16041375 + + + + Dawson E, Abecasis GR, Bumpstead S, Chen Y, Hunt S, Beare DM, Pabial J, Dibling T, Tinsley E, Kirby S, Carter D, Papaspyridonos M, Livingstone S, et al. A first-generation linkage disequilibrium map of human chromosome 22. Nature. 2002;418:544–8. + + 12110843 + + + + Beckmann JS, Estivill X, Antonarakis SE. 
Copy number variants and genetic traits: closer to the resolution of phenotypic to genotypic variability. Nat Rev Genet. 2007;8:639–46. + + 17637735 + + + + Antúnez C, Boada M, González-Pérez A, Gayán J, Ramírez-Lorca R, Marín J, Hernández I, Moreno-Rey C, Morón FJ, López-Arrieta J, Mauleón A, Rosende-Roca M, Noguera-Perea F, et al. The membrane-spanning 4-domains, subfamily A (MS4A) gene cluster contains a common variant associated with Alzheimer’s disease. Genome Med. 2011;3:33. + + PMC3219074 + 21627779 + + + + Ruiz A, Heilmann S, Becker T, Hernández I, Wagner H, Thelen M, Mauleón A, Rosende-Roca M, Bellenguez C, Bis JC, Harold D, Gerrish A, Sims R, et al. Follow-up of loci from the International Genomics of Alzheimer’s Disease Project identifies TRIP4 as a novel susceptibility gene. Transl Psychiatry. 2014;4:e358. + + PMC3944635 + 24495969 + + + + Calero O, Hortigüela R, Bullido M, Calero M. Apolipoprotein E genotyping method by Real Time PCR, a fast and cost-effective alternative to the TaqMan® and FRET assays. J Neurosci Methods. 2009;183:238–40. + + 19583979 + + + + Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MA, Bender D, Maller J, Sklar P, de Bakker PI, Daly MJ, Sham PC. PLINK: a tool set for whole-genome association and population-based linkage analyses. Am J Hum Genet. 2007;81:559–75. + + PMC1950838 + 17701901 + + + + Kjeldsen EW, Tybjærg-Hansen A, Nordestgaard BG, Frikke-Schmidt R. ABCA7and risk of dementia and vascular disease in the Danish population. Ann Clin Transl Neurol. 2018;5:41–51. + + PMC5771325 + 29376091 + + + + Moreno DJ, Ruiz S, Ríos Á, Lopera F, Ostos H, Via M, Bedoya G. Association of GWAS Top Genes With Late-Onset Alzheimer’s Disease in Colombian Population. Am J Alzheimers Dis Other Demen. 2017;32:27–35. + + 28084078 + + + + Zhang DF, Li J, Wu H, Cui Y, Bi R, Zhou HJ, Wang HZ, Zhang C, Wang D, Kong QP, Li T, Fang Y, Jiang T, et al. 
CFH Variants Affect Structural and Functional Brain Changes and Genetic Risk of Alzheimer’s Disease. Neuropsychopharmacology. 2015;41:1034–1035. + + PMC4748428 + 26243271 + + + + Jiao B, Liu X, Zhou L, Wang MH, Zhou Y, Xiao T, Zhang W, Sun R, Waye MM, Tang B, Shen L. Polygenic Analysis of Late-Onset Alzheimer’s Disease from Mainland China. PLoS One. 2015;10:e0144898. + + PMC4683047 + 26680604 + + + + Mao YF, Guo ZY, Pu JL, Chen YX, Zhang BR. Association of CD33 and MS4A cluster variants with Alzheimer’s disease in East Asian Populations. Neurosci Lett. 2015;609:235–239. + + 26455864 + + + + Ebbert MT, Ridge PG, Wilson AR, Sharp AR, Bailey M, Norton MC, Tschanz JT, Munger RG, Corcoran CD, Kauwe JSK. Population-based Analysis of Alzheimer’s Disease Risk Alleles Implicates Genetic Interactions. Biol Psychiatry. 2014;75:732–7. + + PMC3867586 + 23954108 + + + + Omoumi A, Fok A, Greenwood T, Sadovnick AD, Feldman HH, Hsiung GY. Evaluation of late-onset Alzheimer disease genetic susceptibility risks in a Canadian population. Neurobiol Aging. 2014;35:936.e5-12. + + 24176626 + + + + Tan L, Yu JT, Zhang W, Wu ZC, Zhang Q, Liu QY, Wang W, Wang HF, Ma XY, Cui WZ. Association of GWAS-linked loci with late-onset Alzheimer’s disease in a northern Han Chinese population. Alzheimers Dement. 2013;9:546–53. + + 23232270 + + + + Chung SJ, Lee JH, Kim SY, You S, Kim MJ, Lee JY, Koh J. Association of GWAS top hits with late-onset Alzheimer disease in Korean population. Alzheimer Dis Assoc Disord. 2013;27:250–7. + + 22975751 + + + + Deng YL, Liu LH, Wang Y, Tang HD, Ren RJ, Xu W, Ma JF, Wang LL, Zhuang JP, Wang G, Chen SD. The prevalence of CD33 and MS4A6A variant in Chinese Han population with Alzheimer’s disease. Hum Genet. 2012;131:1245–9. + + 22382309 + + + + Logue MW. A Comprehensive Genetic Association Study of Alzheimer Disease in African Americans. Arch Neurol. 2011;68:1569. 
+ + PMC3356921 + 22159054 + + + + Miyashita A, Koike A, Jun G, Wang LS, Takahashi S, Matsubara E, Kawarabayashi T, Shoji M, Tomita N, Arai H, Asada T, Harigaya Y, Ikeda M, et al. SORL1 is genetically associated with late-onset Alzheimer’s disease in Japanese, Koreans and Caucasians. PLoS One. 2013;8:e58618. + + PMC3614978 + 23565137 + + + + Weiner M, Aisen P, Jack C, Jr, Jaugust W, Trojanowski J, Shaw L, Saykin AJ, Morris JC, Cairns N, Laurel A, Toga A, Green R, Walter S, et al. The Alzheimer’s disease neuroinmaging iniciative: Progress report and future plans. Alzheimers Dement. 2010;6:202–11. + + PMC2927112 + 20451868 + + + + Li H, Wetten S, Li L, St Jean PL, Upmanyu R, Surh L, Hosford D, Barnes MR, Briley JD, Borrie M, Coletta N, Delisle R, Dhalla D, et al. Candidate single-nucleotide polymorphisms from a genomewide association study of Alzheimer disease. Arch Neurol. 2008;65:45–53. + + 17998437 + + + + Wijsman EM, Pankratz ND, Choi Y, Rothstein JH, Faber KM, Cheng R, Lee JH, Bird TD, Bennett DA, Diaz-Arrastia R, Goate AM, Farlow M, Ghetti B, et al. Genome-wide association of familial late-onset Alzheimer’s disease replicates BIN1 and CLU and nominates CUGBP2 in interaction with APOE. PLoS Genet. 2011;7:e1001308. + + PMC3040659 + 21379329 + + + + Zhang Q, Calus MP, Guldbrandtsen B, Lund MS, Sahana G. Estimation of inbreeding using pedigree, 50k SNP chip genotypes and full sequence data in three cattle breeds. BMC Genet. 2015;16:88. + + PMC4509611 + 26195126 + + + + +
+
diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib index 6d897517d74..29dddebede9 100644 --- a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib +++ b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib @@ -27,7 +27,7 @@ @article{ pubmodel = {Print-Electronic}, pubstate = {ppublish}, references = {23}, - revised = {2015-9-15}, + revised = {2015-09-15}, season = {Spring}, space-flight-mission = {fly}, status = {MEDLINE},