From 2c26f3b1689300eeacc514c47f5479fd3cd07440 Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Sun, 12 Mar 2023 13:42:39 -0700
Subject: [PATCH 1/8] use XML Stream Reader

---
 .../importer/fileformat/MedlineImporter.java  | 206 +++++++++++++++---
 1 file changed, 177 insertions(+), 29 deletions(-)
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index b9e9eac39d4..cbe13e06175 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -15,9 +15,11 @@
 import java.util.Objects;
 import java.util.Optional;
 
+import javax.xml.XMLConstants;
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
+import javax.xml.stream.events.XMLEvent;
 
 import org.jabref.logic.importer.Importer;
 import org.jabref.logic.importer.ParseException;
@@ -61,9 +63,7 @@
 import org.jabref.logic.importer.fileformat.medline.PublicationType;
 import org.jabref.logic.importer.fileformat.medline.Publisher;
 import org.jabref.logic.importer.fileformat.medline.PubmedArticle;
-import org.jabref.logic.importer.fileformat.medline.PubmedArticleSet;
 import org.jabref.logic.importer.fileformat.medline.PubmedBookArticle;
-import org.jabref.logic.importer.fileformat.medline.PubmedBookArticleSet;
 import org.jabref.logic.importer.fileformat.medline.PubmedBookData;
 import org.jabref.logic.importer.fileformat.medline.QualifierName;
 import org.jabref.logic.importer.fileformat.medline.Section;
@@ -71,6 +71,7 @@
 import org.jabref.logic.importer.fileformat.medline.Text;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.Date;
 import org.jabref.model.entry.Month;
 import org.jabref.model.entry.field.Field;
 import org.jabref.model.entry.field.FieldFactory;
@@ -140,46 +141,181 @@ public boolean isRecognizedFormat(BufferedReader reader) throws IOException {
     }
 
     @Override
-    public ParserResult importDatabase(BufferedReader reader) throws IOException {
-        Objects.requireNonNull(reader);
+    public ParserResult importDatabase(BufferedReader input) throws IOException {
+        Objects.requireNonNull(input);
 
         List<BibEntry> bibItems = new ArrayList<>();
 
         try {
-            Object unmarshalledObject = unmarshallRoot(reader);
-
-            // check whether we have an article set, an article, a book article or a book article set
-            if (unmarshalledObject instanceof PubmedArticleSet) {
-                PubmedArticleSet articleSet = (PubmedArticleSet) unmarshalledObject;
-                for (Object article : articleSet.getPubmedArticleOrPubmedBookArticle()) {
-                    if (article instanceof PubmedArticle) {
-                        PubmedArticle currentArticle = (PubmedArticle) article;
-                        parseArticle(currentArticle, bibItems);
-                    }
-                    if (article instanceof PubmedBookArticle) {
-                        PubmedBookArticle currentArticle = (PubmedBookArticle) article;
-                        parseBookArticle(currentArticle, bibItems);
+            XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
+
+            // prevent xxe (https://rules.sonarsource.com/java/RSPEC-2755)
+            xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+
+            XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(input);
+
+            while (reader.hasNext()) {
+                reader.next();
+                if (isStartXMLEvent(reader)) {
+                    String elementName = reader.getName().getLocalPart();
+                    switch (elementName) {
+                        case "PubmedArticle" -> {
+                            // Case 3: PubmedArticle
+                            parseArticleNew(reader, bibItems);
+                        }
                     }
-                }
-            } else if (unmarshalledObject instanceof PubmedArticle) {
-                PubmedArticle article = (PubmedArticle) unmarshalledObject;
-                parseArticle(article, bibItems);
-            } else if (unmarshalledObject instanceof PubmedBookArticle) {
-                PubmedBookArticle currentArticle = (PubmedBookArticle) unmarshalledObject;
-                parseBookArticle(currentArticle, bibItems);
-            } else {
-                PubmedBookArticleSet bookArticleSet = (PubmedBookArticleSet) unmarshalledObject;
-                for (PubmedBookArticle bookArticle : bookArticleSet.getPubmedBookArticle()) {
-                    parseBookArticle(bookArticle, bibItems);
+
+                    // Case 1: PubmedArticleSet
+
+                    // Case 2: PubmedBookArticleSet
+
+                    // Case 4: PubmedBookArticle
                 }
             }
-        } catch (JAXBException | XMLStreamException e) {
+
+//            Object unmarshalledObject = unmarshallRoot(reader);
+//
+//            // check whether we have an article set, an article, a book article or a book article set
+//            if (unmarshalledObject instanceof PubmedArticleSet) {
+//                PubmedArticleSet articleSet = (PubmedArticleSet) unmarshalledObject;
+//                for (Object article : articleSet.getPubmedArticleOrPubmedBookArticle()) {
+//                    if (article instanceof PubmedArticle) {
+//                        PubmedArticle currentArticle = (PubmedArticle) article;
+//                        parseArticle(currentArticle, bibItems);
+//                    }
+//                    if (article instanceof PubmedBookArticle) {
+//                        PubmedBookArticle currentArticle = (PubmedBookArticle) article;
+//                        parseBookArticle(currentArticle, bibItems);
+//                    }
+//                }
+//            } else if (unmarshalledObject instanceof PubmedArticle) {
+//                PubmedArticle article = (PubmedArticle) unmarshalledObject;
+//                parseArticle(article, bibItems);
+//            } else if (unmarshalledObject instanceof PubmedBookArticle) {
+//                PubmedBookArticle currentArticle = (PubmedBookArticle) unmarshalledObject;
+//                parseBookArticle(currentArticle, bibItems);
+//            } else {
+//                PubmedBookArticleSet bookArticleSet = (PubmedBookArticleSet) unmarshalledObject;
+//                for (PubmedBookArticle bookArticle : bookArticleSet.getPubmedBookArticle()) {
+//                    parseBookArticle(bookArticle, bibItems);
+//                }
+//            }
+        } catch (XMLStreamException e) {
             LOGGER.debug("could not parse document", e);
             return ParserResult.fromError(e);
         }
+
         return new ParserResult(bibItems);
     }
 
+    private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems) throws XMLStreamException {
+        Map<Field, String> fields = new HashMap<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "MedlineCitation" -> {
+                        parseMedlineCitation(reader, fields);
+                    }
+                    case "PubmedData" -> {
+                        //
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("PubmedArticle")) {
+                break;
+            }
+        }
+
+        BibEntry entry = new BibEntry(StandardEntryType.Article);
+        entry.setField(fields);
+
+        bibItems.add(entry);
+    }
+
+    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+        String status = reader.getAttributeValue(null, "Status");
+        String owner = reader.getAttributeValue(null, "Owner");
+        fields.put(new UnknownField("status"), status);
+        fields.put(StandardField.OWNER, owner);
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "DateCreated", "DateCompleted" -> {
+                        parseDate(reader, elementName, fields);
+                    }
+                    case "Article" -> {
+                        String pubmodel = reader.getAttributeValue(null, "PubModel");
+                        fields.put(new UnknownField("pubmodel"), pubmodel);
+                    }
+                    case "PMID" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            fields.put(StandardField.PMID, reader.getText());
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("MedlineCitation")) {
+                break;
+            }
+        }
+    }
+
+    private void parseDate(XMLStreamReader reader, String parentElement, Map<Field, String> fields) throws XMLStreamException {
+        Optional<String> year = Optional.empty();
+        Optional<String> month = Optional.empty();
+        Optional<String> day = Optional.empty();
+
+        // mapping from date XML element to field name
+        Map<String, String> dateFieldMap = Map.of(
+                "DateCreated", "created",
+                "DateCompleted", "completed"
+        );
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "Year" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            year = Optional.of(reader.getText());
+                        }
+                    }
+                    case "Month" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            month = Optional.of(reader.getText());
+                        }
+                    }
+                    case "Day" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            day = Optional.of(reader.getText());
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(parentElement)) {
+                break;
+            }
+        }
+
+        Optional<Date> date = Date.parse(year, month, day);
+        date.ifPresent(dateValue ->
+                fields.put(new UnknownField(dateFieldMap.get(parentElement)), dateValue.getNormalized()));
+    }
+
     private Object unmarshallRoot(BufferedReader reader) throws JAXBException, XMLStreamException {
         initUmarshaller();
 
@@ -700,6 +836,18 @@ private String fixPageRange(String pageRange) {
         return startPage + "--" + endPage;
     }
 
+    private boolean isCharacterXMLEvent(XMLStreamReader reader) {
+        return reader.getEventType() == XMLEvent.CHARACTERS;
+    }
+
+    private boolean isStartXMLEvent(XMLStreamReader reader) {
+        return reader.getEventType() == XMLEvent.START_ELEMENT;
+    }
+
+    private boolean isEndXMLEvent(XMLStreamReader reader) {
+        return reader.getEventType() == XMLEvent.END_ELEMENT;
+    }
+
     @Override
     public List<BibEntry> parseEntries(InputStream inputStream) throws ParseException {
         try {

From 6e38802cc7a79e7c7b462257c30c2361f3bffa61 Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Sun, 12 Mar 2023 19:32:23 -0700
Subject: [PATCH 2/8] add ArticleInformation parser

---
 .../importer/fileformat/MedlineImporter.java  | 320 ++++++++++++++----
 1 file changed, 251 insertions(+), 69 deletions(-)

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index cbe13e06175..3729325d8ed 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -26,12 +26,10 @@
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.ParserResult;
 import org.jabref.logic.importer.fileformat.medline.Abstract;
-import org.jabref.logic.importer.fileformat.medline.AbstractText;
 import org.jabref.logic.importer.fileformat.medline.AffiliationInfo;
 import org.jabref.logic.importer.fileformat.medline.ArticleId;
 import org.jabref.logic.importer.fileformat.medline.ArticleIdList;
 import org.jabref.logic.importer.fileformat.medline.ArticleTitle;
-import org.jabref.logic.importer.fileformat.medline.Author;
 import org.jabref.logic.importer.fileformat.medline.AuthorList;
 import org.jabref.logic.importer.fileformat.medline.Book;
 import org.jabref.logic.importer.fileformat.medline.BookDocument;
@@ -59,7 +57,6 @@
 import org.jabref.logic.importer.fileformat.medline.Pagination;
 import org.jabref.logic.importer.fileformat.medline.PersonalNameSubject;
 import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectList;
-import org.jabref.logic.importer.fileformat.medline.PubDate;
 import org.jabref.logic.importer.fileformat.medline.PublicationType;
 import org.jabref.logic.importer.fileformat.medline.Publisher;
 import org.jabref.logic.importer.fileformat.medline.PubmedArticle;
@@ -68,7 +65,6 @@
 import org.jabref.logic.importer.fileformat.medline.QualifierName;
 import org.jabref.logic.importer.fileformat.medline.Section;
 import org.jabref.logic.importer.fileformat.medline.Sections;
-import org.jabref.logic.importer.fileformat.medline.Text;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.Date;
@@ -82,7 +78,6 @@
 
 import com.google.common.base.Joiner;
 import jakarta.xml.bind.JAXBContext;
-import jakarta.xml.bind.JAXBElement;
 import jakarta.xml.bind.JAXBException;
 import jakarta.xml.bind.Unmarshaller;
 import org.slf4j.Logger;
@@ -251,8 +246,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                         parseDate(reader, elementName, fields);
                     }
                     case "Article" -> {
-                        String pubmodel = reader.getAttributeValue(null, "PubModel");
-                        fields.put(new UnknownField("pubmodel"), pubmodel);
+                        parseArticleInformation(reader, fields);
                     }
                     case "PMID" -> {
                         reader.next();
@@ -269,6 +263,96 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         }
     }
 
+    private void parseArticleInformation(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+        String pubmodel = reader.getAttributeValue(null, "PubModel");
+        fields.put(new UnknownField("pubmodel"), pubmodel);
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "Journal" -> {
+                        parseJournal(reader, fields);
+                    }
+                    case "ArticleTitle" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            fields.put(StandardField.TITLE, StringUtil.stripBrackets(reader.getText()));
+                        }
+                    }
+                    case "Pagination" -> {
+                        addPagination(reader, fields);
+                    }
+                    case "ELocationID" -> {
+                        String eidType = reader.getAttributeValue(null, "EIdType");
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            if ("doi".equals(eidType)) { // constant-first: EIdType may be absent (null)
+                                fields.put(StandardField.DOI, reader.getText());
+                            }
+                            if ("pii".equals(eidType)) {
+                                fields.put(new UnknownField("pii"), reader.getText());
+                            }
+                        }
+                    }
+                    case "Abstract" -> {
+                        addAbstract(reader, fields);
+                    }
+                    case "AuthorList" -> {
+                        handleAuthorList(reader, fields);
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Article")) {
+                break;
+            }
+        }
+    }
+
+    private void parseJournal(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "Title" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.JOURNAL, reader.getText());
+                        }
+                    }
+                    case "ISSN" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.ISSN, reader.getText());
+                        }
+                    }
+                    case "Volume" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.VOLUME, reader.getText());
+                        }
+                    }
+                    case "Issue" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.ISSUE, reader.getText());
+                        }
+                    }
+                    case "PubDate" -> {
+                        addPubDate(reader, fields);
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Journal")) {
+                break;
+            }
+        }
+    }
+
     private void parseDate(XMLStreamReader reader, String parentElement, Map<Field, String> fields) throws XMLStreamException {
         Optional<String> year = Optional.empty();
         Optional<String> month = Optional.empty();
@@ -349,11 +433,11 @@ private void parseBookArticle(PubmedBookArticle currentArticle, List<BibEntry> b
             }
             if (bookDocument.getAbstract() != null) {
                 Abstract abs = bookDocument.getAbstract();
-                addAbstract(fields, abs);
+                // addAbstract(fields, abs);
             }
             if (bookDocument.getPagination() != null) {
                 Pagination pagination = bookDocument.getPagination();
-                addPagination(fields, pagination);
+                // addPagination(fields, pagination);
             }
             if (bookDocument.getSections() != null) {
                 ArrayList<String> result = new ArrayList<>();
@@ -419,14 +503,14 @@ private void addBookInformation(Map<Field, String> fields, Book book) {
             putStringFromSerializableList(fields, StandardField.TITLE, title.getContent());
         }
         if (book.getPubDate() != null) {
-            addPubDate(fields, book.getPubDate());
+            // addPubDate(fields, book.getPubDate());
         }
         if (book.getAuthorList() != null) {
             List<AuthorList> authorLists = book.getAuthorList();
             // authorLists size should be one
             if (authorLists.size() == 1) {
                 for (AuthorList authorList : authorLists) {
-                    handleAuthors(fields, authorList);
+                    // handleAuthorList(fields, authorList);
                 }
             } else {
                 LOGGER.info(String.format("Size of authorlist was %s", authorLists.size()));
@@ -440,7 +524,7 @@ private void addBookInformation(Map<Field, String> fields, Book book) {
 
         if (book.getELocationID() != null) {
             for (ELocationID id : book.getELocationID()) {
-                addElocationID(fields, id);
+//                addElocationID(fields, id);
             }
         }
         if (book.getIsbn() != null) {
@@ -702,79 +786,133 @@ private void addArticleInformation(Map<Field, String> fields, List<Object> conte
                 putIfValueNotNull(fields, StandardField.VOLUME, journalIssue.getVolume());
                 putIfValueNotNull(fields, StandardField.ISSUE, journalIssue.getIssue());
 
-                addPubDate(fields, journalIssue.getPubDate());
+                // addPubDate(fields, journalIssue.getPubDate());
             } else if (object instanceof ArticleTitle) {
                 ArticleTitle articleTitle = (ArticleTitle) object;
                 fields.put(StandardField.TITLE, StringUtil.stripBrackets(articleTitle.getContent().toString()));
             } else if (object instanceof Pagination) {
                 Pagination pagination = (Pagination) object;
-                addPagination(fields, pagination);
+                // addPagination(fields, pagination);
             } else if (object instanceof ELocationID) {
                 ELocationID eLocationID = (ELocationID) object;
-                addElocationID(fields, eLocationID);
+//                addElocationID(fields, eLocationID);
             } else if (object instanceof Abstract) {
                 Abstract abs = (Abstract) object;
-                addAbstract(fields, abs);
+                // addAbstract(fields, abs);
             } else if (object instanceof AuthorList) {
                 AuthorList authors = (AuthorList) object;
-                handleAuthors(fields, authors);
+//                handleAuthorList(fields, authors);
             }
         }
     }
 
-    private void addElocationID(Map<Field, String> fields, ELocationID eLocationID) {
-        if (eLocationID.getEIdType().equals("doi")) {
-            fields.put(StandardField.DOI, eLocationID.getContent());
-        }
-        if (eLocationID.getEIdType().equals("pii")) {
-            fields.put(new UnknownField("pii"), eLocationID.getContent());
-        }
-    }
-
-    private void addPubDate(Map<Field, String> fields, PubDate pubDate) {
-        if (pubDate.getYear() == null) {
-            // if year of the pubdate is null, the medlineDate shouldn't be null
-            fields.put(StandardField.YEAR, extractYear(pubDate.getMedlineDate()));
-        } else {
-            fields.put(StandardField.YEAR, pubDate.getYear());
-            if (pubDate.getMonth() != null) {
-                Optional<Month> month = Month.parse(pubDate.getMonth());
-                if (month.isPresent()) {
-                    fields.put(StandardField.MONTH, month.get().getJabRefFormat());
+    private void addPubDate(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "MedlineDate" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            fields.put(StandardField.YEAR, extractYear(reader.getText()));
+                        }
+                    }
+                    case "Year" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            fields.put(StandardField.YEAR, reader.getText());
+                        }
+                    }
+                    case "Month" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            Optional<Month> month = Month.parse(reader.getText());
+                            month.ifPresent(monthValue -> fields.put(StandardField.MONTH, monthValue.getJabRefFormat()));
+                        }
+                    }
+                    case "Season" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            fields.put(new UnknownField("season"), reader.getText());
+                        }
+                    }
                 }
-            } else if (pubDate.getSeason() != null) {
-                fields.put(new UnknownField("season"), pubDate.getSeason());
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("PubDate")) {
+                break;
             }
         }
     }
 
-    private void addAbstract(Map<Field, String> fields, Abstract abs) {
-        putIfValueNotNull(fields, new UnknownField("copyright"), abs.getCopyrightInformation());
+    private void addAbstract(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
         List<String> abstractText = new ArrayList<>();
-        for (AbstractText text : abs.getAbstractText()) {
-            for (Serializable textContent : text.getContent()) {
-                if (textContent instanceof String) {
-                    abstractText.add((String) textContent);
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "CopyrightInformation" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("copyright"), reader.getText());
+                        }
+                    }
+                    case "AbstractText" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            abstractText.add(reader.getText());
+                        }
+                    }
                 }
             }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Abstract")) {
+                break;
+            }
         }
+
         fields.put(StandardField.ABSTRACT, join(abstractText, " "));
     }
 
-    private void addPagination(Map<Field, String> fields, Pagination pagination) {
+    private void addPagination(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
         String startPage = "";
         String endPage = "";
-        for (JAXBElement<String> element : pagination.getContent()) {
-            if ("MedlinePgn".equals(element.getName().getLocalPart())) {
-                putIfValueNotNull(fields, StandardField.PAGES, fixPageRange(element.getValue()));
-            } else if ("StartPage".equals(element.getName().getLocalPart())) {
-                // it could happen, that the article has only a start page
-                startPage = element.getValue() + endPage;
-                putIfValueNotNull(fields, StandardField.PAGES, startPage);
-            } else if ("EndPage".equals(element.getName().getLocalPart())) {
-                endPage = element.getValue();
-                // but it should not happen, that a endpage appears without startpage
-                fields.put(StandardField.PAGES, fixPageRange(startPage + "-" + endPage));
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "MedlinePgn" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.PAGES, fixPageRange(reader.getText()));
+                        }
+                    }
+                    case "StartPage" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            // it could happen, that the article has only a start page
+                            startPage = reader.getText() + endPage;
+                            putIfValueNotNull(fields, StandardField.PAGES, startPage);
+                        }
+                    }
+                    case "EndPage" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            endPage = reader.getText();
+                            // but it should not happen, that a endpage appears without startpage
+                            fields.put(StandardField.PAGES, fixPageRange(startPage + "-" + endPage));
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Pagination")) {
+                break;
             }
         }
     }
@@ -784,27 +922,71 @@ private String extractYear(String medlineDate) {
         return medlineDate.substring(0, 4);
     }
 
-    private void handleAuthors(Map<Field, String> fields, AuthorList authors) {
+    private void handleAuthorList(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
         List<String> authorNames = new ArrayList<>();
-        for (Author author : authors.getAuthor()) {
-            if (author.getCollectiveName() != null) {
-                Text collectiveNames = author.getCollectiveName();
-                for (Serializable content : collectiveNames.getContent()) {
-                    if (content instanceof String) {
-                        authorNames.add((String) content);
+        // Consume events up to the closing AuthorList tag; each start event named Author is handed to parseAuthor.
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "Author" -> {
+                        parseAuthor(reader, authorNames);
                     }
                 }
-            } else {
-                String authorName = author.getLastName();
-                if (author.getForeName() != null) {
-                    authorName += ", " + author.getForeName();
-                }
-                authorNames.add(authorName);
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("AuthorList")) {
+                break;
             }
         }
+        // AUTHOR is written unconditionally; with no collected names the joined value is the empty string.
         fields.put(StandardField.AUTHOR, join(authorNames, " and "));
     }
 
+    /** Reads one Author element and appends its collective names and/or "LastName, ForeName" to authorNames. */
+    private void parseAuthor(XMLStreamReader reader, List<String> authorNames) throws XMLStreamException {
+        String authorName = "";
+        List<String> collectiveNames = new ArrayList<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "CollectiveName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            collectiveNames.add(reader.getText());
+                        }
+                    }
+                    case "LastName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            authorName = reader.getText();
+                        }
+                    }
+                    case "ForeName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            // no separator when LastName was missing or blank, otherwise the name would start with ", "
+                            authorName = authorName.isBlank() ? reader.getText() : authorName + ", " + reader.getText();
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Author")) {
+                break;
+            }
+        }
+
+        authorNames.addAll(collectiveNames);
+        if (!authorName.isBlank()) {
+            authorNames.add(authorName);
+        }
+    }
+
     private void addDateRevised(Map<Field, String> fields, DateRevised dateRevised) {
         if ((dateRevised.getDay() != null) && (dateRevised.getMonth() != null) && (dateRevised.getYear() != null)) {
             fields.put(new UnknownField("revised"),

From bbb6157ece4c8b6dd406b05909e75e5246c4eb43 Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Sun, 12 Mar 2023 23:28:38 -0700
Subject: [PATCH 3/8] update MedlineCitation parser

---
 .../importer/fileformat/MedlineImporter.java  | 329 ++++++++++++++----
 .../fileformat/medline/MeshHeadingRec.java    |   9 +
 .../fileformat/medline/OtherIDRec.java        |   7 +
 .../medline/PersonalNameSubjectRec.java       |   7 +
 4 files changed, 291 insertions(+), 61 deletions(-)
 create mode 100644 src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java
 create mode 100644 src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java
 create mode 100644 src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index 3729325d8ed..7c5dedc0fd1 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -47,22 +47,17 @@
 import org.jabref.logic.importer.fileformat.medline.InvestigatorList;
 import org.jabref.logic.importer.fileformat.medline.Journal;
 import org.jabref.logic.importer.fileformat.medline.JournalIssue;
-import org.jabref.logic.importer.fileformat.medline.Keyword;
-import org.jabref.logic.importer.fileformat.medline.KeywordList;
 import org.jabref.logic.importer.fileformat.medline.MedlineCitation;
 import org.jabref.logic.importer.fileformat.medline.MedlineJournalInfo;
-import org.jabref.logic.importer.fileformat.medline.MeshHeading;
-import org.jabref.logic.importer.fileformat.medline.MeshHeadingList;
-import org.jabref.logic.importer.fileformat.medline.OtherID;
+import org.jabref.logic.importer.fileformat.medline.MeshHeadingRec;
+import org.jabref.logic.importer.fileformat.medline.OtherIDRec;
 import org.jabref.logic.importer.fileformat.medline.Pagination;
-import org.jabref.logic.importer.fileformat.medline.PersonalNameSubject;
-import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectList;
+import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectRec;
 import org.jabref.logic.importer.fileformat.medline.PublicationType;
 import org.jabref.logic.importer.fileformat.medline.Publisher;
 import org.jabref.logic.importer.fileformat.medline.PubmedArticle;
 import org.jabref.logic.importer.fileformat.medline.PubmedBookArticle;
 import org.jabref.logic.importer.fileformat.medline.PubmedBookData;
-import org.jabref.logic.importer.fileformat.medline.QualifierName;
 import org.jabref.logic.importer.fileformat.medline.Section;
 import org.jabref.logic.importer.fileformat.medline.Sections;
 import org.jabref.logic.util.StandardFileType;
@@ -156,15 +151,14 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
                     switch (elementName) {
                         case "PubmedArticle" -> {
                             // Case 3: PubmedArticle
-                            parseArticleNew(reader, bibItems);
+                            parseArticleNew(reader, bibItems, elementName);
                         }
-                    }
-
-                    // Case 1: PubmedArticleSet
+                        // Case 1: PubmedArticleSet
 
-                    // Case 2: PubmedBookArticleSet
+                        // Case 2: PubmedBookArticleSet
 
-                    // Case 4: PubmedBookArticle
+                        // Case 4: PubmedBookArticle
+                    }
                 }
             }
 
@@ -203,7 +197,7 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
         return new ParserResult(bibItems);
     }
 
-    private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems) throws XMLStreamException {
+    private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems, String parentElement) throws XMLStreamException {
         Map<Field, String> fields = new HashMap<>();
 
         while (reader.hasNext()) {
@@ -212,7 +206,7 @@ private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems) th
                 String elementName = reader.getName().getLocalPart();
                 switch (elementName) {
                     case "MedlineCitation" -> {
-                        parseMedlineCitation(reader, fields);
+                        parseMedlineCitation(reader, fields, elementName);
                     }
                     case "PubmedData" -> {
                         //
@@ -220,7 +214,7 @@ private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems) th
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("PubmedArticle")) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(parentElement)) {
                 break;
             }
         }
@@ -231,7 +225,15 @@ private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems) th
         bibItems.add(entry);
     }
 
-    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields, String parentElement) throws XMLStreamException {
+        // multiple occurrences of the following fields can be present
+        List<String> citationSubsets = new ArrayList<>();
+        List<MeshHeadingRec> meshHeadingList = new ArrayList<>();
+        List<PersonalNameSubjectRec> personalNameSubjectList = new ArrayList<>();
+        List<OtherIDRec> otherIDList = new ArrayList<>();
+        List<String> keywordList = new ArrayList<>();
+        List<String> spaceFlightMissionList = new ArrayList<>();
+
         String status = reader.getAttributeValue(null, "Status");
         String owner = reader.getAttributeValue(null, "Owner");
         fields.put(new UnknownField("status"), status);
@@ -254,10 +256,221 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                             fields.put(StandardField.PMID, reader.getText());
                         }
                     }
+                    case "MedlineJournalInfo" -> {
+                        parseMedlineJournalInfo(reader, fields, elementName);
+                    }
+                    case "ChemicalList" -> {
+                        parseChemicalList(reader, fields, elementName);
+                    }
+                    case "CitationSubset" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            citationSubsets.add(reader.getText());
+                        }
+                    }
+                    case "GeneSymbolList" -> {
+                        parseGeneSymbolList(reader, fields, elementName);
+                    }
+                    case "MeshHeading" -> {
+                        parseMeshHeading(reader, meshHeadingList, elementName);
+                    }
+                    case "NumberofReferences" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("references"), reader.getText());
+                        }
+                    }
+                    case "PersonalNameSubject" -> {
+                        parsePersonalNameSubject(reader, personalNameSubjectList, elementName);
+                    }
+                    case "OtherID" -> {
+                        String otherIdSource = reader.getAttributeValue(null, "Source");
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            String content = reader.getText();
+                            otherIDList.add(new OtherIDRec(otherIdSource, content));
+                        }
+                    }
+                    case "Keyword" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            keywordList.add(reader.getText());
+                        }
+                    }
+                    case "SpaceFlightMission" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            spaceFlightMissionList.add(reader.getText());
+                        }
+                    }
+                    case "InvestigatorList" -> {
+                        // TODO
+                    }
+                    case "GeneralNote" -> {
+                        // TODO
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(parentElement)) {
+                break;
+            }
+        }
+
+        // populate multiple occurrence fields
+        fields.put(new UnknownField("citation-subset"), join(citationSubsets, ", "));
+        addMeshHeading(fields, meshHeadingList);
+        addPersonalNames(fields, personalNameSubjectList);
+        addOtherId(fields, otherIDList);
+        addKeywords(fields, keywordList);
+        fields.put(new UnknownField("space-flight-mission"), join(spaceFlightMissionList, ", "));
+    }
+
+    private void parsePersonalNameSubject(XMLStreamReader reader, List<PersonalNameSubjectRec> personalNameSubjectList, String startElement)
+            throws XMLStreamException {
+        String lastName = "";
+        String foreName = "";
+        // Walk the child events; a name is taken only when a text event directly follows its start tag.
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "LastName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            lastName = reader.getText();
+                        }
+                    }
+                    case "ForeName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            foreName = reader.getText();
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+        // A record is appended unconditionally, so both components may still be empty strings here.
+        personalNameSubjectList.add(new PersonalNameSubjectRec(lastName, foreName));
+    }
+
+    private void parseMeshHeading(XMLStreamReader reader, List<MeshHeadingRec> meshHeadingList, String startElement) throws XMLStreamException {
+        String descriptorName = "";
+        List<String> qualifierNames = new ArrayList<>();
+        // Only the event directly after a DescriptorName/QualifierName start tag is read; scanning ends at the end tag named by startElement.
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "DescriptorName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            descriptorName = reader.getText();
+                        }
+                    }
+                    case "QualifierName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            qualifierNames.add(reader.getText());
+                        }
+                    }
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("MedlineCitation")) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        meshHeadingList.add(new MeshHeadingRec(descriptorName, qualifierNames));
+    }
+
+    private void parseGeneSymbolList(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+        List<String> geneSymbols = new ArrayList<>();
+        // NOTE(review): startElement must name the element enclosing the GeneSymbol children; if it is "GeneSymbol" itself, the loop stops at the first closing tag and nothing is collected.
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                if (elementName.equals("GeneSymbol")) {
+                    reader.next();
+                    if (isCharacterXMLEvent(reader)) {
+                        geneSymbols.add(reader.getText());
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+        // Written even when no symbol was collected, replacing any earlier "gene-symbols" value.
+        fields.put(new UnknownField("gene-symbols"), join(geneSymbols, ", "));
+    }
+
+    private void parseChemicalList(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+        List<String> chemicalNames = new ArrayList<>();
+        // Collect the text following each NameOfSubstance start tag until the end tag named by startElement is reached.
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                if (elementName.equals("NameOfSubstance")) {
+                    reader.next();
+                    if (isCharacterXMLEvent(reader)) {
+                        chemicalNames.add(reader.getText());
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        fields.put(new UnknownField("chemicals"), join(chemicalNames, ", "));
+    }
+
+    private void parseMedlineJournalInfo(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "Country" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("country"), reader.getText());
+                        }
+                    }
+                    case "MedlineTA" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("journal-abbreviation"), reader.getText());
+                        }
+                    }
+                    case "NlmUniqueID" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("nlm-id"), reader.getText());
+                        }
+                    }
+                    case "ISSNLinking" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("issn-linking"), reader.getText());
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
@@ -452,7 +665,7 @@ private void parseBookArticle(PubmedBookArticle currentArticle, List<BibEntry> b
                 fields.put(new UnknownField("sections"), join(result, "; "));
             }
             if (bookDocument.getKeywordList() != null) {
-                addKeyWords(fields, bookDocument.getKeywordList());
+//                addKeywords(fields, bookDocument.getKeywordList());
             }
             if (bookDocument.getContributionDate() != null) {
                 addContributionDate(fields, bookDocument.getContributionDate());
@@ -610,17 +823,17 @@ private void parseArticle(PubmedArticle article, List<BibEntry> bibItems) {
                 addGeneSymbols(fields, medlineCitation.getGeneSymbolList());
             }
             if (medlineCitation.getMeshHeadingList() != null) {
-                addMeashHeading(fields, medlineCitation.getMeshHeadingList());
+                // addMeshHeading(fields, medlineCitation.getMeshHeadingList());
             }
             putIfValueNotNull(fields, new UnknownField("references"), medlineCitation.getNumberOfReferences());
             if (medlineCitation.getPersonalNameSubjectList() != null) {
-                addPersonalNames(fields, medlineCitation.getPersonalNameSubjectList());
+//                addPersonalNames(fields, medlineCitation.getPersonalNameSubjectList());
             }
             if (medlineCitation.getOtherID() != null) {
-                addOtherId(fields, medlineCitation.getOtherID());
+//                addOtherId(fields, medlineCitation.getOtherID());
             }
             if (medlineCitation.getKeywordList() != null) {
-                addKeyWords(fields, medlineCitation.getKeywordList());
+//                addKeywords(fields, medlineCitation.getKeywordList());
             }
             if (medlineCitation.getSpaceFlightMission() != null) {
                 fields.put(new UnknownField("space-flight-mission"), join(medlineCitation.getSpaceFlightMission(), ", "));
@@ -691,69 +904,63 @@ private void addInvestigators(Map<Field, String> fields, InvestigatorList invest
         }
     }
 
-    private void addKeyWords(Map<Field, String> fields, List<KeywordList> allKeywordLists) {
-        List<String> keywordStrings = new ArrayList<>();
-        // add keywords to the list
-        for (KeywordList keywordList : allKeywordLists) {
-            for (Keyword keyword : keywordList.getKeyword()) {
-                for (Serializable content : keyword.getContent()) {
-                    if (content instanceof String) {
-                        keywordStrings.add((String) content);
-                    }
-                }
-            }
-        }
-        // Check whether MeshHeadingList exist or not
+    private void addKeywords(Map<Field, String> fields, List<String> keywordList) {
+        // KEYWORDS may already hold MeSH headings (addMeshHeading runs first in parseMedlineCitation)
         if (fields.get(StandardField.KEYWORDS) == null) {
-            fields.put(StandardField.KEYWORDS, join(keywordStrings, KEYWORD_SEPARATOR));
+            fields.put(StandardField.KEYWORDS, join(keywordList, KEYWORD_SEPARATOR));
         } else {
-            if (keywordStrings.size() > 0) {
+            if (!keywordList.isEmpty()) {
                 // if it exists, combine the MeshHeading with the keywords
-                String result = join(keywordStrings, "; ");
+                String result = join(keywordList, KEYWORD_SEPARATOR);
                 result = fields.get(StandardField.KEYWORDS) + KEYWORD_SEPARATOR + result;
                 fields.put(StandardField.KEYWORDS, result);
             }
         }
     }
 
-    private void addOtherId(Map<Field, String> fields, List<OtherID> otherID) {
-        for (OtherID id : otherID) {
-            if ((id.getSource() != null) && (id.getContent() != null)) {
-                fields.put(FieldFactory.parseField(StandardEntryType.Article, id.getSource()), id.getContent());
+    private void addOtherId(Map<Field, String> fields, List<OtherIDRec> otherIDList) {
+        for (OtherIDRec id : otherIDList) {
+            if ((id.source() != null) && !id.source().isBlank() && !id.content().isBlank()) {
+                fields.put(FieldFactory.parseField(StandardEntryType.Article, id.source()), id.content());
             }
         }
     }
 
-    private void addPersonalNames(Map<Field, String> fields, PersonalNameSubjectList personalNameSubjectList) {
+    private void addPersonalNames(Map<Field, String> fields, List<PersonalNameSubjectRec> personalNameSubjectList) {
         if (fields.get(StandardField.AUTHOR) == null) {
             // if no authors appear, then add the personal names as authors
             List<String> personalNames = new ArrayList<>();
-            if (personalNameSubjectList.getPersonalNameSubject() != null) {
-                List<PersonalNameSubject> personalNameSubject = personalNameSubjectList.getPersonalNameSubject();
-                for (PersonalNameSubject personalName : personalNameSubject) {
-                    String name = personalName.getLastName();
-                    if (personalName.getForeName() != null) {
-                        name += ", " + personalName.getForeName();
+            // each subject becomes "lastName" or "lastName, foreName"; an empty list leaves AUTHOR untouched
+            if (!personalNameSubjectList.isEmpty()) {
+                for (PersonalNameSubjectRec personalNameSubject : personalNameSubjectList) {
+                    StringBuilder result = new StringBuilder(personalNameSubject.lastName());
+                    if (!personalNameSubject.foreName().isBlank()) {
+                        result.append(", ").append(personalNameSubject.foreName());
                     }
-                    personalNames.add(name);
+                    personalNames.add(result.toString());
                 }
+
                 fields.put(StandardField.AUTHOR, join(personalNames, " and "));
             }
         }
     }
 
-    private void addMeashHeading(Map<Field, String> fields, MeshHeadingList meshHeadingList) {
-        ArrayList<String> keywords = new ArrayList<>();
-        for (MeshHeading keyword : meshHeadingList.getMeshHeading()) {
-            StringBuilder result = new StringBuilder(keyword.getDescriptorName().getContent());
-            if (keyword.getQualifierName() != null) {
-                for (QualifierName qualifier : keyword.getQualifierName()) {
-                    result.append(", ").append(qualifier.getContent());
+    private void addMeshHeading(Map<Field, String> fields, List<MeshHeadingRec> meshHeadingList) {
+        List<String> keywords = new ArrayList<>();
+        // KEYWORDS is only written when at least one heading exists; each entry is "descriptor[, qualifier]*"
+        if (!meshHeadingList.isEmpty()) {
+            for (MeshHeadingRec meshHeading : meshHeadingList) {
+                StringBuilder result = new StringBuilder(meshHeading.descriptorName());
+                if (meshHeading.qualifierNames() != null) {
+                    for (String qualifierName : meshHeading.qualifierNames()) {
+                        result.append(", ").append(qualifierName);
+                    }
                 }
+                keywords.add(result.toString());
             }
-            keywords.add(result.toString());
+
+            fields.put(StandardField.KEYWORDS, join(keywords, KEYWORD_SEPARATOR));
         }
-        fields.put(StandardField.KEYWORDS, join(keywords, KEYWORD_SEPARATOR));
     }
 
     private void addGeneSymbols(Map<Field, String> fields, GeneSymbolList geneSymbolList) {
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java
new file mode 100644
index 00000000000..413fcf64960
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java
@@ -0,0 +1,9 @@
+package org.jabref.logic.importer.fileformat.medline;
+
+import java.util.List;
+
+public record MeshHeadingRec(
+        String descriptorName,
+        List<String> qualifierNames
+) {
+}
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java
new file mode 100644
index 00000000000..d653df925d3
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java
@@ -0,0 +1,7 @@
+package org.jabref.logic.importer.fileformat.medline;
+
+public record OtherIDRec(
+        String source,
+        String content
+) {
+}
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java
new file mode 100644
index 00000000000..202c800ef1e
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java
@@ -0,0 +1,7 @@
+package org.jabref.logic.importer.fileformat.medline;
+
+public record PersonalNameSubjectRec(
+        String lastName,
+        String foreName
+) {
+}

From 346968ddc91a2346e267ab201763fb47835d2618 Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Mon, 13 Mar 2023 20:45:52 -0700
Subject: [PATCH 4/8] add PubmedData parser

---
 .../importer/fileformat/MedlineImporter.java  | 330 +++++++-----------
 .../fileformat/medline/ArticleIDRec.java      |   7 +
 .../fileformat/medline/InvestigatorRec.java   |  10 +
 3 files changed, 147 insertions(+), 200 deletions(-)
 create mode 100644 src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java
 create mode 100644 src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index 7c5dedc0fd1..a660de590cf 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -26,9 +26,7 @@
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.ParserResult;
 import org.jabref.logic.importer.fileformat.medline.Abstract;
-import org.jabref.logic.importer.fileformat.medline.AffiliationInfo;
-import org.jabref.logic.importer.fileformat.medline.ArticleId;
-import org.jabref.logic.importer.fileformat.medline.ArticleIdList;
+import org.jabref.logic.importer.fileformat.medline.ArticleIDRec;
 import org.jabref.logic.importer.fileformat.medline.ArticleTitle;
 import org.jabref.logic.importer.fileformat.medline.AuthorList;
 import org.jabref.logic.importer.fileformat.medline.Book;
@@ -36,26 +34,16 @@
 import org.jabref.logic.importer.fileformat.medline.BookTitle;
 import org.jabref.logic.importer.fileformat.medline.Chemical;
 import org.jabref.logic.importer.fileformat.medline.ContributionDate;
-import org.jabref.logic.importer.fileformat.medline.DateCompleted;
-import org.jabref.logic.importer.fileformat.medline.DateCreated;
 import org.jabref.logic.importer.fileformat.medline.DateRevised;
 import org.jabref.logic.importer.fileformat.medline.ELocationID;
 import org.jabref.logic.importer.fileformat.medline.GeneSymbolList;
-import org.jabref.logic.importer.fileformat.medline.GeneralNote;
-import org.jabref.logic.importer.fileformat.medline.ISSN;
-import org.jabref.logic.importer.fileformat.medline.Investigator;
-import org.jabref.logic.importer.fileformat.medline.InvestigatorList;
-import org.jabref.logic.importer.fileformat.medline.Journal;
-import org.jabref.logic.importer.fileformat.medline.JournalIssue;
-import org.jabref.logic.importer.fileformat.medline.MedlineCitation;
-import org.jabref.logic.importer.fileformat.medline.MedlineJournalInfo;
+import org.jabref.logic.importer.fileformat.medline.InvestigatorRec;
 import org.jabref.logic.importer.fileformat.medline.MeshHeadingRec;
 import org.jabref.logic.importer.fileformat.medline.OtherIDRec;
 import org.jabref.logic.importer.fileformat.medline.Pagination;
 import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectRec;
 import org.jabref.logic.importer.fileformat.medline.PublicationType;
 import org.jabref.logic.importer.fileformat.medline.Publisher;
-import org.jabref.logic.importer.fileformat.medline.PubmedArticle;
 import org.jabref.logic.importer.fileformat.medline.PubmedBookArticle;
 import org.jabref.logic.importer.fileformat.medline.PubmedBookData;
 import org.jabref.logic.importer.fileformat.medline.Section;
@@ -72,9 +60,6 @@
 import org.jabref.model.strings.StringUtil;
 
 import com.google.common.base.Joiner;
-import jakarta.xml.bind.JAXBContext;
-import jakarta.xml.bind.JAXBException;
-import jakarta.xml.bind.Unmarshaller;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -89,7 +74,6 @@ public class MedlineImporter extends Importer implements Parser {
     private static final String KEYWORD_SEPARATOR = "; ";
 
     private static final Locale ENGLISH = Locale.ENGLISH;
-    private Unmarshaller unmarshaller;
 
     private static String join(List<String> list, String string) {
         return Joiner.on(string).join(list);
@@ -151,7 +135,7 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
                     switch (elementName) {
                         case "PubmedArticle" -> {
                             // Case 3: PubmedArticle
-                            parseArticleNew(reader, bibItems, elementName);
+                            parseArticle(reader, bibItems, elementName);
                         }
                         // Case 1: PubmedArticleSet
 
@@ -197,7 +181,7 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
         return new ParserResult(bibItems);
     }
 
-    private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems, String parentElement) throws XMLStreamException {
+    private void parseArticle(XMLStreamReader reader, List<BibEntry> bibItems, String startElement) throws XMLStreamException {
         Map<Field, String> fields = new HashMap<>();
 
         while (reader.hasNext()) {
@@ -209,12 +193,12 @@ private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems, St
                         parseMedlineCitation(reader, fields, elementName);
                     }
                     case "PubmedData" -> {
-                        //
+                        parsePubmedData(reader, fields, elementName);
                     }
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(parentElement)) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
@@ -225,7 +209,45 @@ private void parseArticleNew(XMLStreamReader reader, List<BibEntry> bibItems, St
         bibItems.add(entry);
     }
 
-    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields, String parentElement) throws XMLStreamException {
+    private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+        String publicationStatus = "";
+        List<ArticleIDRec> articleIDList = new ArrayList<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "PublicationStatus" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            publicationStatus = reader.getText();
+                        }
+                    }
+                    case "ArticleId" -> {
+                        String idType = reader.getAttributeValue(null, "IdType");
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            articleIDList.add(new ArticleIDRec(idType, reader.getText()));
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        if (fields.get(new UnknownField("revised")) != null) {
+            putIfValueNotNull(fields, StandardField.PUBSTATE, publicationStatus);
+            if (!articleIDList.isEmpty()) {
+                addArticleIdList(fields, articleIDList);
+            }
+        }
+    }
+
+    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
         // multiple occurrences of the following fields can be present
         List<String> citationSubsets = new ArrayList<>();
         List<MeshHeadingRec> meshHeadingList = new ArrayList<>();
@@ -233,6 +255,8 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         List<OtherIDRec> otherIDList = new ArrayList<>();
         List<String> keywordList = new ArrayList<>();
         List<String> spaceFlightMissionList = new ArrayList<>();
+        List<InvestigatorRec> investigatorList = new ArrayList<>();
+        List<String> generalNoteList = new ArrayList<>();
 
         String status = reader.getAttributeValue(null, "Status");
         String owner = reader.getAttributeValue(null, "Owner");
@@ -244,8 +268,8 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
             if (isStartXMLEvent(reader)) {
                 String elementName = reader.getName().getLocalPart();
                 switch (elementName) {
-                    case "DateCreated", "DateCompleted" -> {
-                        parseDate(reader, elementName, fields);
+                    case "DateCreated", "DateCompleted", "DateRevised" -> {
+                        parseDate(reader, fields, elementName);
                     }
                     case "Article" -> {
                         parseArticleInformation(reader, fields);
@@ -303,16 +327,19 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                             spaceFlightMissionList.add(reader.getText());
                         }
                     }
-                    case "InvestigatorList" -> {
-                        // TODO
+                    case "Investigator" -> {
+                        parseInvestigator(reader, investigatorList, elementName);
                     }
                     case "GeneralNote" -> {
-                        // TODO
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            generalNoteList.add(reader.getText());
+                        }
                     }
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(parentElement)) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
@@ -324,6 +351,48 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         addOtherId(fields, otherIDList);
         addKeywords(fields, keywordList);
         fields.put(new UnknownField("space-flight-mission"), join(spaceFlightMissionList, ", "));
+        addInvestigators(fields, investigatorList);
+        addNotes(fields, generalNoteList);
+    }
+
+    private void parseInvestigator(XMLStreamReader reader, List<InvestigatorRec> investigatorList, String startElement)
+            throws XMLStreamException {
+        String lastName = "";
+        String foreName = "";
+        List<String> affiliationList = new ArrayList<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "LastName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            lastName = reader.getText();
+                        }
+                    }
+                    case "ForeName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            foreName = reader.getText();
+                        }
+                    }
+                    case "Affiliation" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            affiliationList.add(reader.getText());
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        investigatorList.add(new InvestigatorRec(lastName, foreName, affiliationList));
     }
 
     private void parsePersonalNameSubject(XMLStreamReader reader, List<PersonalNameSubjectRec> personalNameSubjectList, String startElement)
@@ -566,7 +635,7 @@ private void parseJournal(XMLStreamReader reader, Map<Field, String> fields) thr
         }
     }
 
-    private void parseDate(XMLStreamReader reader, String parentElement, Map<Field, String> fields) throws XMLStreamException {
+    private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
         Optional<String> year = Optional.empty();
         Optional<String> month = Optional.empty();
         Optional<String> day = Optional.empty();
@@ -574,7 +643,8 @@ private void parseDate(XMLStreamReader reader, String parentElement, Map<Field,
         // mapping from date XML element to field name
         Map<String, String> dateFieldMap = Map.of(
                 "DateCreated", "created",
-                "DateCompleted", "completed"
+                "DateCompleted", "completed",
+                "DateRevised", "revised"
         );
 
         while (reader.hasNext()) {
@@ -603,36 +673,14 @@ private void parseDate(XMLStreamReader reader, String parentElement, Map<Field,
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(parentElement)) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
 
         Optional<Date> date = Date.parse(year, month, day);
         date.ifPresent(dateValue ->
-                fields.put(new UnknownField(dateFieldMap.get(parentElement)), dateValue.getNormalized()));
-    }
-
-    private Object unmarshallRoot(BufferedReader reader) throws JAXBException, XMLStreamException {
-        initUmarshaller();
-
-        XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
-        XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(reader);
-
-        // go to the root element
-        while (!xmlStreamReader.isStartElement()) {
-            xmlStreamReader.next();
-        }
-
-        return unmarshaller.unmarshal(xmlStreamReader);
-    }
-
-    private void initUmarshaller() throws JAXBException {
-        if (unmarshaller == null) {
-            // Lazy init because this is expensive
-            JAXBContext context = JAXBContext.newInstance("org.jabref.logic.importer.fileformat.medline");
-            unmarshaller = context.createUnmarshaller();
-        }
+                fields.put(new UnknownField(dateFieldMap.get(startElement)), dateValue.getNormalized()));
     }
 
     private void parseBookArticle(PubmedBookArticle currentArticle, List<BibEntry> bibItems) {
@@ -770,136 +818,53 @@ private String convertToDateFormat(String year, String month, String day) {
         return String.format("%s-%s-%s", year, month, day);
     }
 
-    private void parseArticle(PubmedArticle article, List<BibEntry> bibItems) {
-        Map<Field, String> fields = new HashMap<>();
-
-        if (article.getPubmedData() != null) {
-            if (article.getMedlineCitation().getDateRevised() != null) {
-                DateRevised dateRevised = article.getMedlineCitation().getDateRevised();
-                addDateRevised(fields, dateRevised);
-                putIfValueNotNull(fields, StandardField.PUBSTATE, article.getPubmedData().getPublicationStatus());
-                if (article.getPubmedData().getArticleIdList() != null) {
-                    ArticleIdList articleIdList = article.getPubmedData().getArticleIdList();
-                    addArticleIdList(fields, articleIdList);
-                }
-            }
-        }
-        if (article.getMedlineCitation() != null) {
-            MedlineCitation medlineCitation = article.getMedlineCitation();
-
-            fields.put(new UnknownField("status"), medlineCitation.getStatus());
-            DateCreated dateCreated = medlineCitation.getDateCreated();
-            if (medlineCitation.getDateCreated() != null) {
-                fields.put(new UnknownField("created"),
-                        convertToDateFormat(dateCreated.getYear(), dateCreated.getMonth(), dateCreated.getDay()));
-            }
-            fields.put(new UnknownField("pubmodel"), medlineCitation.getArticle().getPubModel());
-
-            if (medlineCitation.getDateCompleted() != null) {
-                DateCompleted dateCompleted = medlineCitation.getDateCompleted();
-                fields.put(new UnknownField("completed"),
-                        convertToDateFormat(dateCompleted.getYear(), dateCompleted.getMonth(), dateCompleted.getDay()));
-            }
-
-            fields.put(StandardField.PMID, medlineCitation.getPMID().getContent());
-            fields.put(StandardField.OWNER, medlineCitation.getOwner());
-
-            addArticleInformation(fields, medlineCitation.getArticle().getContent());
-
-            MedlineJournalInfo medlineJournalInfo = medlineCitation.getMedlineJournalInfo();
-            putIfValueNotNull(fields, new UnknownField("country"), medlineJournalInfo.getCountry());
-            putIfValueNotNull(fields, new UnknownField("journal-abbreviation"), medlineJournalInfo.getMedlineTA());
-            putIfValueNotNull(fields, new UnknownField("nlm-id"), medlineJournalInfo.getNlmUniqueID());
-            putIfValueNotNull(fields, new UnknownField("issn-linking"), medlineJournalInfo.getISSNLinking());
-            if (medlineCitation.getChemicalList() != null) {
-                if (medlineCitation.getChemicalList().getChemical() != null) {
-                    addChemicals(fields, medlineCitation.getChemicalList().getChemical());
-                }
-            }
-            if (medlineCitation.getCitationSubset() != null) {
-                fields.put(new UnknownField("citation-subset"), join(medlineCitation.getCitationSubset(), ", "));
-            }
-            if (medlineCitation.getGeneSymbolList() != null) {
-                addGeneSymbols(fields, medlineCitation.getGeneSymbolList());
-            }
-            if (medlineCitation.getMeshHeadingList() != null) {
-                // addMeshHeading(fields, medlineCitation.getMeshHeadingList());
-            }
-            putIfValueNotNull(fields, new UnknownField("references"), medlineCitation.getNumberOfReferences());
-            if (medlineCitation.getPersonalNameSubjectList() != null) {
-//                addPersonalNames(fields, medlineCitation.getPersonalNameSubjectList());
-            }
-            if (medlineCitation.getOtherID() != null) {
-//                addOtherId(fields, medlineCitation.getOtherID());
-            }
-            if (medlineCitation.getKeywordList() != null) {
-//                addKeywords(fields, medlineCitation.getKeywordList());
-            }
-            if (medlineCitation.getSpaceFlightMission() != null) {
-                fields.put(new UnknownField("space-flight-mission"), join(medlineCitation.getSpaceFlightMission(), ", "));
-            }
-            if (medlineCitation.getInvestigatorList() != null) {
-                addInvestigators(fields, medlineCitation.getInvestigatorList());
-            }
-            if (medlineCitation.getGeneralNote() != null) {
-                addNotes(fields, medlineCitation.getGeneralNote());
-            }
-        }
-
-        BibEntry entry = new BibEntry(StandardEntryType.Article);
-        entry.setField(fields);
-
-        bibItems.add(entry);
-    }
-
-    private void addArticleIdList(Map<Field, String> fields, ArticleIdList articleIdList) {
-        for (ArticleId id : articleIdList.getArticleId()) {
-            if (id.getIdType() != null) {
-                if ("pubmed".equals(id.getIdType())) {
-                    fields.put(StandardField.PMID, id.getContent());
+    private void addArticleIdList(Map<Field, String> fields, List<ArticleIDRec> articleIdList) {
+        for (ArticleIDRec id : articleIdList) {
+            if (id.idType() != null && !id.idType().isBlank()) { // IdType comes from getAttributeValue and is null when the attribute is absent
+                if ("pubmed".equals(id.idType())) {
+                    fields.put(StandardField.PMID, id.content());
                 } else {
-                    fields.put(FieldFactory.parseField(StandardEntryType.Article, id.getIdType()), id.getContent());
+                    fields.put(FieldFactory.parseField(StandardEntryType.Article, id.idType()), id.content());
                 }
             }
         }
     }
 
-    private void addNotes(Map<Field, String> fields, List<GeneralNote> generalNote) {
+    private void addNotes(Map<Field, String> fields, List<String> generalNoteList) {
         List<String> notes = new ArrayList<>();
-        for (GeneralNote note : generalNote) {
-            if (note != null) {
-                notes.add(note.getContent());
+
+        for (String note : generalNoteList) {
+            if (!note.isBlank()) {
+                notes.add(note);
             }
         }
+
         fields.put(StandardField.NOTE, join(notes, ", "));
     }
 
-    private void addInvestigators(Map<Field, String> fields, InvestigatorList investigatorList) {
+    private void addInvestigators(Map<Field, String> fields, List<InvestigatorRec> investigatorList) {
         List<String> investigatorNames = new ArrayList<>();
         List<String> affiliationInfos = new ArrayList<>();
-        String name;
+
         // add the investigators like the authors
-        if (investigatorList.getInvestigator() != null) {
-            List<Investigator> investigators = investigatorList.getInvestigator();
-            for (Investigator investigator : investigators) {
-                name = investigator.getLastName();
-                if (investigator.getForeName() != null) {
-                    name += ", " + investigator.getForeName();
+        if (!investigatorList.isEmpty()) {
+            for (InvestigatorRec investigator : investigatorList) {
+                StringBuilder result = new StringBuilder(investigator.lastName());
+                if (!investigator.foreName().isBlank()) {
+                    result.append(", ").append(investigator.foreName());
                 }
-                investigatorNames.add(name);
+                investigatorNames.add(result.toString());
 
                 // now add the affiliation info
-                if (investigator.getAffiliationInfo() != null) {
-                    for (AffiliationInfo info : investigator.getAffiliationInfo()) {
-                        for (Serializable affiliation : info.getAffiliation().getContent()) {
-                            if (affiliation instanceof String) {
-                                affiliationInfos.add((String) affiliation);
-                            }
-                        }
-                    }
-                    fields.put(new UnknownField("affiliation"), join(affiliationInfos, ", "));
+                if (!investigator.affiliationList().isEmpty()) {
+                    affiliationInfos.addAll(investigator.affiliationList());
                 }
             }
+
+            if (!affiliationInfos.isEmpty()) {
+                fields.put(new UnknownField("affiliation"), join(affiliationInfos, ", "));
+            }
+
             fields.put(new UnknownField("investigator"), join(investigatorNames, " and "));
         }
     }
@@ -978,41 +943,6 @@ private void addChemicals(Map<Field, String> fields, List<Chemical> chemicals) {
         fields.put(new UnknownField("chemicals"), join(chemicalNames, ", "));
     }
 
-    private void addArticleInformation(Map<Field, String> fields, List<Object> content) {
-        for (Object object : content) {
-            if (object instanceof Journal) {
-                Journal journal = (Journal) object;
-                putIfValueNotNull(fields, StandardField.JOURNAL, journal.getTitle());
-
-                ISSN issn = journal.getISSN();
-                if (issn != null) {
-                    putIfValueNotNull(fields, StandardField.ISSN, issn.getContent());
-                }
-
-                JournalIssue journalIssue = journal.getJournalIssue();
-                putIfValueNotNull(fields, StandardField.VOLUME, journalIssue.getVolume());
-                putIfValueNotNull(fields, StandardField.ISSUE, journalIssue.getIssue());
-
-                // addPubDate(fields, journalIssue.getPubDate());
-            } else if (object instanceof ArticleTitle) {
-                ArticleTitle articleTitle = (ArticleTitle) object;
-                fields.put(StandardField.TITLE, StringUtil.stripBrackets(articleTitle.getContent().toString()));
-            } else if (object instanceof Pagination) {
-                Pagination pagination = (Pagination) object;
-                // addPagination(fields, pagination);
-            } else if (object instanceof ELocationID) {
-                ELocationID eLocationID = (ELocationID) object;
-//                addElocationID(fields, eLocationID);
-            } else if (object instanceof Abstract) {
-                Abstract abs = (Abstract) object;
-                // addAbstract(fields, abs);
-            } else if (object instanceof AuthorList) {
-                AuthorList authors = (AuthorList) object;
-//                handleAuthorList(fields, authors);
-            }
-        }
-    }
-
     private void addPubDate(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
         while (reader.hasNext()) {
             reader.next();
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java
new file mode 100644
index 00000000000..b39cfc7b8f8
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java
@@ -0,0 +1,7 @@
+package org.jabref.logic.importer.fileformat.medline;
+
+public record ArticleIDRec(
+        String idType,
+        String content
+) {
+}
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java
new file mode 100644
index 00000000000..a2efb856d7b
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java
@@ -0,0 +1,10 @@
+package org.jabref.logic.importer.fileformat.medline;
+
+import java.util.List;
+
+public record InvestigatorRec(
+        String lastName,
+        String foreName,
+        List<String> affiliationList
+) {
+}

From 12c7f2786a103f032dfe62892a16f7ce416937b2 Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Tue, 14 Mar 2023 21:47:50 -0700
Subject: [PATCH 5/8] add BookArticle parser

---
 .../importer/fileformat/MedlineImporter.java  | 520 ++++++++++--------
 .../fileformat/MedlineImporterTestNbib.bib    |   2 +-
 2 files changed, 285 insertions(+), 237 deletions(-)

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index a660de590cf..86ec8e13f3c 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -4,7 +4,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.Serializable;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -25,29 +24,11 @@
 import org.jabref.logic.importer.ParseException;
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.ParserResult;
-import org.jabref.logic.importer.fileformat.medline.Abstract;
 import org.jabref.logic.importer.fileformat.medline.ArticleIDRec;
-import org.jabref.logic.importer.fileformat.medline.ArticleTitle;
-import org.jabref.logic.importer.fileformat.medline.AuthorList;
-import org.jabref.logic.importer.fileformat.medline.Book;
-import org.jabref.logic.importer.fileformat.medline.BookDocument;
-import org.jabref.logic.importer.fileformat.medline.BookTitle;
-import org.jabref.logic.importer.fileformat.medline.Chemical;
-import org.jabref.logic.importer.fileformat.medline.ContributionDate;
-import org.jabref.logic.importer.fileformat.medline.DateRevised;
-import org.jabref.logic.importer.fileformat.medline.ELocationID;
-import org.jabref.logic.importer.fileformat.medline.GeneSymbolList;
 import org.jabref.logic.importer.fileformat.medline.InvestigatorRec;
 import org.jabref.logic.importer.fileformat.medline.MeshHeadingRec;
 import org.jabref.logic.importer.fileformat.medline.OtherIDRec;
-import org.jabref.logic.importer.fileformat.medline.Pagination;
 import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectRec;
-import org.jabref.logic.importer.fileformat.medline.PublicationType;
-import org.jabref.logic.importer.fileformat.medline.Publisher;
-import org.jabref.logic.importer.fileformat.medline.PubmedBookArticle;
-import org.jabref.logic.importer.fileformat.medline.PubmedBookData;
-import org.jabref.logic.importer.fileformat.medline.Section;
-import org.jabref.logic.importer.fileformat.medline.Sections;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.Date;
@@ -134,45 +115,14 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
                     String elementName = reader.getName().getLocalPart();
                     switch (elementName) {
                         case "PubmedArticle" -> {
-                            // Case 3: PubmedArticle
                             parseArticle(reader, bibItems, elementName);
                         }
-                        // Case 1: PubmedArticleSet
-
-                        // Case 2: PubmedBookArticleSet
-
-                        // Case 4: PubmedBookArticle
+                        case "PubmedBookArticle" -> {
+                            parseBookArticle(reader, bibItems, elementName);
+                        }
                     }
                 }
             }
-
-//            Object unmarshalledObject = unmarshallRoot(reader);
-//
-//            // check whether we have an article set, an article, a book article or a book article set
-//            if (unmarshalledObject instanceof PubmedArticleSet) {
-//                PubmedArticleSet articleSet = (PubmedArticleSet) unmarshalledObject;
-//                for (Object article : articleSet.getPubmedArticleOrPubmedBookArticle()) {
-//                    if (article instanceof PubmedArticle) {
-//                        PubmedArticle currentArticle = (PubmedArticle) article;
-//                        parseArticle(currentArticle, bibItems);
-//                    }
-//                    if (article instanceof PubmedBookArticle) {
-//                        PubmedBookArticle currentArticle = (PubmedBookArticle) article;
-//                        parseBookArticle(currentArticle, bibItems);
-//                    }
-//                }
-//            } else if (unmarshalledObject instanceof PubmedArticle) {
-//                PubmedArticle article = (PubmedArticle) unmarshalledObject;
-//                parseArticle(article, bibItems);
-//            } else if (unmarshalledObject instanceof PubmedBookArticle) {
-//                PubmedBookArticle currentArticle = (PubmedBookArticle) unmarshalledObject;
-//                parseBookArticle(currentArticle, bibItems);
-//            } else {
-//                PubmedBookArticleSet bookArticleSet = (PubmedBookArticleSet) unmarshalledObject;
-//                for (PubmedBookArticle bookArticle : bookArticleSet.getPubmedBookArticle()) {
-//                    parseBookArticle(bookArticle, bibItems);
-//                }
-//            }
         } catch (XMLStreamException e) {
             LOGGER.debug("could not parse document", e);
             return ParserResult.fromError(e);
@@ -181,7 +131,232 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
         return new ParserResult(bibItems);
     }
 
-    private void parseArticle(XMLStreamReader reader, List<BibEntry> bibItems, String startElement) throws XMLStreamException {
+    private void parseBookArticle(XMLStreamReader reader, List<BibEntry> bibItems, String startElement)
+            throws XMLStreamException {
+        Map<Field, String> fields = new HashMap<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "BookDocument" -> {
+                        parseBookDocument(reader, fields, elementName);
+                    }
+                    case "PublicationStatus" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.PUBSTATE, reader.getText());
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        BibEntry entry = new BibEntry(StandardEntryType.Article);
+        entry.setField(fields);
+
+        bibItems.add(entry);
+    }
+
+    private void parseBookDocument(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
+        // multiple occurrences of the following fields can be present
+        List<String> sectionTitleList = new ArrayList<>();
+        List<String> keywordList = new ArrayList<>();
+        List<String> publicationTypeList = new ArrayList<>();
+        List<String> articleTitleList = new ArrayList<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "PMID" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            fields.put(StandardField.PMID, reader.getText());
+                        }
+                    }
+                    case "DateRevised", "ContributionDate" -> {
+                        parseDate(reader, fields, elementName);
+                    }
+                    case "Abstract" -> {
+                        addAbstract(reader, fields, elementName);
+                    }
+                    case "Pagination" -> {
+                        addPagination(reader, fields, elementName);
+                    }
+                    case "Section" -> {
+                        parseSections(reader, sectionTitleList);
+                    }
+                    case "Keyword" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            keywordList.add(reader.getText());
+                        }
+                    }
+                    case "PublicationType" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            publicationTypeList.add(reader.getText());
+                        }
+                    }
+                    case "ArticleTitle" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            articleTitleList.add(reader.getText());
+                        }
+                    }
+                    case "Book" -> {
+                        parseBookInformation(reader, fields, elementName);
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        // populate multiple occurrence fields
+        if (!sectionTitleList.isEmpty()) {
+            fields.put(new UnknownField("sections"), join(sectionTitleList, "; "));
+        }
+        addKeywords(fields, keywordList);
+        if (!publicationTypeList.isEmpty()) {
+            fields.put(new UnknownField("pubtype"), join(publicationTypeList, ", "));
+        }
+        if (!articleTitleList.isEmpty()) {
+            fields.put(new UnknownField("article"), join(articleTitleList, ", "));
+        }
+    }
+
+    private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
+        List<String> isbnList = new ArrayList<>();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "PublisherName" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.PUBLISHER, reader.getText());
+                        }
+                    }
+                    case "PublisherLocation" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("publocation"), reader.getText());
+                        }
+                    }
+                    case "BookTitle" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.TITLE, reader.getText());
+                        }
+                    }
+                    case "PubDate" -> {
+                        addPubDate(reader, fields, elementName);
+                    }
+                    case "AuthorList" -> {
+                        handleAuthorList(reader, fields, elementName);
+                    }
+                    case "Volume" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.VOLUME, reader.getText());
+                        }
+                    }
+                    case "Edition" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, StandardField.EDITION, reader.getText());
+                        }
+                    }
+                    case "Medium" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("medium"), reader.getText());
+                        }
+                    }
+                    case "ReportNumber" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            putIfValueNotNull(fields, new UnknownField("reportnumber"), reader.getText());
+                        }
+                    }
+                    case "ELocationID" -> {
+                        String eidType = reader.getAttributeValue(null, "EIdType");
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            if (eidType.equals("doi")) {
+                                fields.put(StandardField.DOI, reader.getText());
+                            }
+                            if (eidType.equals("pii")) {
+                                fields.put(new UnknownField("pii"), reader.getText());
+                            }
+                        }
+                    }
+                    case "Isbn" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader)) {
+                            isbnList.add(reader.getText());
+                        }
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        if (!isbnList.isEmpty()) {
+            fields.put(StandardField.ISBN, join(isbnList, ", "));
+        }
+    }
+
+    private void parseSections(XMLStreamReader reader, List<String> sectionTitleList) throws XMLStreamException {
+        int sectionLevel = 0;
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "SectionTitle" -> {
+                        reader.next();
+                        if (isCharacterXMLEvent(reader) && sectionLevel == 0) {
+                            // we only collect SectionTitles from root level Sections
+                            sectionTitleList.add(reader.getText());
+                        }
+                    }
+                    case "Section" -> {
+                        sectionLevel++;
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Section")) {
+                if (sectionLevel == 0) {
+                    break;
+                } else {
+                    sectionLevel--;
+                }
+            }
+        }
+    }
+
+    private void parseArticle(XMLStreamReader reader, List<BibEntry> bibItems, String startElement)
+            throws XMLStreamException {
         Map<Field, String> fields = new HashMap<>();
 
         while (reader.hasNext()) {
@@ -209,7 +384,8 @@ private void parseArticle(XMLStreamReader reader, List<BibEntry> bibItems, Strin
         bibItems.add(entry);
     }
 
-    private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+    private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         String publicationStatus = "";
         List<ArticleIDRec> articleIDList = new ArrayList<>();
 
@@ -247,7 +423,8 @@ private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields,
         }
     }
 
-    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+    private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         // multiple occurrences of the following fields can be present
         List<String> citationSubsets = new ArrayList<>();
         List<MeshHeadingRec> meshHeadingList = new ArrayList<>();
@@ -260,6 +437,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
 
         String status = reader.getAttributeValue(null, "Status");
         String owner = reader.getAttributeValue(null, "Owner");
+        int latestVersion = 0;
         fields.put(new UnknownField("status"), status);
         fields.put(StandardField.OWNER, owner);
 
@@ -275,9 +453,14 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                         parseArticleInformation(reader, fields);
                     }
                     case "PMID" -> {
+                        String versionStr = reader.getAttributeValue(null, "Version");
                         reader.next();
-                        if (isCharacterXMLEvent(reader)) {
-                            fields.put(StandardField.PMID, reader.getText());
+                        if (versionStr != null) {
+                            int version = Integer.parseInt(versionStr);
+                            if (isCharacterXMLEvent(reader) && version > latestVersion) {
+                                latestVersion = version;
+                                fields.put(StandardField.PMID, reader.getText());
+                            }
                         }
                     }
                     case "MedlineJournalInfo" -> {
@@ -292,7 +475,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                             citationSubsets.add(reader.getText());
                         }
                     }
-                    case "GeneSymbol" -> {
+                    case "GeneSymbolList" -> {
                         parseGeneSymbolList(reader, fields, elementName);
                     }
                     case "MeshHeading" -> {
@@ -345,12 +528,16 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         }
 
         // populate multiple occurrence fields
-        fields.put(new UnknownField("citation-subset"), join(citationSubsets, ", "));
+        if (!citationSubsets.isEmpty()) {
+            fields.put(new UnknownField("citation-subset"), join(citationSubsets, ", "));
+        }
         addMeshHeading(fields, meshHeadingList);
         addPersonalNames(fields, personalNameSubjectList);
         addOtherId(fields, otherIDList);
         addKeywords(fields, keywordList);
-        fields.put(new UnknownField("space-flight-mission"), join(spaceFlightMissionList, ", "));
+        if (!spaceFlightMissionList.isEmpty()) {
+            fields.put(new UnknownField("space-flight-mission"), join(spaceFlightMissionList, ", "));
+        }
         addInvestigators(fields, investigatorList);
         addNotes(fields, generalNoteList);
     }
@@ -428,7 +615,8 @@ private void parsePersonalNameSubject(XMLStreamReader reader, List<PersonalNameS
         personalNameSubjectList.add(new PersonalNameSubjectRec(lastName, foreName));
     }
 
-    private void parseMeshHeading(XMLStreamReader reader, List<MeshHeadingRec> meshHeadingList, String startElement) throws XMLStreamException {
+    private void parseMeshHeading(XMLStreamReader reader, List<MeshHeadingRec> meshHeadingList, String startElement)
+            throws XMLStreamException {
         String descriptorName = "";
         List<String> qualifierNames = new ArrayList<>();
 
@@ -460,7 +648,8 @@ private void parseMeshHeading(XMLStreamReader reader, List<MeshHeadingRec> meshH
         meshHeadingList.add(new MeshHeadingRec(descriptorName, qualifierNames));
     }
 
-    private void parseGeneSymbolList(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+    private void parseGeneSymbolList(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         List<String> geneSymbols = new ArrayList<>();
 
         while (reader.hasNext()) {
@@ -480,10 +669,13 @@ private void parseGeneSymbolList(XMLStreamReader reader, Map<Field, String> fiel
             }
         }
 
-        fields.put(new UnknownField("gene-symbols"), join(geneSymbols, ", "));
+        if (!geneSymbols.isEmpty()) {
+            fields.put(new UnknownField("gene-symbols"), join(geneSymbols, ", "));
+        }
     }
 
-    private void parseChemicalList(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+    private void parseChemicalList(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         List<String> chemicalNames = new ArrayList<>();
 
         while (reader.hasNext()) {
@@ -506,7 +698,8 @@ private void parseChemicalList(XMLStreamReader reader, Map<Field, String> fields
         fields.put(new UnknownField("chemicals"), join(chemicalNames, ", "));
     }
 
-    private void parseMedlineJournalInfo(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+    private void parseMedlineJournalInfo(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         while (reader.hasNext()) {
             reader.next();
             if (isStartXMLEvent(reader)) {
@@ -564,7 +757,7 @@ private void parseArticleInformation(XMLStreamReader reader, Map<Field, String>
                         }
                     }
                     case "Pagination" -> {
-                        addPagination(reader, fields);
+                        addPagination(reader, fields, elementName);
                     }
                     case "ELocationID" -> {
                         String eidType = reader.getAttributeValue(null, "EIdType");
@@ -579,10 +772,10 @@ private void parseArticleInformation(XMLStreamReader reader, Map<Field, String>
                         }
                     }
                     case "Abstract" -> {
-                        addAbstract(reader, fields);
+                        addAbstract(reader, fields, elementName);
                     }
                     case "AuthorList" -> {
-                        handleAuthorList(reader, fields);
+                        handleAuthorList(reader, fields, elementName);
                     }
                 }
             }
@@ -624,7 +817,7 @@ private void parseJournal(XMLStreamReader reader, Map<Field, String> fields) thr
                         }
                     }
                     case "PubDate" -> {
-                        addPubDate(reader, fields);
+                        addPubDate(reader, fields, elementName);
                     }
                 }
             }
@@ -635,7 +828,8 @@ private void parseJournal(XMLStreamReader reader, Map<Field, String> fields) thr
         }
     }
 
-    private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
+    private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         Optional<String> year = Optional.empty();
         Optional<String> month = Optional.empty();
         Optional<String> day = Optional.empty();
@@ -644,7 +838,9 @@ private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String
         Map<String, String> dateFieldMap = Map.of(
                 "DateCreated", "created",
                 "DateCompleted", "completed",
-                "DateRevised", "revised"
+                "DateRevised", "revised",
+                "ContributionDate", "contribution",
+                "PubDate", ""
         );
 
         while (reader.hasNext()) {
@@ -683,137 +879,6 @@ private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String
                 fields.put(new UnknownField(dateFieldMap.get(startElement)), dateValue.getNormalized()));
     }
 
-    private void parseBookArticle(PubmedBookArticle currentArticle, List<BibEntry> bibItems) {
-        Map<Field, String> fields = new HashMap<>();
-        if (currentArticle.getBookDocument() != null) {
-            BookDocument bookDocument = currentArticle.getBookDocument();
-            fields.put(StandardField.PMID, bookDocument.getPMID().getContent());
-            if (bookDocument.getDateRevised() != null) {
-                DateRevised dateRevised = bookDocument.getDateRevised();
-                addDateRevised(fields, dateRevised);
-            }
-            if (bookDocument.getAbstract() != null) {
-                Abstract abs = bookDocument.getAbstract();
-                // addAbstract(fields, abs);
-            }
-            if (bookDocument.getPagination() != null) {
-                Pagination pagination = bookDocument.getPagination();
-                // addPagination(fields, pagination);
-            }
-            if (bookDocument.getSections() != null) {
-                ArrayList<String> result = new ArrayList<>();
-                Sections sections = bookDocument.getSections();
-                for (Section section : sections.getSection()) {
-                    for (Serializable content : section.getSectionTitle().getContent()) {
-                        if (content instanceof String) {
-                            result.add((String) content);
-                        }
-                    }
-                }
-                fields.put(new UnknownField("sections"), join(result, "; "));
-            }
-            if (bookDocument.getKeywordList() != null) {
-//                addKeywords(fields, bookDocument.getKeywordList());
-            }
-            if (bookDocument.getContributionDate() != null) {
-                addContributionDate(fields, bookDocument.getContributionDate());
-            }
-            if (bookDocument.getPublicationType() != null) {
-                List<String> result = new ArrayList<>();
-                for (PublicationType type : bookDocument.getPublicationType()) {
-                    if (type.getContent() != null) {
-                        result.add(type.getContent());
-                    }
-                }
-                fields.put(new UnknownField("pubtype"), join(result, ", "));
-            }
-            if (bookDocument.getArticleTitle() != null) {
-                ArticleTitle articleTitle = bookDocument.getArticleTitle();
-                ArrayList<String> titles = new ArrayList<>();
-                for (Serializable content : articleTitle.getContent()) {
-                    if (content instanceof String) {
-                        titles.add((String) content);
-                    }
-                }
-                fields.put(new UnknownField("article"), join(titles, ", "));
-            }
-            if (bookDocument.getBook() != null) {
-                addBookInformation(fields, bookDocument.getBook());
-            }
-        }
-
-        if (currentArticle.getPubmedBookData() != null) {
-            PubmedBookData bookData = currentArticle.getPubmedBookData();
-            putIfValueNotNull(fields, StandardField.PUBSTATE, bookData.getPublicationStatus());
-        }
-
-        BibEntry entry = new BibEntry(StandardEntryType.Article);
-        entry.setField(fields);
-
-        bibItems.add(entry);
-    }
-
-    private void addBookInformation(Map<Field, String> fields, Book book) {
-        if (book.getPublisher() != null) {
-            Publisher publisher = book.getPublisher();
-            putIfValueNotNull(fields, new UnknownField("publocation"), publisher.getPublisherLocation());
-            putStringFromSerializableList(fields, StandardField.PUBLISHER, publisher.getPublisherName().getContent());
-        }
-        if (book.getBookTitle() != null) {
-            BookTitle title = book.getBookTitle();
-            putStringFromSerializableList(fields, StandardField.TITLE, title.getContent());
-        }
-        if (book.getPubDate() != null) {
-            // addPubDate(fields, book.getPubDate());
-        }
-        if (book.getAuthorList() != null) {
-            List<AuthorList> authorLists = book.getAuthorList();
-            // authorLists size should be one
-            if (authorLists.size() == 1) {
-                for (AuthorList authorList : authorLists) {
-                    // handleAuthorList(fields, authorList);
-                }
-            } else {
-                LOGGER.info(String.format("Size of authorlist was %s", authorLists.size()));
-            }
-        }
-
-        putIfValueNotNull(fields, StandardField.VOLUME, book.getVolume());
-        putIfValueNotNull(fields, StandardField.EDITION, book.getEdition());
-        putIfValueNotNull(fields, new UnknownField("medium"), book.getMedium());
-        putIfValueNotNull(fields, new UnknownField("reportnumber"), book.getReportNumber());
-
-        if (book.getELocationID() != null) {
-            for (ELocationID id : book.getELocationID()) {
-//                addElocationID(fields, id);
-            }
-        }
-        if (book.getIsbn() != null) {
-            fields.put(StandardField.ISBN, join(book.getIsbn(), ", "));
-        }
-    }
-
-    private void putStringFromSerializableList(Map<Field, String> fields, Field field, List<Serializable> contentList) {
-        StringBuilder result = new StringBuilder();
-        for (Serializable content : contentList) {
-            if (content instanceof String) {
-                result.append((String) content);
-            }
-        }
-        if (result.length() > 0) {
-            fields.put(field, result.toString());
-        }
-    }
-
-    private void addContributionDate(Map<Field, String> fields, ContributionDate contributionDate) {
-        if ((contributionDate.getDay() != null) && (contributionDate.getMonth() != null)
-                && (contributionDate.getYear() != null)) {
-            String result = convertToDateFormat(contributionDate.getYear(), contributionDate.getMonth(),
-                    contributionDate.getDay());
-            fields.put(new UnknownField("contribution"), result);
-        }
-    }
-
     private String convertToDateFormat(String year, String month, String day) {
         return String.format("%s-%s-%s", year, month, day);
     }
@@ -822,7 +887,7 @@ private void addArticleIdList(Map<Field, String> fields, List<ArticleIDRec> arti
         for (ArticleIDRec id : articleIdList) {
             if (!id.idType().isBlank()) {
                 if ("pubmed".equals(id.idType())) {
-                    fields.put(StandardField.PMID, id.content());
+                    fields.computeIfAbsent(StandardField.PMID, k -> id.content());
                 } else {
                     fields.put(FieldFactory.parseField(StandardEntryType.Article, id.idType()), id.content());
                 }
@@ -839,7 +904,9 @@ private void addNotes(Map<Field, String> fields, List<String> generalNoteList) {
             }
         }
 
-        fields.put(StandardField.NOTE, join(notes, ", "));
+        if (!notes.isEmpty()) {
+            fields.put(StandardField.NOTE, join(notes, ", "));
+        }
     }
 
     private void addInvestigators(Map<Field, String> fields, List<InvestigatorRec> investigatorList) {
@@ -928,22 +995,7 @@ private void addMeshHeading(Map<Field, String> fields, List<MeshHeadingRec> mesh
         }
     }
 
-    private void addGeneSymbols(Map<Field, String> fields, GeneSymbolList geneSymbolList) {
-        List<String> geneSymbols = geneSymbolList.getGeneSymbol();
-        fields.put(new UnknownField("gene-symbols"), join(geneSymbols, ", "));
-    }
-
-    private void addChemicals(Map<Field, String> fields, List<Chemical> chemicals) {
-        List<String> chemicalNames = new ArrayList<>();
-        for (Chemical chemical : chemicals) {
-            if (chemical != null) {
-                chemicalNames.add(chemical.getNameOfSubstance().getContent());
-            }
-        }
-        fields.put(new UnknownField("chemicals"), join(chemicalNames, ", "));
-    }
-
-    private void addPubDate(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+    private void addPubDate(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
         while (reader.hasNext()) {
             reader.next();
             if (isStartXMLEvent(reader)) {
@@ -977,13 +1029,14 @@ private void addPubDate(XMLStreamReader reader, Map<Field, String> fields) throw
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("PubDate")) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
     }
 
-    private void addAbstract(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+    private void addAbstract(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         List<String> abstractText = new ArrayList<>();
 
         while (reader.hasNext()) {
@@ -1006,7 +1059,7 @@ private void addAbstract(XMLStreamReader reader, Map<Field, String> fields) thro
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Abstract")) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
@@ -1014,7 +1067,8 @@ private void addAbstract(XMLStreamReader reader, Map<Field, String> fields) thro
         fields.put(StandardField.ABSTRACT, join(abstractText, " "));
     }
 
-    private void addPagination(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+    private void addPagination(XMLStreamReader reader, Map<Field, String> fields, String startElement)
+            throws XMLStreamException {
         String startPage = "";
         String endPage = "";
 
@@ -1048,7 +1102,7 @@ private void addPagination(XMLStreamReader reader, Map<Field, String> fields) th
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("Pagination")) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
@@ -1059,7 +1113,7 @@ private String extractYear(String medlineDate) {
         return medlineDate.substring(0, 4);
     }
 
-    private void handleAuthorList(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+    private void handleAuthorList(XMLStreamReader reader, Map<Field, String> fields, String startElement) throws XMLStreamException {
         List<String> authorNames = new ArrayList<>();
 
         while (reader.hasNext()) {
@@ -1073,7 +1127,7 @@ private void handleAuthorList(XMLStreamReader reader, Map<Field, String> fields)
                 }
             }
 
-            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals("AuthorList")) {
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
                 break;
             }
         }
@@ -1124,13 +1178,6 @@ private void parseAuthor(XMLStreamReader reader, List<String> authorNames) throw
         }
     }
 
-    private void addDateRevised(Map<Field, String> fields, DateRevised dateRevised) {
-        if ((dateRevised.getDay() != null) && (dateRevised.getMonth() != null) && (dateRevised.getYear() != null)) {
-            fields.put(new UnknownField("revised"),
-                    convertToDateFormat(dateRevised.getYear(), dateRevised.getMonth(), dateRevised.getDay()));
-        }
-    }
-
     private void putIfValueNotNull(Map<Field, String> fields, Field field, String value) {
         if (value != null) {
             fields.put(field, value);
@@ -1138,7 +1185,8 @@ private void putIfValueNotNull(Map<Field, String> fields, Field field, String va
     }
 
     /**
-     * Convert medline page ranges from short form to full form. Medline reports page ranges in a shorthand format. The last page is reported using only the digits which differ from the first page. i.e. 12345-51 refers to the actual range 12345-12351
+     * Convert medline page ranges from short form to full form. Medline reports page ranges in a shorthand format.
+     * The last page is reported using only the digits which differ from the first page. i.e. 12345-51 refers to the actual range 12345-12351
      */
     private String fixPageRange(String pageRange) {
         int minusPos = pageRange.indexOf('-');
diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib
index 6d897517d74..29dddebede9 100644
--- a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib
+++ b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestNbib.bib
@@ -27,7 +27,7 @@ @article{
   pubmodel = {Print-Electronic},
   pubstate = {ppublish},
   references = {23},
-  revised = {2015-9-15},
+  revised = {2015-09-15},
   season = {Spring},
   space-flight-mission = {fly},
   status = {MEDLINE},

From 9e7933528309075ce49dc675bf9e003c8f840adf Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Wed, 15 Mar 2023 23:16:58 -0700
Subject: [PATCH 6/8] clean up code, fix unicode issue

---
 build.gradle                                  |  10 --
 .../importer/fileformat/MedlineImporter.java  | 102 +++++++++---------
 .../{ArticleIDRec.java => ArticleID.java}     |   2 +-
 ...InvestigatorRec.java => Investigator.java} |   2 +-
 .../{MeshHeadingRec.java => MeshHeading.java} |   2 +-
 .../medline/{OtherIDRec.java => OtherID.java} |   2 +-
 ...bjectRec.java => PersonalNameSubject.java} |   2 +-
 7 files changed, 55 insertions(+), 67 deletions(-)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{ArticleIDRec.java => ArticleID.java} (79%)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{InvestigatorRec.java => Investigator.java} (84%)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{MeshHeadingRec.java => MeshHeading.java} (83%)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{OtherIDRec.java => OtherID.java} (80%)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{PersonalNameSubjectRec.java => PersonalNameSubject.java} (74%)

diff --git a/build.gradle b/build.gradle
index 474c551e287..30c74c3e46d 100644
--- a/build.gradle
+++ b/build.gradle
@@ -259,7 +259,6 @@ processResources {
 
 task generateSource(dependsOn: ["generateBstGrammarSource",
                                 "generateSearchGrammarSource",
-                                "generateMedlineSource",
                                 "generateBibtexmlSource",
                                 "generateEndnoteSource",
                                 "generateModsSource",
@@ -290,15 +289,6 @@ tasks.register("generateSearchGrammarSource", JavaExec) {
     args  = ["-o","src-gen/main/java/org/jabref/search" , "-visitor", "-no-listener", "-package", "org.jabref.search", "$projectDir/src/main/antlr4/org/jabref/search/Search.g4"]
 }
 
-task generateMedlineSource(type: XjcTask) {
-    group = 'JabRef'
-    description = "Generates java files for the medline importer."
-
-    schemaFile = "src/main/resources/xjc/medline/medline.xsd"
-    outputDirectory = "src-gen/main/java"
-    javaPackage = "org.jabref.logic.importer.fileformat.medline"
-}
-
 task generateBibtexmlSource(type: XjcTask) {
     group = 'JabRef'
     description = "Generates java files for the bibtexml importer."
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index 86ec8e13f3c..d17acde8f19 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -24,11 +24,11 @@
 import org.jabref.logic.importer.ParseException;
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.ParserResult;
-import org.jabref.logic.importer.fileformat.medline.ArticleIDRec;
-import org.jabref.logic.importer.fileformat.medline.InvestigatorRec;
-import org.jabref.logic.importer.fileformat.medline.MeshHeadingRec;
-import org.jabref.logic.importer.fileformat.medline.OtherIDRec;
-import org.jabref.logic.importer.fileformat.medline.PersonalNameSubjectRec;
+import org.jabref.logic.importer.fileformat.medline.ArticleID;
+import org.jabref.logic.importer.fileformat.medline.Investigator;
+import org.jabref.logic.importer.fileformat.medline.MeshHeading;
+import org.jabref.logic.importer.fileformat.medline.OtherID;
+import org.jabref.logic.importer.fileformat.medline.PersonalNameSubject;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.Date;
@@ -106,6 +106,8 @@ public ParserResult importDatabase(BufferedReader input) throws IOException {
 
             // prevent xxe (https://rules.sonarsource.com/java/RSPEC-2755)
             xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+            // coalesce adjacent character data so that text containing character references such as &#xf6; is read as a single CHARACTERS event
+            xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, true);
 
             XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(input);
 
@@ -297,12 +299,7 @@ private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fie
                         String eidType = reader.getAttributeValue(null, "EIdType");
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            if (eidType.equals("doi")) {
-                                fields.put(StandardField.DOI, reader.getText());
-                            }
-                            if (eidType.equals("pii")) {
-                                fields.put(new UnknownField("pii"), reader.getText());
-                            }
+                            handleElocationID(fields, reader, eidType);
                         }
                     }
                     case "Isbn" -> {
@@ -324,6 +321,15 @@ private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fie
         }
     }
 
+    private void handleElocationID(Map<Field, String> fields, XMLStreamReader reader, String eidType) {
+        if (eidType.equals("doi")) {
+            fields.put(StandardField.DOI, reader.getText());
+        }
+        if (eidType.equals("pii")) {
+            fields.put(new UnknownField("pii"), reader.getText());
+        }
+    }
+
     private void parseSections(XMLStreamReader reader, List<String> sectionTitleList) throws XMLStreamException {
         int sectionLevel = 0;
 
@@ -387,7 +393,7 @@ private void parseArticle(XMLStreamReader reader, List<BibEntry> bibItems, Strin
     private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields, String startElement)
             throws XMLStreamException {
         String publicationStatus = "";
-        List<ArticleIDRec> articleIDList = new ArrayList<>();
+        List<ArticleID> articleIDList = new ArrayList<>();
 
         while (reader.hasNext()) {
             reader.next();
@@ -404,7 +410,7 @@ private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields,
                         String idType = reader.getAttributeValue(null, "IdType");
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            articleIDList.add(new ArticleIDRec(idType, reader.getText()));
+                            articleIDList.add(new ArticleID(idType, reader.getText()));
                         }
                     }
                 }
@@ -427,12 +433,12 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
             throws XMLStreamException {
         // multiple occurrences of the following fields can be present
         List<String> citationSubsets = new ArrayList<>();
-        List<MeshHeadingRec> meshHeadingList = new ArrayList<>();
-        List<PersonalNameSubjectRec> personalNameSubjectList = new ArrayList<>();
-        List<OtherIDRec> otherIDList = new ArrayList<>();
+        List<MeshHeading> meshHeadingList = new ArrayList<>();
+        List<PersonalNameSubject> personalNameSubjectList = new ArrayList<>();
+        List<OtherID> otherIDList = new ArrayList<>();
         List<String> keywordList = new ArrayList<>();
         List<String> spaceFlightMissionList = new ArrayList<>();
-        List<InvestigatorRec> investigatorList = new ArrayList<>();
+        List<Investigator> investigatorList = new ArrayList<>();
         List<String> generalNoteList = new ArrayList<>();
 
         String status = reader.getAttributeValue(null, "Status");
@@ -481,7 +487,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                     case "MeshHeading" -> {
                         parseMeshHeading(reader, meshHeadingList, elementName);
                     }
-                    case "NumberofReferences" -> {
+                    case "NumberOfReferences" -> {
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
                             putIfValueNotNull(fields, new UnknownField("references"), reader.getText());
@@ -495,7 +501,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
                             String content = reader.getText();
-                            otherIDList.add(new OtherIDRec(otherIdSource, content));
+                            otherIDList.add(new OtherID(otherIdSource, content));
                         }
                     }
                     case "Keyword" -> {
@@ -542,7 +548,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         addNotes(fields, generalNoteList);
     }
 
-    private void parseInvestigator(XMLStreamReader reader, List<InvestigatorRec> investigatorList, String startElement)
+    private void parseInvestigator(XMLStreamReader reader, List<Investigator> investigatorList, String startElement)
             throws XMLStreamException {
         String lastName = "";
         String foreName = "";
@@ -579,10 +585,10 @@ private void parseInvestigator(XMLStreamReader reader, List<InvestigatorRec> inv
             }
         }
 
-        investigatorList.add(new InvestigatorRec(lastName, foreName, affiliationList));
+        investigatorList.add(new Investigator(lastName, foreName, affiliationList));
     }
 
-    private void parsePersonalNameSubject(XMLStreamReader reader, List<PersonalNameSubjectRec> personalNameSubjectList, String startElement)
+    private void parsePersonalNameSubject(XMLStreamReader reader, List<PersonalNameSubject> personalNameSubjectList, String startElement)
             throws XMLStreamException {
         String lastName = "";
         String foreName = "";
@@ -612,10 +618,10 @@ private void parsePersonalNameSubject(XMLStreamReader reader, List<PersonalNameS
             }
         }
 
-        personalNameSubjectList.add(new PersonalNameSubjectRec(lastName, foreName));
+        personalNameSubjectList.add(new PersonalNameSubject(lastName, foreName));
     }
 
-    private void parseMeshHeading(XMLStreamReader reader, List<MeshHeadingRec> meshHeadingList, String startElement)
+    private void parseMeshHeading(XMLStreamReader reader, List<MeshHeading> meshHeadingList, String startElement)
             throws XMLStreamException {
         String descriptorName = "";
         List<String> qualifierNames = new ArrayList<>();
@@ -645,7 +651,7 @@ private void parseMeshHeading(XMLStreamReader reader, List<MeshHeadingRec> meshH
             }
         }
 
-        meshHeadingList.add(new MeshHeadingRec(descriptorName, qualifierNames));
+        meshHeadingList.add(new MeshHeading(descriptorName, qualifierNames));
     }
 
     private void parseGeneSymbolList(XMLStreamReader reader, Map<Field, String> fields, String startElement)
@@ -761,14 +767,10 @@ private void parseArticleInformation(XMLStreamReader reader, Map<Field, String>
                     }
                     case "ELocationID" -> {
                         String eidType = reader.getAttributeValue(null, "EIdType");
+                        String validYN = reader.getAttributeValue(null, "ValidYN");
                         reader.next();
-                        if (isCharacterXMLEvent(reader)) {
-                            if (eidType.equals("doi")) {
-                                fields.put(StandardField.DOI, reader.getText());
-                            }
-                            if (eidType.equals("pii")) {
-                                fields.put(new UnknownField("pii"), reader.getText());
-                            }
+                        if (isCharacterXMLEvent(reader) && "Y".equals(validYN)) {
+                            handleElocationID(fields, reader, eidType);
                         }
                     }
                     case "Abstract" -> {
@@ -879,17 +881,13 @@ private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String
                 fields.put(new UnknownField(dateFieldMap.get(startElement)), dateValue.getNormalized()));
     }
 
-    private String convertToDateFormat(String year, String month, String day) {
-        return String.format("%s-%s-%s", year, month, day);
-    }
-
-    private void addArticleIdList(Map<Field, String> fields, List<ArticleIDRec> articleIdList) {
-        for (ArticleIDRec id : articleIdList) {
+    private void addArticleIdList(Map<Field, String> fields, List<ArticleID> articleIdList) {
+        for (ArticleID id : articleIdList) {
             if (!id.idType().isBlank()) {
                 if ("pubmed".equals(id.idType())) {
                     fields.computeIfAbsent(StandardField.PMID, k -> id.content());
                 } else {
-                    fields.put(FieldFactory.parseField(StandardEntryType.Article, id.idType()), id.content());
+                    fields.computeIfAbsent(FieldFactory.parseField(StandardEntryType.Article, id.idType()), k -> id.content());
                 }
             }
         }
@@ -909,13 +907,13 @@ private void addNotes(Map<Field, String> fields, List<String> generalNoteList) {
         }
     }
 
-    private void addInvestigators(Map<Field, String> fields, List<InvestigatorRec> investigatorList) {
+    private void addInvestigators(Map<Field, String> fields, List<Investigator> investigatorList) {
         List<String> investigatorNames = new ArrayList<>();
         List<String> affiliationInfos = new ArrayList<>();
 
         // add the investigators like the authors
         if (!investigatorList.isEmpty()) {
-            for (InvestigatorRec investigator : investigatorList) {
+            for (Investigator investigator : investigatorList) {
                 StringBuilder result = new StringBuilder(investigator.lastName());
                 if (!investigator.foreName().isBlank()) {
                     result.append(", ").append(investigator.foreName());
@@ -950,21 +948,21 @@ private void addKeywords(Map<Field, String> fields, List<String> keywordList) {
         }
     }
 
-    private void addOtherId(Map<Field, String> fields, List<OtherIDRec> otherIDList) {
-        for (OtherIDRec id : otherIDList) {
+    private void addOtherId(Map<Field, String> fields, List<OtherID> otherIDList) {
+        for (OtherID id : otherIDList) {
             if (!id.source().isBlank() && !id.content().isBlank()) {
                 fields.put(FieldFactory.parseField(StandardEntryType.Article, id.source()), id.content());
             }
         }
     }
 
-    private void addPersonalNames(Map<Field, String> fields, List<PersonalNameSubjectRec> personalNameSubjectList) {
+    private void addPersonalNames(Map<Field, String> fields, List<PersonalNameSubject> personalNameSubjectList) {
         if (fields.get(StandardField.AUTHOR) == null) {
             // if no authors appear, then add the personal names as authors
             List<String> personalNames = new ArrayList<>();
 
             if (!personalNameSubjectList.isEmpty()) {
-                for (PersonalNameSubjectRec personalNameSubject : personalNameSubjectList) {
+                for (PersonalNameSubject personalNameSubject : personalNameSubjectList) {
                     StringBuilder result = new StringBuilder(personalNameSubject.lastName());
                     if (!personalNameSubject.foreName().isBlank()) {
                         result.append(", ").append(personalNameSubject.foreName());
@@ -977,11 +975,11 @@ private void addPersonalNames(Map<Field, String> fields, List<PersonalNameSubjec
         }
     }
 
-    private void addMeshHeading(Map<Field, String> fields, List<MeshHeadingRec> meshHeadingList) {
+    private void addMeshHeading(Map<Field, String> fields, List<MeshHeading> meshHeadingList) {
         List<String> keywords = new ArrayList<>();
 
         if (!meshHeadingList.isEmpty()) {
-            for (MeshHeadingRec meshHeading : meshHeadingList) {
+            for (MeshHeading meshHeading : meshHeadingList) {
                 StringBuilder result = new StringBuilder(meshHeading.descriptorName());
                 if (meshHeading.qualifierNames() != null) {
                     for (String qualifierName : meshHeading.qualifierNames()) {
@@ -1136,7 +1134,7 @@ private void handleAuthorList(XMLStreamReader reader, Map<Field, String> fields,
     }
 
     private void parseAuthor(XMLStreamReader reader, List<String> authorNames) throws XMLStreamException {
-        String authorName = "";
+        StringBuilder authorName = new StringBuilder();
         List<String> collectiveNames = new ArrayList<>();
 
         while (reader.hasNext()) {
@@ -1153,13 +1151,13 @@ private void parseAuthor(XMLStreamReader reader, List<String> authorNames) throw
                     case "LastName" -> {
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            authorName = reader.getText();
+                            authorName = new StringBuilder(reader.getText());
                         }
                     }
                     case "ForeName" -> {
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            authorName += ", " + reader.getText();
+                            authorName.append(", ").append(reader.getText());
                         }
                     }
                 }
@@ -1173,8 +1171,8 @@ private void parseAuthor(XMLStreamReader reader, List<String> authorNames) throw
         if (collectiveNames.size() > 0) {
             authorNames.addAll(collectiveNames);
         }
-        if (!authorName.isBlank()) {
-            authorNames.add(authorName);
+        if (!authorName.toString().isBlank()) {
+            authorNames.add(authorName.toString());
         }
     }
 
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleID.java
similarity index 79%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleID.java
index b39cfc7b8f8..338d698b8ee 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleIDRec.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleID.java
@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fileformat.medline;
 
-public record ArticleIDRec(
+public record ArticleID(
         String idType,
         String content
 ) {
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/Investigator.java
similarity index 84%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/Investigator.java
index a2efb856d7b..64ea31e6206 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/InvestigatorRec.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/Investigator.java
@@ -2,7 +2,7 @@
 
 import java.util.List;
 
-public record InvestigatorRec(
+public record Investigator(
         String lastName,
         String foreName,
         List<String> affiliationList
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeading.java
similarity index 83%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeading.java
index 413fcf64960..a78f65f9727 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeadingRec.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/MeshHeading.java
@@ -2,7 +2,7 @@
 
 import java.util.List;
 
-public record MeshHeadingRec(
+public record MeshHeading(
         String descriptorName,
         List<String> qualifierNames
 ) {
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherID.java
similarity index 80%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/OtherID.java
index d653df925d3..a3b07e4912b 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherIDRec.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherID.java
@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fileformat.medline;
 
-public record OtherIDRec(
+public record OtherID(
         String source,
         String content
 ) {
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubject.java
similarity index 74%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubject.java
index 202c800ef1e..bda9c6aefff 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubjectRec.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/PersonalNameSubject.java
@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fileformat.medline;
 
-public record PersonalNameSubjectRec(
+public record PersonalNameSubject(
         String lastName,
         String foreName
 ) {

From 5612b7b83eadc6ee8c3ab6b8a92ec7db4ea996ed Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Thu, 16 Mar 2023 20:12:26 -0700
Subject: [PATCH 7/8] update class/variable names, add changelog

---
 CHANGELOG.md                                  |   1 +
 .../importer/fileformat/MedlineImporter.java  |  32 +-
 .../{ArticleID.java => ArticleId.java}        |   2 +-
 .../medline/{OtherID.java => OtherId.java}    |   2 +-
 src/main/resources/xjc/medline/medline.xsd    | 314 ------------------
 5 files changed, 19 insertions(+), 332 deletions(-)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{ArticleID.java => ArticleId.java} (81%)
 rename src/main/java/org/jabref/logic/importer/fileformat/medline/{OtherID.java => OtherId.java} (82%)
 delete mode 100644 src/main/resources/xjc/medline/medline.xsd

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2073b82de19..1d36e856eb2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 - 'Get full text' now also checks the file url. [#568](https://github.com/koppor/jabref/issues/568)
 - We refined the 'main directory not found' error message. [#9625](https://github.com/JabRef/jabref/pull/9625)
 - We modified the `Add Group` dialog to use the most recently selected group hierarchical context [#9141](https://github.com/JabRef/jabref/issues/9141)
+- We improved the Medline importer to correctly import ISO dates for `revised`. [#9536](https://github.com/JabRef/jabref/issues/9536)
 
 
 
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index d17acde8f19..491151365f4 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -24,10 +24,10 @@
 import org.jabref.logic.importer.ParseException;
 import org.jabref.logic.importer.Parser;
 import org.jabref.logic.importer.ParserResult;
-import org.jabref.logic.importer.fileformat.medline.ArticleID;
+import org.jabref.logic.importer.fileformat.medline.ArticleId;
 import org.jabref.logic.importer.fileformat.medline.Investigator;
 import org.jabref.logic.importer.fileformat.medline.MeshHeading;
-import org.jabref.logic.importer.fileformat.medline.OtherID;
+import org.jabref.logic.importer.fileformat.medline.OtherId;
 import org.jabref.logic.importer.fileformat.medline.PersonalNameSubject;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.model.entry.BibEntry;
@@ -299,7 +299,7 @@ private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fie
                         String eidType = reader.getAttributeValue(null, "EIdType");
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            handleElocationID(fields, reader, eidType);
+                            handleElocationId(fields, reader, eidType);
                         }
                     }
                     case "Isbn" -> {
@@ -321,7 +321,7 @@ private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fie
         }
     }
 
-    private void handleElocationID(Map<Field, String> fields, XMLStreamReader reader, String eidType) {
+    private void handleElocationId(Map<Field, String> fields, XMLStreamReader reader, String eidType) {
         if (eidType.equals("doi")) {
             fields.put(StandardField.DOI, reader.getText());
         }
@@ -393,7 +393,7 @@ private void parseArticle(XMLStreamReader reader, List<BibEntry> bibItems, Strin
     private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields, String startElement)
             throws XMLStreamException {
         String publicationStatus = "";
-        List<ArticleID> articleIDList = new ArrayList<>();
+        List<ArticleId> articleIdList = new ArrayList<>();
 
         while (reader.hasNext()) {
             reader.next();
@@ -410,7 +410,7 @@ private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields,
                         String idType = reader.getAttributeValue(null, "IdType");
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            articleIDList.add(new ArticleID(idType, reader.getText()));
+                            articleIdList.add(new ArticleId(idType, reader.getText()));
                         }
                     }
                 }
@@ -423,8 +423,8 @@ private void parsePubmedData(XMLStreamReader reader, Map<Field, String> fields,
 
         if (fields.get(new UnknownField("revised")) != null) {
             putIfValueNotNull(fields, StandardField.PUBSTATE, publicationStatus);
-            if (!articleIDList.isEmpty()) {
-                addArticleIdList(fields, articleIDList);
+            if (!articleIdList.isEmpty()) {
+                addArticleIdList(fields, articleIdList);
             }
         }
     }
@@ -435,7 +435,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         List<String> citationSubsets = new ArrayList<>();
         List<MeshHeading> meshHeadingList = new ArrayList<>();
         List<PersonalNameSubject> personalNameSubjectList = new ArrayList<>();
-        List<OtherID> otherIDList = new ArrayList<>();
+        List<OtherId> otherIdList = new ArrayList<>();
         List<String> keywordList = new ArrayList<>();
         List<String> spaceFlightMissionList = new ArrayList<>();
         List<Investigator> investigatorList = new ArrayList<>();
@@ -501,7 +501,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
                             String content = reader.getText();
-                            otherIDList.add(new OtherID(otherIdSource, content));
+                            otherIdList.add(new OtherId(otherIdSource, content));
                         }
                     }
                     case "Keyword" -> {
@@ -539,7 +539,7 @@ private void parseMedlineCitation(XMLStreamReader reader, Map<Field, String> fie
         }
         addMeshHeading(fields, meshHeadingList);
         addPersonalNames(fields, personalNameSubjectList);
-        addOtherId(fields, otherIDList);
+        addOtherId(fields, otherIdList);
         addKeywords(fields, keywordList);
         if (!spaceFlightMissionList.isEmpty()) {
             fields.put(new UnknownField("space-flight-mission"), join(spaceFlightMissionList, ", "));
@@ -770,7 +770,7 @@ private void parseArticleInformation(XMLStreamReader reader, Map<Field, String>
                         String validYN = reader.getAttributeValue(null, "ValidYN");
                         reader.next();
                         if (isCharacterXMLEvent(reader) && "Y".equals(validYN)) {
-                            handleElocationID(fields, reader, eidType);
+                            handleElocationId(fields, reader, eidType);
                         }
                     }
                     case "Abstract" -> {
@@ -881,8 +881,8 @@ private void parseDate(XMLStreamReader reader, Map<Field, String> fields, String
                 fields.put(new UnknownField(dateFieldMap.get(startElement)), dateValue.getNormalized()));
     }
 
-    private void addArticleIdList(Map<Field, String> fields, List<ArticleID> articleIdList) {
-        for (ArticleID id : articleIdList) {
+    private void addArticleIdList(Map<Field, String> fields, List<ArticleId> articleIdList) {
+        for (ArticleId id : articleIdList) {
             if (!id.idType().isBlank()) {
                 if ("pubmed".equals(id.idType())) {
                     fields.computeIfAbsent(StandardField.PMID, k -> id.content());
@@ -948,8 +948,8 @@ private void addKeywords(Map<Field, String> fields, List<String> keywordList) {
         }
     }
 
-    private void addOtherId(Map<Field, String> fields, List<OtherID> otherIDList) {
-        for (OtherID id : otherIDList) {
+    private void addOtherId(Map<Field, String> fields, List<OtherId> otherIdList) {
+        for (OtherId id : otherIdList) {
             if (!id.source().isBlank() && !id.content().isBlank()) {
                 fields.put(FieldFactory.parseField(StandardEntryType.Article, id.source()), id.content());
             }
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleID.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleId.java
similarity index 81%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleID.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleId.java
index 338d698b8ee..3a8be1b9b63 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleID.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/ArticleId.java
@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fileformat.medline;
 
-public record ArticleID(
+public record ArticleId(
         String idType,
         String content
 ) {
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherID.java b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherId.java
similarity index 82%
rename from src/main/java/org/jabref/logic/importer/fileformat/medline/OtherID.java
rename to src/main/java/org/jabref/logic/importer/fileformat/medline/OtherId.java
index a3b07e4912b..4429436c332 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherID.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/medline/OtherId.java
@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fileformat.medline;
 
-public record OtherID(
+public record OtherId(
         String source,
         String content
 ) {
diff --git a/src/main/resources/xjc/medline/medline.xsd b/src/main/resources/xjc/medline/medline.xsd
deleted file mode 100644
index 9c2f73a5bdd..00000000000
--- a/src/main/resources/xjc/medline/medline.xsd
+++ /dev/null
@@ -1,314 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- Source: https://github.com/lstern/psilibrary/blob/master/doc/medline/medline.xsd-->
-<!--
-     This is the Current DTD which NLM has written for 
-  External  Use.  If you are a NCBI User, use the information
-  from the PubmedArticleSet.
-  
-  Comments and suggestions are welcome.
-  (May 9, 2000)
-  
-  Corrections:
-  ~~~~~~~~~~~
-  Oct. 09 2002 
-  - "PubMedArticle" has been renamed to "PubmedArticle"
-  - All referencies to "PubMedArticle" has been removed
-  - "ProviderId" has been removed from PubmedData
-  - "URL" has been removed from PubmdeData
-  
-  	$Id: pubmed_160101.dtd 480858 2015-10-05 15:12:19Z korobtch $
-  
--->
-<!-- ================================================================= -->
-<!-- ================================================================= -->
-<!-- Reference to Where the MEDLINECITATION DTD is located -->
-<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">
-  <xs:include schemaLocation="nlmmedlinecitationset_160101.xsd"/>
-  <xs:include schemaLocation="bookdoc_160101.xsd"/>
-  <!-- ================================================================= -->
-  <xs:simpleType name="iso.language.codes">
-    <xs:restriction base="xs:token">
-      <xs:enumeration value="AF"/>
-      <xs:enumeration value="AR"/>
-      <xs:enumeration value="AZ"/>
-      <xs:enumeration value="BG"/>
-      <xs:enumeration value="CS"/>
-      <xs:enumeration value="DA"/>
-      <xs:enumeration value="DE"/>
-      <xs:enumeration value="EN"/>
-      <xs:enumeration value="EL"/>
-      <xs:enumeration value="ES"/>
-      <xs:enumeration value="FA"/>
-      <xs:enumeration value="FI"/>
-      <xs:enumeration value="FR"/>
-      <xs:enumeration value="HE"/>
-      <xs:enumeration value="HU"/>
-      <xs:enumeration value="HY"/>
-      <xs:enumeration value="IN"/>
-      <xs:enumeration value="IS"/>
-      <xs:enumeration value="IT"/>
-      <xs:enumeration value="IW"/>
-      <xs:enumeration value="JA"/>
-      <xs:enumeration value="KA"/>
-      <xs:enumeration value="KO"/>
-      <xs:enumeration value="LT"/>
-      <xs:enumeration value="MK"/>
-      <xs:enumeration value="ML"/>
-      <xs:enumeration value="NL"/>
-      <xs:enumeration value="NO"/>
-      <xs:enumeration value="PL"/>
-      <xs:enumeration value="PT"/>
-      <xs:enumeration value="PS"/>
-      <xs:enumeration value="RO"/>
-      <xs:enumeration value="RU"/>
-      <xs:enumeration value="SL"/>
-      <xs:enumeration value="SK"/>
-      <xs:enumeration value="SQ"/>
-      <xs:enumeration value="SR"/>
-      <xs:enumeration value="SV"/>
-      <xs:enumeration value="SW"/>
-      <xs:enumeration value="TH"/>
-      <xs:enumeration value="TR"/>
-      <xs:enumeration value="UK"/>
-      <xs:enumeration value="VI"/>
-      <xs:enumeration value="ZH"/>
-    </xs:restriction>
-  </xs:simpleType>
-  <xs:simpleType name="pub.status.int">
-    <xs:restriction base="xs:token">
-      <xs:enumeration value="pmc"/>
-      <xs:enumeration value="pmcr"/>
-      <xs:enumeration value="pubmed"/>
-      <xs:enumeration value="pubmedr"/>
-      <xs:enumeration value="premedline"/>
-      <xs:enumeration value="medline"/>
-      <xs:enumeration value="medliner"/>
-      <xs:enumeration value="entrez"/>
-      <xs:enumeration value="pmc-release"/>
-    </xs:restriction>
-  </xs:simpleType>
-  <xs:simpleType name="pub.status">
-    <xs:union memberTypes="pub.status.int">
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="received"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="accepted"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="epublish"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="ppublish"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="revised"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="aheadofprint"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="retracted"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="ecollection"/>
-        </xs:restriction>
-      </xs:simpleType>
-    </xs:union>
-  </xs:simpleType>
-  <xs:simpleType name="art.id.type.int">
-    <xs:restriction base="xs:token">
-      <xs:enumeration value="pubmed"/>
-      <xs:enumeration value="medline"/>
-      <xs:enumeration value="pmcid"/>
-      <xs:enumeration value="pmcbook"/>
-      <xs:enumeration value="bookaccession"/>
-    </xs:restriction>
-  </xs:simpleType>
-  <xs:simpleType name="art.id.type">
-    <xs:union memberTypes="art.id.type.int">
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="doi"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="pii"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="pmcpid"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="pmpid"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="pmc"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="mid"/>
-        </xs:restriction>
-      </xs:simpleType>
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="sici"/>
-        </xs:restriction>
-      </xs:simpleType>
-    </xs:union>
-  </xs:simpleType>
-  <!-- ================================================================= -->
-  <xs:element name="PubmedArticleSet">
-    <xs:complexType>
-      <xs:choice maxOccurs="unbounded">
-        <xs:element ref="PubmedArticle"/>
-        <xs:element ref="PubmedBookArticle"/>
-      </xs:choice>
-    </xs:complexType>
-  </xs:element>
-  <!-- ================================================================= -->
-  <!-- This is the top level element for PubMedArticle -->
-  <xs:element name="PubmedArticle">
-    <xs:complexType>
-      <xs:sequence>
-        <xs:element ref="MedlineCitation"/>
-        <xs:element minOccurs="0" ref="PubmedData"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
-  <!-- ================================================================= -->
-  <xs:complexType name="normal.date">
-    <xs:sequence>
-      <xs:element ref="Year"/>
-      <xs:element ref="Month"/>
-      <xs:element ref="Day"/>
-      <xs:sequence minOccurs="0">
-        <xs:element ref="Hour"/>
-        <xs:sequence minOccurs="0">
-          <xs:element ref="Minute"/>
-          <xs:element minOccurs="0" ref="Second"/>
-        </xs:sequence>
-      </xs:sequence>
-    </xs:sequence>
-  </xs:complexType>
-  <xs:element name="PubmedData">
-    <xs:complexType>
-      <xs:sequence>
-        <xs:element minOccurs="0" ref="History"/>
-        <xs:element ref="PublicationStatus"/>
-        <xs:element ref="ArticleIdList"/>
-        <xs:element minOccurs="0" ref="ObjectList"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
-  <xs:element name="PubMedPubDate">
-    <xs:complexType>
-      <xs:complexContent>
-        <xs:extension base="normal.date">
-          <xs:attributeGroup ref="attlist.PubMedPubDate"/>
-        </xs:extension>
-      </xs:complexContent>
-    </xs:complexType>
-  </xs:element>
-  <xs:attributeGroup name="attlist.PubMedPubDate">
-    <xs:attribute name="PubStatus" use="required" type="pub.status"/>
-  </xs:attributeGroup>
-  <xs:element name="PublicationStatus" type="xs:string"/>
-  <xs:element name="ArticleIdList">
-    <xs:complexType>
-      <xs:sequence>
-        <xs:element maxOccurs="unbounded" ref="ArticleId"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
-  <xs:element name="ArticleId">
-    <xs:complexType mixed="true">
-      <xs:attributeGroup ref="attlist.ArticleId"/>
-    </xs:complexType>
-  </xs:element>
-  <xs:attributeGroup name="attlist.ArticleId">
-    <xs:attribute name="IdType" default="pubmed" type="art.id.type"/>
-  </xs:attributeGroup>
-  <xs:element name="History">
-    <xs:complexType>
-      <xs:sequence>
-        <xs:element maxOccurs="unbounded" ref="PubMedPubDate"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
-  <xs:element name="URL">
-    <xs:complexType mixed="true">
-      <xs:attributeGroup ref="attlist.URL"/>
-    </xs:complexType>
-  </xs:element>
-  <xs:attributeGroup name="attlist.URL">
-    <xs:attribute name="lang" type="iso.language.codes"/>
-    <xs:attribute name="Type">
-      <xs:simpleType>
-        <xs:restriction base="xs:token">
-          <xs:enumeration value="FullText"/>
-          <xs:enumeration value="Summary"/>
-          <xs:enumeration value="fulltext"/>
-          <xs:enumeration value="summary"/>
-        </xs:restriction>
-      </xs:simpleType>
-    </xs:attribute>
-  </xs:attributeGroup>
-  <xs:element name="ObjectList">
-    <xs:complexType>
-      <xs:sequence>
-        <xs:element maxOccurs="unbounded" ref="Object"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
-  <xs:element name="Object">
-    <xs:complexType>
-      <xs:sequence>
-        <xs:element minOccurs="0" maxOccurs="unbounded" ref="Param"/>
-      </xs:sequence>
-      <xs:attributeGroup ref="attlist.Object"/>
-    </xs:complexType>
-  </xs:element>
-  <xs:attributeGroup name="attlist.Object">
-    <xs:attribute name="Type" use="required"/>
-  </xs:attributeGroup>
-  <xs:element name="Param">
-    <xs:complexType>
-      <xs:complexContent>
-        <xs:extension base="text">
-          <xs:attributeGroup ref="attlist.Param"/>
-        </xs:extension>
-      </xs:complexContent>
-    </xs:complexType>
-  </xs:element>
-  <xs:attributeGroup name="attlist.Param">
-    <xs:attribute name="Name" use="required"/>
-  </xs:attributeGroup>
-  <xs:element name="Hour" type="xs:string"/>
-  <xs:element name="Minute" type="xs:string"/>
-  <xs:element name="Second" type="xs:string"/>
-  <!-- ================================================================= -->
-</xs:schema>
-<!-- ================================================================= -->

From d4f1d49b1b6299eca7ecc9f0571a5ed15a02a326 Mon Sep 17 00:00:00 2001
From: Nitin Suresh <aqurilla@gmail.com>
Date: Fri, 17 Mar 2023 21:03:42 -0700
Subject: [PATCH 8/8] handle text element containing italics/bold tags

---
 .../importer/fileformat/MedlineImporter.java  |  58 +-
 .../MedlineImporterTestArticleItalics.bib     |  26 +
 .../MedlineImporterTestArticleItalics.xml     | 652 ++++++++++++++++++
 3 files changed, 725 insertions(+), 11 deletions(-)
 create mode 100644 src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib
 create mode 100644 src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
index 491151365f4..4522c34429f 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/MedlineImporter.java
@@ -241,6 +241,7 @@ private void parseBookDocument(XMLStreamReader reader, Map<Field, String> fields
     private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fields, String startElement)
             throws XMLStreamException {
         List<String> isbnList = new ArrayList<>();
+        List<String> titleList = new ArrayList<>();
 
         while (reader.hasNext()) {
             reader.next();
@@ -260,10 +261,7 @@ private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fie
                         }
                     }
                     case "BookTitle" -> {
-                        reader.next();
-                        if (isCharacterXMLEvent(reader)) {
-                            putIfValueNotNull(fields, StandardField.TITLE, reader.getText());
-                        }
+                        handleTextElement(reader, titleList, elementName);
                     }
                     case "PubDate" -> {
                         addPubDate(reader, fields, elementName);
@@ -319,6 +317,10 @@ private void parseBookInformation(XMLStreamReader reader, Map<Field, String> fie
         if (!isbnList.isEmpty()) {
             fields.put(StandardField.ISBN, join(isbnList, ", "));
         }
+
+        if (!titleList.isEmpty()) {
+            putIfValueNotNull(fields, StandardField.TITLE, join(titleList, " "));
+        }
     }
 
     private void handleElocationId(Map<Field, String> fields, XMLStreamReader reader, String eidType) {
@@ -745,6 +747,7 @@ private void parseMedlineJournalInfo(XMLStreamReader reader, Map<Field, String>
     }
 
     private void parseArticleInformation(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
+        List<String> titleList = new ArrayList<>();
         String pubmodel = reader.getAttributeValue(null, "PubModel");
         fields.put(new UnknownField("pubmodel"), pubmodel);
 
@@ -757,10 +760,7 @@ private void parseArticleInformation(XMLStreamReader reader, Map<Field, String>
                         parseJournal(reader, fields);
                     }
                     case "ArticleTitle" -> {
-                        reader.next();
-                        if (isCharacterXMLEvent(reader)) {
-                            fields.put(StandardField.TITLE, StringUtil.stripBrackets(reader.getText()));
-                        }
+                        handleTextElement(reader, titleList, elementName);
                     }
                     case "Pagination" -> {
                         addPagination(reader, fields, elementName);
@@ -786,6 +786,10 @@ private void parseArticleInformation(XMLStreamReader reader, Map<Field, String>
                 break;
             }
         }
+
+        if (!titleList.isEmpty()) {
+            fields.put(StandardField.TITLE, StringUtil.stripBrackets(join(titleList, " ")));
+        }
     }
 
     private void parseJournal(XMLStreamReader reader, Map<Field, String> fields) throws XMLStreamException {
@@ -1035,7 +1039,7 @@ private void addPubDate(XMLStreamReader reader, Map<Field, String> fields, Strin
 
     private void addAbstract(XMLStreamReader reader, Map<Field, String> fields, String startElement)
             throws XMLStreamException {
-        List<String> abstractText = new ArrayList<>();
+        List<String> abstractTextList = new ArrayList<>();
 
         while (reader.hasNext()) {
             reader.next();
@@ -1049,12 +1053,44 @@ private void addAbstract(XMLStreamReader reader, Map<Field, String> fields, Stri
                         }
                     }
                     case "AbstractText" -> {
+                        handleTextElement(reader, abstractTextList, elementName);
+                    }
+                }
+            }
+
+            if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
+                break;
+            }
+        }
+
+        if (!abstractTextList.isEmpty()) {
+            fields.put(StandardField.ABSTRACT, join(abstractTextList, " "));
+        }
+    }
+
+    /**
+     * Handles text entities that can have inner tags such as {@literal <}i{@literal >}, {@literal <}b{@literal >} etc.
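+     * The tags themselves are ignored: the trimmed character data of the enclosing parent element is concatenated
+     * into a single string and appended to {@code textList}; text directly inside sup/sub tags is put in parentheses.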
+     */
+    private void handleTextElement(XMLStreamReader reader, List<String> textList, String startElement)
+            throws XMLStreamException {
+        StringBuilder result = new StringBuilder();
+
+        while (reader.hasNext()) {
+            reader.next();
+            if (isStartXMLEvent(reader)) {
+                String elementName = reader.getName().getLocalPart();
+                switch (elementName) {
+                    case "sup", "sub" -> {
                         reader.next();
                         if (isCharacterXMLEvent(reader)) {
-                            abstractText.add(reader.getText());
+                            result.append("(").append(reader.getText()).append(")");
                         }
                     }
                 }
+            } else if (isCharacterXMLEvent(reader)) {
+                result.append(reader.getText().trim()).append(" ");
             }
 
             if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) {
@@ -1062,7 +1098,7 @@ private void addAbstract(XMLStreamReader reader, Map<Field, String> fields, Stri
             }
         }
 
-        fields.put(StandardField.ABSTRACT, join(abstractText, " "));
+        textList.add(result.toString().trim());
     }
 
     private void addPagination(XMLStreamReader reader, Map<Field, String> fields, String startElement)
diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib
new file mode 100644
index 00000000000..f64ae9d0921
--- /dev/null
+++ b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.bib
@@ -0,0 +1,26 @@
+@Article{,
+  author               = {Moreno-Grau, Sonia and Hernández, Isabel and Heilmann-Heimbach, Stefanie and Ruiz, Susana and Rosende-Roca, Maitée and Mauleón, Ana and Vargas, Liliana and Rodríguez-Gómez, Octavio and Alegret, Montserrat and Espinosa, Ana and Ortega, Gemma and Aguilera, Nuria and Abdelnour, Carla and Neuroimaging Initiative, Alzheimer's Disease and Gil, Silvia and Maier, Wolfgang and Sotolongo-Grau, Oscar and Tárraga, Lluís and Ramirez, Alfredo and López-Arrrieta, Jesús and Antúnez, Carmen and Serrano-Ríos, Manuel and Boada, Mercè and Ruiz, Agustín},
+  journal              = {Oncotarget},
+  title                = {Genome-wide significant risk factors on chromosome 19 and the APOE locus.},
+  year                 = {2018},
+  issn                 = {1949-2553},
+  month                = may,
+  pages                = {24590--24600},
+  volume               = {9},
+  abstract             = {The apolipoprotein E ( APOE ) gene on chromosome 19q13.32, was the first, and remains the strongest, genetic risk factor for Alzheimer's disease (AD). Additional signals associated with AD have been located in chromosome 19, including ABCA7 (19p13.3) and CD33 ( 19q13.41). The ABCA7 gene has been replicated in most populations. However, the contribution to AD of other signals close to APOE gene remains controversial. Possible explanations for inconsistency between reports include long range linkage disequilibrium (LRLD). We analysed the contribution of ABCA7 and CD33 loci to AD risk and explore LRLD patterns across APOE region. To evaluate AD risk conferred by ABCA7 rs4147929:G>A and CD33 rs3865444:C>A, we used a large Spanish population (1796 AD cases, 2642 controls). The ABCA7 rs4147929:G>A SNP effect was nominally replicated in the Spanish cohort and reached genome-wide significance after meta-analysis (odds ratio (OR)=1.15, 95% confidence interval (95% CI)=1.12-1.19; P = 1.60 x 10 (-19)). CD33 rs3865444:C>A was not associated with AD in the dataset. The meta-analysis was also negative (OR=0.98, 95% CI=0.93-1.04; P =0.48). After exploring LRLD patterns between APOE and CD33 in several datasets, we found significant LD (D' >0.20; P <0.030) between APOE -Ɛ2 and CD33 rs3865444C>A in two of five datasets, suggesting the presence of a non-universal long range interaction between these loci affecting to some populations. In conclusion, we provide here evidence of genetic association of the ABCA7 locus in the Spanish population and also propose a plausible explanation for the controversy on the contribution of CD33 to AD susceptibility.},
+  country              = {United States},
+  doi                  = {10.18632/oncotarget.25083},
+  issn-linking         = {1949-2553},
+  issue                = {37},
+  journal-abbreviation = {Oncotarget},
+  keywords             = {ABCA7; APOE; CD33; Gerotarget; late onset Alzheimer’s disease; linkage disequilibrium},
+  nlm-id               = {101532965},
+  owner                = {NLM},
+  pii                  = {25083},
+  pmc                  = {PMC5973862},
+  pmid                 = {29872490},
+  pubmodel             = {Electronic-eCollection},
+  pubstate             = {epublish},
+  revised              = {2019-11-20},
+  status = {PubMed-not-MEDLINE},
+}
diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml
new file mode 100644
index 00000000000..fa4c03e3f4d
--- /dev/null
+++ b/src/test/resources/org/jabref/logic/importer/fileformat/MedlineImporterTestArticleItalics.xml
@@ -0,0 +1,652 @@
+<PubmedArticleSet>
+    <PubmedArticle>
+        <MedlineCitation Status="PubMed-not-MEDLINE" Owner="NLM">
+            <PMID Version="1">29872490</PMID>
+            <DateRevised>
+                <Year>2019</Year>
+                <Month>11</Month>
+                <Day>20</Day>
+            </DateRevised>
+            <Article PubModel="Electronic-eCollection">
+                <Journal>
+                    <ISSN IssnType="Electronic">1949-2553</ISSN>
+                    <JournalIssue CitedMedium="Internet">
+                        <Volume>9</Volume>
+                        <Issue>37</Issue>
+                        <PubDate>
+                            <Year>2018</Year>
+                            <Month>May</Month>
+                            <Day>15</Day>
+                        </PubDate>
+                    </JournalIssue>
+                    <Title>Oncotarget</Title>
+                    <ISOAbbreviation>Oncotarget</ISOAbbreviation>
+                </Journal>
+                <ArticleTitle>Genome-wide significant risk factors on chromosome 19 and the
+                    <i>APOE</i> locus.
+                </ArticleTitle>
+                <Pagination>
+                    <StartPage>24590</StartPage>
+                    <EndPage>24600</EndPage>
+                    <MedlinePgn>24590-24600</MedlinePgn>
+                </Pagination>
+                <ELocationID EIdType="doi" ValidYN="Y">10.18632/oncotarget.25083</ELocationID>
+                <Abstract>
+                    <AbstractText>The apolipoprotein E (
+                        <i>APOE</i>) gene on chromosome 19q13.32, was the first, and remains the strongest, genetic risk factor for Alzheimer's disease (AD). Additional signals associated with AD have been located in chromosome 19, including
+                        <i>ABCA7</i> (19p13.3) and
+                        <i>CD33 (</i>19q13.41). The
+                        <i>ABCA7</i> gene has been replicated in most populations. However, the contribution to AD of other signals close to
+                        <i>APOE</i> gene remains controversial. Possible explanations for inconsistency between reports include long range linkage disequilibrium (LRLD). We analysed the contribution of
+                        <i>ABCA7</i> and
+                        <i>CD33</i> loci to AD risk and explore LRLD patterns across
+                        <i>APOE</i> region. To evaluate AD risk conferred by
+                        <i>ABCA7</i> rs4147929:G&gt;A and
+                        <i>CD33</i> rs3865444:C&gt;A, we used a large Spanish population (1796 AD cases, 2642 controls). The
+                        <i>ABCA7</i> rs4147929:G&gt;A SNP effect was nominally replicated in the Spanish cohort and reached genome-wide significance after meta-analysis (odds ratio (OR)=1.15, 95% confidence interval (95% CI)=1.12-1.19;
+                        <i>P</i> = 1.60 x 10
+                        <sup>-19</sup>).
+                        <i>CD33</i> rs3865444:C&gt;A was not associated with AD in the dataset. The meta-analysis was also negative (OR=0.98, 95% CI=0.93-1.04;
+                        <i>P</i>=0.48). After exploring LRLD patterns between
+                        <i>APOE</i> and
+                        <i>CD33</i> in several datasets, we found significant LD (D' &gt;0.20;
+                        <i>P</i> &lt;0.030) between
+                        <i>APOE</i>-&#x190;2 and
+                        <i>CD33</i> rs3865444C&gt;A in two of five datasets, suggesting the presence of a non-universal long range interaction between these loci affecting to some populations. In conclusion, we provide here evidence of genetic association of the
+                        <i>ABCA7</i> locus in the Spanish population and also propose a plausible explanation for the controversy on the contribution of
+                        <i>CD33</i> to AD susceptibility.
+                    </AbstractText>
+                </Abstract>
+                <AuthorList CompleteYN="Y">
+                    <Author ValidYN="Y">
+                        <LastName>Moreno-Grau</LastName>
+                        <ForeName>Sonia</ForeName>
+                        <Initials>S</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Hern&#xe1;ndez</LastName>
+                        <ForeName>Isabel</ForeName>
+                        <Initials>I</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Heilmann-Heimbach</LastName>
+                        <ForeName>Stefanie</ForeName>
+                        <Initials>S</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Institute of Human Genetics, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Genomics, Life &amp; Brain Center, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Ruiz</LastName>
+                        <ForeName>Susana</ForeName>
+                        <Initials>S</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Rosende-Roca</LastName>
+                        <ForeName>Mait&#xe9;e</ForeName>
+                        <Initials>M</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Maule&#xf3;n</LastName>
+                        <ForeName>Ana</ForeName>
+                        <Initials>A</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Vargas</LastName>
+                        <ForeName>Liliana</ForeName>
+                        <Initials>L</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Rodr&#xed;guez-G&#xf3;mez</LastName>
+                        <ForeName>Octavio</ForeName>
+                        <Initials>O</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Alegret</LastName>
+                        <ForeName>Montserrat</ForeName>
+                        <Initials>M</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Espinosa</LastName>
+                        <ForeName>Ana</ForeName>
+                        <Initials>A</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Ortega</LastName>
+                        <ForeName>Gemma</ForeName>
+                        <Initials>G</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Aguilera</LastName>
+                        <ForeName>Nuria</ForeName>
+                        <Initials>N</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Abdelnour</LastName>
+                        <ForeName>Carla</ForeName>
+                        <Initials>C</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Neuroimaging Initiative</LastName>
+                        <ForeName>Alzheimer's Disease</ForeName>
+                        <Initials>AD</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Institute of Human Genetics, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Genomics, Life &amp; Brain Center, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Psychiatry and Psychotherapy, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>German Center for Neurodegenerative Diseases, DZNE, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Psychiatry and Psychotherapy, University of Cologne, Cologne, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Memory Unit, University Hospital La Paz-Cantoblanco, Madrid, Spain.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Dementia Unit, University Hospital Virgen de la Arrixaca, Murcia, Spain.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Centro de Investigaci&#xf3;n Biom&#xe9;dica en Red de Diabetes y Enfermedades Metab&#xf3;licas Asociadas, CIBERDEM, Spain, Hospital Cl&#xed;nico San Carlos, Madrid, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Gil</LastName>
+                        <ForeName>Silvia</ForeName>
+                        <Initials>S</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Maier</LastName>
+                        <ForeName>Wolfgang</ForeName>
+                        <Initials>W</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Psychiatry and Psychotherapy, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>German Center for Neurodegenerative Diseases, DZNE, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Sotolongo-Grau</LastName>
+                        <ForeName>Oscar</ForeName>
+                        <Initials>O</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>T&#xe1;rraga</LastName>
+                        <ForeName>Llu&#xed;s</ForeName>
+                        <Initials>L</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Ramirez</LastName>
+                        <ForeName>Alfredo</ForeName>
+                        <Initials>A</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Institute of Human Genetics, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Psychiatry and Psychotherapy, University of Bonn, Bonn, Germany.</Affiliation>
+                        </AffiliationInfo>
+                        <AffiliationInfo>
+                            <Affiliation>Department of Psychiatry and Psychotherapy, University of Cologne, Cologne, Germany.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>L&#xf3;pez-Arrrieta</LastName>
+                        <ForeName>Jes&#xfa;s</ForeName>
+                        <Initials>J</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Memory Unit, University Hospital La Paz-Cantoblanco, Madrid, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Ant&#xfa;nez</LastName>
+                        <ForeName>Carmen</ForeName>
+                        <Initials>C</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Dementia Unit, University Hospital Virgen de la Arrixaca, Murcia, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Serrano-R&#xed;os</LastName>
+                        <ForeName>Manuel</ForeName>
+                        <Initials>M</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Centro de Investigaci&#xf3;n Biom&#xe9;dica en Red de Diabetes y Enfermedades Metab&#xf3;licas Asociadas, CIBERDEM, Spain, Hospital Cl&#xed;nico San Carlos, Madrid, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Boada</LastName>
+                        <ForeName>Merc&#xe8;</ForeName>
+                        <Initials>M</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                    <Author ValidYN="Y">
+                        <LastName>Ruiz</LastName>
+                        <ForeName>Agust&#xed;n</ForeName>
+                        <Initials>A</Initials>
+                        <AffiliationInfo>
+                            <Affiliation>Research Center and Memory Clinic of Fundaci&#xf3; ACE, Institut Catal&#xe0; de Neuroci&#xe8;ncies Aplicades, Univesitat Internacional de Catalunya, Barcelona, Spain.</Affiliation>
+                        </AffiliationInfo>
+                    </Author>
+                </AuthorList>
+                <Language>eng</Language>
+                <GrantList CompleteYN="Y">
+                    <Grant>
+                        <GrantID>U01 AG024904</GrantID>
+                        <Acronym>AG</Acronym>
+                        <Agency>NIA NIH HHS</Agency>
+                        <Country>United States</Country>
+                    </Grant>
+                </GrantList>
+                <PublicationTypeList>
+                    <PublicationType UI="D016428">Journal Article</PublicationType>
+                </PublicationTypeList>
+                <ArticleDate DateType="Electronic">
+                    <Year>2018</Year>
+                    <Month>05</Month>
+                    <Day>15</Day>
+                </ArticleDate>
+            </Article>
+            <MedlineJournalInfo>
+                <Country>United States</Country>
+                <MedlineTA>Oncotarget</MedlineTA>
+                <NlmUniqueID>101532965</NlmUniqueID>
+                <ISSNLinking>1949-2553</ISSNLinking>
+            </MedlineJournalInfo>
+            <KeywordList Owner="NOTNLM">
+                <Keyword MajorTopicYN="N">ABCA7</Keyword>
+                <Keyword MajorTopicYN="N">APOE</Keyword>
+                <Keyword MajorTopicYN="N">CD33</Keyword>
+                <Keyword MajorTopicYN="N">Gerotarget</Keyword>
+                <Keyword MajorTopicYN="N">late onset Alzheimer&#x2019;s disease</Keyword>
+                <Keyword MajorTopicYN="N">linkage disequilibrium</Keyword>
+            </KeywordList>
+            <CoiStatement>CONFLICTS OF INTEREST None. The authors declare that they have no competing interest.</CoiStatement>
+        </MedlineCitation>
+        <PubmedData>
+            <History>
+                <PubMedPubDate PubStatus="received">
+                    <Year>2017</Year>
+                    <Month>12</Month>
+                    <Day>13</Day>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="accepted">
+                    <Year>2018</Year>
+                    <Month>3</Month>
+                    <Day>22</Day>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="entrez">
+                    <Year>2018</Year>
+                    <Month>6</Month>
+                    <Day>7</Day>
+                    <Hour>6</Hour>
+                    <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="pubmed">
+                    <Year>2018</Year>
+                    <Month>6</Month>
+                    <Day>7</Day>
+                    <Hour>6</Hour>
+                    <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="medline">
+                    <Year>2018</Year>
+                    <Month>6</Month>
+                    <Day>7</Day>
+                    <Hour>6</Hour>
+                    <Minute>1</Minute>
+                </PubMedPubDate>
+            </History>
+            <PublicationStatus>epublish</PublicationStatus>
+            <ArticleIdList>
+                <ArticleId IdType="pubmed">29872490</ArticleId>
+                <ArticleId IdType="pmc">PMC5973862</ArticleId>
+                <ArticleId IdType="doi">10.18632/oncotarget.25083</ArticleId>
+                <ArticleId IdType="pii">25083</ArticleId>
+            </ArticleIdList>
+            <ReferenceList>
+                <Reference>
+                    <Citation>Moreno-Grau S, Ruiz A. Genome research in pre-dementia stages of Alzheimer&#x2019;s disease. Expert Rev Mol Med. 2016;18:e11.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">27237222</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Corder E, Saunders A. Gene dose of apolipoprotein E type 4 allele and the risk of Alzheimer&#x2019;s disease in late onset families. Science. 1993;8:41&#x2013;3.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">8346443</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Corder EH, Saunders AM, Risch NJ, Strittmatter WJ, Schmechel DE, Gaskell PC, Rimmler JB, Locke PA, Conneally PM, Schmader KE. Protective effect of apolipoprotein E type 2 allele for late onset Alzheimer disease. Nat Genet. 1994;7:180&#x2013;4.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">7920638</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Roses AD, Lutz MW, Amrine-Madsen H, Saunders AM, Crenshaw DG, Sundseth SS, Huentelman MJ, Welsh-Bohmer KA, Reiman EM. A TOMM40 variable-length polymorphism predicts the age of late-onset Alzheimer&#x2019;s disease. Pharmacogenomics J. 2010;10:375&#x2013;84.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC2946560</ArticleId>
+                        <ArticleId IdType="pubmed">20029386</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Seshadri S, Fitzpatrick AL, Ikram MA, DeStefano AL, Gudnason V, Boada M, Bis JC, Smith AV, Carassquillo MM, Lambert JC, Harold D, Schrijvers EM, Ramirez-Lorca R, et al. Genome-wide analysis of genetic loci associated with Alzheimer disease. JAMA. 2010;303:1832&#x2013;40.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC2989531</ArticleId>
+                        <ArticleId IdType="pubmed">20460622</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Hollingworth P, Harold D, Sims R, Gerrish A, Lambert JC, Carrasquillo MM, Abraham R, Hamshere ML, Pahwa JS, Moskvina V, Dowzell K, Jones N, Stretton A, et al. Common variants at ABCA7, MS4A6A/MS4A4E, EPHA1, CD33 and CD2AP are associated with Alzheimer&#x2019;s disease. Nat Genet. 2011;43:429&#x2013;35.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3084173</ArticleId>
+                        <ArticleId IdType="pubmed">21460840</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Cruchaga C, Karch CM, Jin SC, Benitez BA, Cai Y, Guerreiro R, Harari O, Norton J, Budde J, Bertelsen S, Jeng AT, Cooper B, Skorupa T, et al. Rare coding variants in the phospholipase D3 gene confer risk for Alzheimer &#x2019; s disease. Nature. 2014;505:550&#x2013;4.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC4050701</ArticleId>
+                        <ArticleId IdType="pubmed">24336208</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Naj AC, Jun G, Beecham GW, Wang LS, Vardarajan BN, Buros J, Gallins PJ, Buxbaum JD, Jarvik GP, Crane PK, Larson EB, Bird TD, Boeve BF, et al. Common variants at MS4A4/MS4A6E, CD2AP, CD33 and EPHA1 are associated with late-onset Alzheimer&#x2019;s disease. Nat Genet. 2011;43:436&#x2013;41.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3090745</ArticleId>
+                        <ArticleId IdType="pubmed">21460841</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Lambert JC, Ibrahim-Verbaas CA, Harold D, Naj AC, Sims R, Bellenguez C, DeStafano AL, Bis JC, Beecham GW, Grenier-Boley B, Russo G, Thorton-Wells TA, Jones N, et al. Meta-analysis of 74,046 individuals identifies 11 new susceptibility loci for Alzheimer&#x2019;s disease. Nat Genet. 2013;45:1452&#x2013;8.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3896259</ArticleId>
+                        <ArticleId IdType="pubmed">24162737</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Reitz C, Jun G, Naj A, Rajbhandary R, Vardarajan BN, Wang LS, Valladares O, Lin CF, Larson EB, Graff-Radford NR, Evans D, De Jager PL, Crane PK, et al. Variants in the ATP-binding cassette transporter (ABCA7), apolipoprotein E &#x3b5;4,and the risk of late-onset Alzheimer disease in African Americans. JAMA. 2013;309:1483&#x2013;92.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3667653</ArticleId>
+                        <ArticleId IdType="pubmed">23571587</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Chouraki V, Seshadri S. Genetics of Alzheimer&#x2019;s disease. Adv Genet. 2014;87:245&#x2013;94.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">25311924</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Heilmann S, Drichel D, Clarimon J, Fern&#xe1;ndez V, Lacour A, Wagner H, Thelen M, Hern&#xe1;ndez I, Fortea J, Alegret M, Blesa R, Maule&#xf3;n A, Roca MR, et al. PLD3 in non-familial Alzheimer&#x2019;s disease. Nature. 2015;520:E3&#x2013;5.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">25832411</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Carrasquillo MM, Belbin O, Hunter TA, Ma L, Bisceglio GD, Zou F, Crook JE, Pankratz VS, Sando SB, Aasly JO, Barcikowska M, Wszolek ZK, Dickson DW, et al. Replication of EPHA1 and CD33 associations with late-onset Alzheimer&#x2019;s disease: a multi-centre case-control study. Mol Neurodegener. 2011;6:54.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3157442</ArticleId>
+                        <ArticleId IdType="pubmed">21798052</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Sakae N, Liu CC, Shinohara M, Frisch-Daiello J, Ma L, Yamazaki Y, Tachibana M, Younkin L, Kurti A, Carrasquillo MM, Zou F, Sevlever D, Bisceglio G, et al. ABCA7 Deficiency Accelerates Amyloid-&#x3b2; Generation and Alzheimer&#x2019;s Neuronal Pathology. J Neurosci. 2016;36:3848&#x2013;59.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC4812140</ArticleId>
+                        <ArticleId IdType="pubmed">27030769</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Jehle AW, Gardai SJ, Li S, Linsel-Nitschke P, Morimoto K, Janssen WJ, Vandivier RW, Wang N, Greenberg S, Dale BM, Qin C, Henson PM, Tall AR. ATP-binding cassette transporter A7 enhances phagocytosis of apoptotic cells and associated ERK signaling in macrophages. J Cell Biol. 2006;174:547&#x2013;56.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC2064260</ArticleId>
+                        <ArticleId IdType="pubmed">16908670</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Kim WS, Li H, Ruberu K, Chan S, Elliott DA, Low JK, Cheng D, Karl T, Garner B. Deletion of Abca7 increases cerebral amyloid-&#x3b2; accumulation in the J20 mouse model of Alzheimer&#x2019;s disease. J Neurosci. 2013;33:4387&#x2013;94.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC6704948</ArticleId>
+                        <ArticleId IdType="pubmed">23467355</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Bradshaw EM, Chibnik LB, Keenan BT, Ottoboni L, Raj T, Tang A, Rosenkrantz LL, Imboywa S, Lee M, Von Korff A, Morris MC, Evans DA, Johnson K, et al. CD33 Alzheimer&#x2019;s disease locus: altered monocyte function and amyloid biology. Nat Neurosci. 2013;16:848&#x2013;50.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3703870</ArticleId>
+                        <ArticleId IdType="pubmed">23708142</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Cruchaga C, Nowotny P, Kauwe JSK, Ridge PG, Mayo K, Bertelsen S, Hinrichs A, Fagan AM, Holtzman DM, Morris JC, Goate AM. Association and expression analyses with single-nucleotide polymorphisms in TOMM40 in Alzheimer disease. Arch Neurol. 2011;68:1013&#x2013;9.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3204798</ArticleId>
+                        <ArticleId IdType="pubmed">21825236</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Mueller JC. Linkage disequilibrium for different scales and applications. Brief Bioinform. 2004;5:355&#x2013;64.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">15606972</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Ardlie KG, Kruglyak L, Seielstad M. Patterns of Linkage Disequilibrium in the Human Genome. Nat Rev Genet. 2002;3:299&#x2013;309.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">11967554</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Campbell CD, Ogburn EL, Lunetta KL, Lyon HN, Freedman ML, Groop LC, Altshuler D, Ardlie KG, Hirschhorn JN. Demonstrating stratification in a European American population. Nat Genet. 2005;37:868&#x2013;72.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">16041375</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Dawson E, Abecasis GR, Bumpstead S, Chen Y, Hunt S, Beare DM, Pabial J, Dibling T, Tinsley E, Kirby S, Carter D, Papaspyridonos M, Livingstone S, et al. A first-generation linkage disequilibrium map of human chromosome 22. Nature. 2002;418:544&#x2013;8.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">12110843</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Beckmann JS, Estivill X, Antonarakis SE. Copy number variants and genetic traits: closer to the resolution of phenotypic to genotypic variability. Nat Rev Genet. 2007;8:639&#x2013;46.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">17637735</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Ant&#xfa;nez C, Boada M, Gonz&#xe1;lez-P&#xe9;rez A, Gay&#xe1;n J, Ram&#xed;rez-Lorca R, Mar&#xed;n J, Hern&#xe1;ndez I, Moreno-Rey C, Mor&#xf3;n FJ, L&#xf3;pez-Arrieta J, Maule&#xf3;n A, Rosende-Roca M, Noguera-Perea F, et al. The membrane-spanning 4-domains, subfamily A (MS4A) gene cluster contains a common variant associated with Alzheimer&#x2019;s disease. Genome Med. 2011;3:33.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3219074</ArticleId>
+                        <ArticleId IdType="pubmed">21627779</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Ruiz A, Heilmann S, Becker T, Hern&#xe1;ndez I, Wagner H, Thelen M, Maule&#xf3;n A, Rosende-Roca M, Bellenguez C, Bis JC, Harold D, Gerrish A, Sims R, et al. Follow-up of loci from the International Genomics of Alzheimer&#x2019;s Disease Project identifies TRIP4 as a novel susceptibility gene. Transl Psychiatry. 2014;4:e358.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3944635</ArticleId>
+                        <ArticleId IdType="pubmed">24495969</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Calero O, Hortig&#xfc;ela R, Bullido M, Calero M. Apolipoprotein E genotyping method by Real Time PCR, a fast and cost-effective alternative to the TaqMan&#xae; and FRET assays. J Neurosci Methods. 2009;183:238&#x2013;40.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">19583979</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MA, Bender D, Maller J, Sklar P, de Bakker PI, Daly MJ, Sham PC. PLINK: a tool set for whole-genome association and population-based linkage analyses. Am J Hum Genet. 2007;81:559&#x2013;75.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC1950838</ArticleId>
+                        <ArticleId IdType="pubmed">17701901</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Kjeldsen EW, Tybj&#xe6;rg-Hansen A, Nordestgaard BG, Frikke-Schmidt R. ABCA7and risk of dementia and vascular disease in the Danish population. Ann Clin Transl Neurol. 2018;5:41&#x2013;51.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC5771325</ArticleId>
+                        <ArticleId IdType="pubmed">29376091</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Moreno DJ, Ruiz S, R&#xed;os &#xc1;, Lopera F, Ostos H, Via M, Bedoya G. Association of GWAS Top Genes With Late-Onset Alzheimer&#x2019;s Disease in Colombian Population. Am J Alzheimers Dis Other Demen. 2017;32:27&#x2013;35.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">28084078</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Zhang DF, Li J, Wu H, Cui Y, Bi R, Zhou HJ, Wang HZ, Zhang C, Wang D, Kong QP, Li T, Fang Y, Jiang T, et al. CFH Variants Affect Structural and Functional Brain Changes and Genetic Risk of Alzheimer&#x2019;s Disease. Neuropsychopharmacology. 2015;41:1034&#x2013;1035.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC4748428</ArticleId>
+                        <ArticleId IdType="pubmed">26243271</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Jiao B, Liu X, Zhou L, Wang MH, Zhou Y, Xiao T, Zhang W, Sun R, Waye MM, Tang B, Shen L. Polygenic Analysis of Late-Onset Alzheimer&#x2019;s Disease from Mainland China. PLoS One. 2015;10:e0144898.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC4683047</ArticleId>
+                        <ArticleId IdType="pubmed">26680604</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Mao YF, Guo ZY, Pu JL, Chen YX, Zhang BR. Association of CD33 and MS4A cluster variants with Alzheimer&#x2019;s disease in East Asian Populations. Neurosci Lett. 2015;609:235&#x2013;239.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">26455864</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Ebbert MT, Ridge PG, Wilson AR, Sharp AR, Bailey M, Norton MC, Tschanz JT, Munger RG, Corcoran CD, Kauwe JSK. Population-based Analysis of Alzheimer&#x2019;s Disease Risk Alleles Implicates Genetic Interactions. Biol Psychiatry. 2014;75:732&#x2013;7.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3867586</ArticleId>
+                        <ArticleId IdType="pubmed">23954108</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Omoumi A, Fok A, Greenwood T, Sadovnick AD, Feldman HH, Hsiung GY. Evaluation of late-onset Alzheimer disease genetic susceptibility risks in a Canadian population. Neurobiol Aging. 2014;35:936.e5-12.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">24176626</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Tan L, Yu JT, Zhang W, Wu ZC, Zhang Q, Liu QY, Wang W, Wang HF, Ma XY, Cui WZ. Association of GWAS-linked loci with late-onset Alzheimer&#x2019;s disease in a northern Han Chinese population. Alzheimers Dement. 2013;9:546&#x2013;53.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">23232270</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Chung SJ, Lee JH, Kim SY, You S, Kim MJ, Lee JY, Koh J. Association of GWAS top hits with late-onset Alzheimer disease in Korean population. Alzheimer Dis Assoc Disord. 2013;27:250&#x2013;7.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">22975751</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Deng YL, Liu LH, Wang Y, Tang HD, Ren RJ, Xu W, Ma JF, Wang LL, Zhuang JP, Wang G, Chen SD. The prevalence of CD33 and MS4A6A variant in Chinese Han population with Alzheimer&#x2019;s disease. Hum Genet. 2012;131:1245&#x2013;9.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">22382309</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Logue MW. A Comprehensive Genetic Association Study of Alzheimer Disease in African Americans. Arch Neurol. 2011;68:1569.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3356921</ArticleId>
+                        <ArticleId IdType="pubmed">22159054</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Miyashita A, Koike A, Jun G, Wang LS, Takahashi S, Matsubara E, Kawarabayashi T, Shoji M, Tomita N, Arai H, Asada T, Harigaya Y, Ikeda M, et al. SORL1 is genetically associated with late-onset Alzheimer&#x2019;s disease in Japanese, Koreans and Caucasians. PLoS One. 2013;8:e58618.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3614978</ArticleId>
+                        <ArticleId IdType="pubmed">23565137</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Weiner M, Aisen P, Jack C, Jr, Jaugust W, Trojanowski J, Shaw L, Saykin AJ, Morris JC, Cairns N, Laurel A, Toga A, Green R, Walter S, et al. The Alzheimer&#x2019;s disease neuroinmaging iniciative: Progress report and future plans. Alzheimers Dement. 2010;6:202&#x2013;11.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC2927112</ArticleId>
+                        <ArticleId IdType="pubmed">20451868</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Li H, Wetten S, Li L, St Jean PL, Upmanyu R, Surh L, Hosford D, Barnes MR, Briley JD, Borrie M, Coletta N, Delisle R, Dhalla D, et al. Candidate single-nucleotide polymorphisms from a genomewide association study of Alzheimer disease. Arch Neurol. 2008;65:45&#x2013;53.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pubmed">17998437</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Wijsman EM, Pankratz ND, Choi Y, Rothstein JH, Faber KM, Cheng R, Lee JH, Bird TD, Bennett DA, Diaz-Arrastia R, Goate AM, Farlow M, Ghetti B, et al. Genome-wide association of familial late-onset Alzheimer&#x2019;s disease replicates BIN1 and CLU and nominates CUGBP2 in interaction with APOE. PLoS Genet. 2011;7:e1001308.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC3040659</ArticleId>
+                        <ArticleId IdType="pubmed">21379329</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+                <Reference>
+                    <Citation>Zhang Q, Calus MP, Guldbrandtsen B, Lund MS, Sahana G. Estimation of inbreeding using pedigree, 50k SNP chip genotypes and full sequence data in three cattle breeds. BMC Genet. 2015;16:88.</Citation>
+                    <ArticleIdList>
+                        <ArticleId IdType="pmc">PMC4509611</ArticleId>
+                        <ArticleId IdType="pubmed">26195126</ArticleId>
+                    </ArticleIdList>
+                </Reference>
+            </ReferenceList>
+        </PubmedData>
+    </PubmedArticle>
+</PubmedArticleSet>