JabRef · koppor · Jan 16, 2024 · Oct 17, 2023 · Oct 17, 2023 · Oct 17, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
 
 ### Added
 
+- We added a fetcher for ISIDORE. Simply paste the link into the text field, or enter the last six characters of the link that identify the paper. [#10423](https://github.com/JabRef/jabref/issues/10423)
 - We added a dropdown menu to let users change the reference library during AUX file import. [#10472](https://github.com/JabRef/jabref/issues/10472)
 - We added a button to let users reset the cite command to the default value. [#10569](https://github.com/JabRef/jabref/issues/10569)
 

diff --git a/PRIVACY.md b/PRIVACY.md
@@ -72,6 +72,7 @@ These third-party services are the following:
 | [The SAO/NASA Astrophysics Data System](https://ui.adsabs.harvard.edu/)                                         | <https://ui.adsabs.harvard.edu/help/privacy/> |
 | [Unpaywall](https://unpaywall.org/)                                                                             | <https://unpaywall.org/legal/privacy> |
 | [zbMATH Open](https://www.zbmath.org)                                                                           | <https://zbmath.org/privacy-policy/> |
+| [ISIDORE](https://isidore.science/)                                                                             | <https://isidore.science/credit> |
 
 [1]: *Note: The Mr. DLib service is used for the related articles tab in the entry editor and collects also your language, your browser and operating system (by default*disabled*).*
 

diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java
@@ -27,6 +27,7 @@
 import org.jabref.logic.importer.fetcher.GvkFetcher;
 import org.jabref.logic.importer.fetcher.IEEE;
 import org.jabref.logic.importer.fetcher.INSPIREFetcher;
+import org.jabref.logic.importer.fetcher.ISIDOREFetcher;
 import org.jabref.logic.importer.fetcher.IacrEprintFetcher;
 import org.jabref.logic.importer.fetcher.LOBIDFetcher;
 import org.jabref.logic.importer.fetcher.LibraryOfCongress;
@@ -145,6 +146,7 @@ public static SortedSet<IdBasedFetcher> getIdBasedFetchers(ImportFormatPreferenc
         set.add(new LibraryOfCongress(importFormatPreferences));
         set.add(new IacrEprintFetcher(importFormatPreferences));
         set.add(new RfcFetcher(importFormatPreferences));
+        set.add(new ISIDOREFetcher());
         set.add(new Medra());
         // set.add(new JstorFetcher(importFormatPreferences));
         return set;

diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ISIDOREFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/ISIDOREFetcher.java
@@ -0,0 +1,222 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.Optional;
+import java.util.StringJoiner;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.jabref.logic.help.HelpFile;
+import org.jabref.logic.importer.FetcherException;
+import org.jabref.logic.importer.IdBasedParserFetcher;
+import org.jabref.logic.importer.Parser;
+import org.jabref.logic.net.URLDownload;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.EntryType;
+import org.jabref.model.entry.types.StandardEntryType;
+
+import org.jooq.lambda.Unchecked;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * Fetcher for <a href="https://isidore.science">ISIDORE</a>.
+ * Accepts the link to a document on the website, or the last six characters of that link, which identify the reference.
+ * Uses <a href="https://isidore.science/api">ISIDORE's API</a>. */
+public class ISIDOREFetcher implements IdBasedParserFetcher {
+    private static final int LINKLENGTH = 47;
+
+    private String URL;
+    private Parser parser;
+
+    private DocumentBuilderFactory factory;
+
+    /**
+     * Creates the fetcher with a fresh {@link DocumentBuilderFactory} and stores the parser returned by {@link #getParser()}.
+     */
+    public ISIDOREFetcher() {
+        this.factory = DocumentBuilderFactory.newInstance();
+        // NOTE(review): getParser() is overridable and is called during construction; the parser field is only
+        // assigned here and is not read anywhere else in this class.
+        this.parser = getParser();
+    }
+
+    /**
+     * Builds the ISIDORE API URL for a document and remembers the document's website link, which is later written
+     * into the URL field of the resulting entry.
+     * <p>
+     * Accepted inputs (after trimming): the full document link, the six-character code at the end of that link, or
+     * that code preceded by {@code 1.} (eight characters).
+     *
+     * @param identifier the document link or its trailing code
+     * @return the URL of the API resource that belongs to the document
+     * @throws FetcherException if the input cannot be turned into a document link of the expected form
+     */
+    @Override
+    public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
+        String documentLink = identifier.trim();
+
+        // Short forms are expanded to the full website link first.
+        int length = documentLink.length();
+        if (length == 8) {
+            // eight characters: the code including the leading "1."
+            documentLink = "https://isidore.science/document/10670/" + documentLink;
+        } else if (length == 6) {
+            // six characters: only the code at the end of the link
+            documentLink = "https://isidore.science/document/10670/1." + documentLink;
+        }
+
+        boolean isDocumentLink = (documentLink.length() == LINKLENGTH)
+                && documentLink.startsWith("https://isidore.science/document/10670/1.");
+        if (!isDocumentLink) {
+            // Anything that is not a document link of the expected shape is rejected.
+            throw new FetcherException("Could not construct url for ISIDORE");
+        }
+
+        this.URL = documentLink;
+        // Turn the website link into the corresponding API link.
+        String apiLink = documentLink
+                .replace("/document/", "/resource/content?uri=")
+                .replace("https://isidore.science/", "https://api.isidore.science/");
+        return new URL(apiLink);
+    }
+
+    /**
+     * Returns a parser that reads an XML document and converts its root element into a single {@link BibEntry}.
+     * <p>
+     * The parser yields an empty list when the document has no root element. Parser configuration, I/O and SAX
+     * failures are rethrown through {@link Unchecked#throwChecked} as a {@link FetcherException} that carries the
+     * original exception as its cause.
+     *
+     * @return the parser for ISIDORE's XML data
+     */
+    @Override
+    public Parser getParser() {
+        return xmlData -> {
+            try {
+                // NOTE(review): the factory is used with its default configuration; the XML is presumably fetched
+                // from the ISIDORE API, so consider disabling DTDs/external entities - confirm.
+                DocumentBuilder builder = this.factory.newDocumentBuilder();
+                Document document = builder.parse(xmlData);
+
+                // Assuming the root element represents an entry
+                Element entryElement = document.getDocumentElement();
+                if (entryElement == null) {
+                    return Collections.emptyList();
+                }
+
+                return Collections.singletonList(xmlItemToBibEntry(entryElement));
+            } catch (ParserConfigurationException | IOException | SAXException e) {
+                // Keep the underlying exception as the cause so the actual parsing problem is not lost.
+                Unchecked.throwChecked(new FetcherException("Issue with parsing link", e));
+            }
+            // throwChecked is expected to always throw; this return only satisfies the compiler.
+            return null;
+        };
+    }
+
+    /**
+     * Maps the root element of an ISIDORE XML document to a {@link BibEntry}.
+     * <p>
+     * The first {@code types}, {@code title}, {@code enrichedCreators}, {@code date} and {@code ore} elements are
+     * dereferenced without a null check. The URL field is taken from the link stored by
+     * {@link #getUrlForIdentifier(String)}.
+     *
+     * @param itemElement the root element of the XML document
+     * @return the entry built from the element
+     */
+    private BibEntry xmlItemToBibEntry(Element itemElement) {
+        EntryType entryType = getType(itemElement.getElementsByTagName("types").item(0).getChildNodes());
+        // Double quotes are removed from the title.
+        String title = itemElement.getElementsByTagName("title").item(0).getTextContent().replaceAll("\"", "");
+        String authors = getAuthor(itemElement.getElementsByTagName("enrichedCreators").item(0));
+        // The year is the first four characters of the second child node of the first <date> element.
+        String year = itemElement.getElementsByTagName("date").item(0).getChildNodes().item(1).getTextContent().substring(0, 4);
+        String journal = getJournal(itemElement.getElementsByTagName("dc:source"));
+        String publishers = getPublishers(itemElement.getElementsByTagName("publishers").item(0));
+        String doi = getDOI(itemElement.getElementsByTagName("ore").item(0).getChildNodes());
+
+        return new BibEntry(entryType)
+                .withField(StandardField.TITLE, title)
+                .withField(StandardField.AUTHOR, authors)
+                .withField(StandardField.YEAR, year)
+                .withField(StandardField.JOURNAL, journal)
+                .withField(StandardField.PUBLISHER, publishers)
+                .withField(StandardField.DOI, doi)
+                .withField(StandardField.URL, this.URL);
+    }
+
+    /**
+     * Searches the given nodes for a DOI and returns it without its prefix.
+     * <p>
+     * The first node whose text contains {@code "DOI:"} or {@code "doi:"} decides the result: {@code "DOI: "} or
+     * {@code "info:doi:"}, respectively, is removed from its text.
+     *
+     * @param list the nodes to inspect
+     * @return the node text with the prefix removed, or an empty string if no node mentions a DOI
+     */
+    private String getDOI(NodeList list) {
+        int index = 0;
+        while (index < list.getLength()) {
+            String text = list.item(index).getTextContent();
+            if (text.contains("DOI:")) {
+                return text.replace("DOI: ", "");
+            } else if (text.contains("doi:")) {
+                return text.replace("info:doi:", "");
+            }
+            index++;
+        }
+        return "";
+    }
+
+    /**
+     * Derives the entry type from ISIDORE's type nodes. ISIDORE only seems to have select types, and they differ
+     * from those used by JabRef.
+     * <p>
+     * The first node whose text mentions an article, a thesis or a book (checked in that order) decides the type.
+     *
+     * @param list the nodes describing the document's types
+     * @return the matching standard entry type, or {@link StandardEntryType#Misc} if no node matches
+     */
+    private EntryType getType(NodeList list) {
+        for (int position = 0; position < list.getLength(); position++) {
+            String typeName = list.item(position).getTextContent();
+            if (typeName.contains("Article") || typeName.contains("article")) {
+                return StandardEntryType.Article;
+            } else if (typeName.contains("Thesis") || typeName.contains("thesis")) {
+                return StandardEntryType.Thesis;
+            } else if (typeName.contains("Book") || typeName.contains("book")) {
+                return StandardEntryType.Book;
+            }
+        }
+        return StandardEntryType.Misc;
+    }
+
+    /**
+     * Collects the author names below the given node and joins them with {@code " and "}.
+     * <p>
+     * Every child node is inspected and whitespace-only children (such as indentation between elements) are
+     * skipped, so the result does not depend on how the XML is formatted. Each name is cut at its first digit,
+     * because the author field sometimes has extra numbers and letters, and runs of whitespace are collapsed.
+     *
+     * @param itemElement the node whose children hold the authors; may be {@code null} if the document has none
+     * @return the joined author names, or an empty string if there are none
+     */
+    private String getAuthor(Node itemElement) {
+        if (itemElement == null) {
+            return "";
+        }
+        StringJoiner stringJoiner = new StringJoiner(" and ");
+        NodeList children = itemElement.getChildNodes();
+        for (int i = 0; i < children.getLength(); i++) {
+            // Collapsing whitespace also removes line breaks; trimming keeps the joined result free of double spaces.
+            String name = removeNumbers(children.item(i).getTextContent()).replaceAll("\\s+", " ").trim();
+            if (name.isBlank()) {
+                continue;
+            }
+            stringJoiner.add(name);
+        }
+        return stringJoiner.toString();
+    }
+
+    /**
+     * Cuts the string off at its first digit, dropping the digit and everything after it (helps with the author field).
+     *
+     * @param string the text to shorten
+     * @return everything before the first digit, or the unchanged string if it contains no digit
+     */
+    private String removeNumbers(String string) {
+        // A plain scan is used rather than replaceFirst("\\d.*", ""): '.' stops at a line break, so text on
+        // following lines would be kept.
+        for (int i = 0; i < string.length(); i++) {
+            if (Character.isDigit(string.charAt(i))) {
+                return string.substring(0, i);
+            }
+        }
+        return string;
+    }
+
+    /**
+     * Joins the text of all non-blank children of the publishers node with {@code ", "}.
+     * <p>
+     * In the XML file the publishers node often lists multiple publishers, e.g.
+     * {@code <publisher origin="HAL CCSD">HAL CCSD</publisher>} and
+     * {@code <publisher origin="Elsevier">Elsevier</publisher>}; this method simply gets all of them.
+     *
+     * @param itemElement the publishers node, or {@code null} if the document has none
+     * @return the joined publisher names, or an empty string if there are none
+     */
+    private String getPublishers(Node itemElement) {
+        StringJoiner publishers = new StringJoiner(", ");
+        if (itemElement != null) {
+            NodeList children = itemElement.getChildNodes();
+            for (int position = 0; position < children.getLength(); position++) {
+                String text = children.item(position).getTextContent();
+                if (!text.isBlank()) {
+                    publishers.add(text.trim());
+                }
+            }
+        }
+        return publishers.toString();
+    }
+
+    /**
+     * Extracts the journal name from the last of the given source nodes.
+     * <p>
+     * The journal is the part of that node's text before its first comma.
+     *
+     * @param list the source nodes of the document
+     * @return the text before the first comma, or an empty string if there is no node or the text has no comma
+     */
+    private String getJournal(NodeList list) {
+        int count = list.getLength();
+        if (count == 0) {
+            // No source node means no journal.
+            return "";
+        }
+        String reference = list.item(count - 1).getTextContent();
+        int commaIndex = reference.indexOf(',');
+        if (commaIndex < 0) {
+            return "";
+        }
+        return reference.substring(0, commaIndex);
+    }
+
+    /**
+     * Adds no ISIDORE-specific cleanup; simply runs the default cleanup of {@link IdBasedParserFetcher}.
+     *
+     * @param entry the entry to clean up
+     */
+    @Override
+    public void doPostCleanup(BibEntry entry) {
+        IdBasedParserFetcher.super.doPostCleanup(entry);
+    }
+
+    /**
+     * Looks up an entry by delegating to the default implementation of {@link IdBasedParserFetcher}.
+     *
+     * @param identifier the document link or its trailing code
+     * @return the result of the default implementation
+     * @throws FetcherException if the default implementation fails
+     */
+    @Override
+    public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
+        return IdBasedParserFetcher.super.performSearchById(identifier);
+    }
+
+    /**
+     * Returns the name of this fetcher.
+     *
+     * @return the constant {@code "ISIDORE"}
+     */
+    @Override
+    public String getName() {
+        return "ISIDORE";
+    }
+
+    /**
+     * Returns whatever help page the default implementation of {@link IdBasedParserFetcher} provides.
+     *
+     * @return the result of the default implementation
+     */
+    @Override
+    public Optional<HelpFile> getHelpPage() {
+        return IdBasedParserFetcher.super.getHelpPage();
+    }
+
+    /**
+     * Creates the download for the given URL via the default implementation of {@link IdBasedParserFetcher}.
+     *
+     * @param url the URL to download from
+     * @return the result of the default implementation
+     */
+    @Override
+    public URLDownload getUrlDownload(URL url) {
+        return IdBasedParserFetcher.super.getUrlDownload(url);
+    }
+}
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ISIDOREFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ISIDOREFetcherTest.java
@@ -0,0 +1,85 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.util.Optional;
+
+import org.jabref.logic.importer.FetcherException;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.StandardEntryType;
+import org.jabref.testutils.category.FetcherTest;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests for {@link ISIDOREFetcher}, covering the three accepted identifier forms: the full document link, the
+ * six-character code, and the code preceded by {@code 1.}.
+ * <p>
+ * NOTE(review): the tests call {@code performSearchById} on a real fetcher, so they presumably need network access
+ * and depend on the data currently served by ISIDORE - confirm.
+ */
+@FetcherTest
+public class ISIDOREFetcherTest {
+
+    private ISIDOREFetcher fetcher;
+
+    /** Creates a fresh fetcher before every test. */
+    @BeforeEach
+    public void setup() {
+        this.fetcher = new ISIDOREFetcher();
+    }
+
+    /** Looks up an article by its full document link; the entry has two authors and two publishers. */
+    @Test
+    public void checkArticle() throws FetcherException {
+        BibEntry expected = new BibEntry(StandardEntryType.Article)
+                .withField(StandardField.TITLE, "Investigating day-to-day variability of transit usage on a multimonth scale with smart card data. A case study in Lyon")
+                .withField(StandardField.AUTHOR, "Oscar Egu and Patrick Bonnel")
+                .withField(StandardField.YEAR, "2020")
+                .withField(StandardField.JOURNAL, "Travel Behaviour and Society")
+                .withField(StandardField.PUBLISHER, "HAL CCSD, Elsevier")
+                .withField(StandardField.DOI, "10.1016/j.tbs.2019.12.003")
+                .withField(StandardField.URL, "https://isidore.science/document/10670/1.hrzlqd");
+
+        Optional<BibEntry> actual = fetcher.performSearchById("https://isidore.science/document/10670/1.hrzlqd");
+
+        assertEquals(Optional.of(expected), actual);
+    }
+
+    /** Looks up an article by the six-character code only; the expected URL is the expanded document link. */
+    @Test
+    public void checkArticle2() throws FetcherException {
+        BibEntry expected = new BibEntry(StandardEntryType.Article)
+                // The expected title keeps its leading and trailing space.
+                .withField(StandardField.TITLE, " Anthony B. Atkinson, Inequality – What Can Be Done ? Cambridge (Mass.) Harvard University Press, 2015, XI-384 p. ")
+                .withField(StandardField.AUTHOR, "Benoît Rapoport")
+                .withField(StandardField.YEAR, "2016")
+                .withField(StandardField.JOURNAL, "Population (édition française)")
+                .withField(StandardField.PUBLISHER, "HAL CCSD, INED - Institut national d’études démographiques")
+                .withField(StandardField.DOI, "10.3917/popu.1601.0153")
+                .withField(StandardField.URL, "https://isidore.science/document/10670/1.d2vlam");
+
+        Optional<BibEntry> actual = fetcher.performSearchById("d2vlam");
+
+        assertEquals(Optional.of(expected), actual);
+    }
+
+    /** Looks up a thesis by the eight-character form ("1." plus code); no journal, publisher or DOI is expected. */
+    @Test
+    public void checkThesis() throws FetcherException {
+        BibEntry expected = new BibEntry(StandardEntryType.Thesis)
+                .withField(StandardField.TITLE, "Mapping English L2 errors : an integrated system and textual approach")
+                .withField(StandardField.AUTHOR, "Clive Hamilton")
+                .withField(StandardField.YEAR, "2015")
+                .withField(StandardField.URL, "https://isidore.science/document/10670/1.m05oth");
+
+        Optional<BibEntry> actual = fetcher.performSearchById("1.m05oth");
+
+        assertEquals(Optional.of(expected), actual);
+    }
+
+    /** Looks up an article with a long author list by its full document link; no publisher is expected. */
+    @Test
+    public void checkArticle3() throws FetcherException {
+        BibEntry expected = new BibEntry(StandardEntryType.Article)
+                .withField(StandardField.TITLE, "Salvage Lymph Node Dissection for Nodal Recurrent Prostate Cancer: A Systematic Review.")
+                .withField(StandardField.AUTHOR, "G. Ploussard and G. Gandaglia and H. Borgmann and P. de Visschere and I. Heidegger and A. Kretschmer and R. Mathieu and C. Surcel and D. Tilki and I. Tsaur and M. Valerio and R. van den Bergh and P. Ost and A. Briganti")
+                .withField(StandardField.YEAR, "2019")
+                .withField(StandardField.JOURNAL, "European urology")
+                .withField(StandardField.DOI, "10.1016/j.eururo.2018.10.041")
+                .withField(StandardField.URL, "https://isidore.science/document/10670/1.zm7q2x");
+
+        Optional<BibEntry> actual = fetcher.performSearchById("https://isidore.science/document/10670/1.zm7q2x");
+
+        assertEquals(Optional.of(expected), actual);
+    }
+}