Fix ArrayIndexOutOfBoundsException on second pdf import (#4426)

* Fix ArrayIndexOutOfBoundsException on second pdf import The variable formally known as i is a global variable which had -1 after the first run and therefore threw an exception * add changelog and fix * add test
JabRef · Oct 30, 2018 · 583c61f · 583c61f
1 parent 11a62dd
commit 583c61f
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 38 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -31,7 +31,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
 - Files without a defined external file type are now directly opened with the default application of the operating system
 - We streamlined the process to rename and move files by removing the confirmation dialogs.
 - We removed the redundant new lines of markings and wrapped the summary in the File annotation tab. [#3823](https://github.com/JabRef/jabref/issues/3823)
-- We add auto url formatting when user paste link to URL field in entry editor. [#254](https://github.com/koppor/jabref/issues/254)
+- We add auto url formatting when user paste link to URL field in entry editor. [koppor#254](https://github.com/koppor/jabref/issues/254)
 - We added a minimal height for the entry editor so that it can no longer be hidden by accident. [#4279](https://github.com/JabRef/jabref/issues/4279)
 - We added a new keyboard shortcut so that the entry editor could be closed by <kbd>Ctrl<kbd> + <kbd>E<kbd>. [#4222] (https://github.com/JabRef/jabref/issues/4222)
 - We added an option in the preference dialog box, that allows user to pick the dark or light theme option. [#4130] (https://github.com/JabRef/jabref/issues/4130)
@@ -74,8 +74,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
 - We fixed an issue where files added via the "Attach file" contextmenu of an entry were not made relative. [#4201](https://github.com/JabRef/jabref/issues/4201) and [#4241](https://github.com/JabRef/jabref/issues/4241)
 - We fixed an issue where author list parser can't generate bibtex for Chinese author. [#4169](https://github.com/JabRef/jabref/issues/4169)
 - We fixed an issue where the list of XMP Exclusion fields in the preferences was not be saved [#4072](https://github.com/JabRef/jabref/issues/4072)
-- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [#4367](https://github.com/JabRef/jabref/pull/4367)
-
+- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [koppor#328](https://github.com/koppor/jabref/issues/328)
+- We fixed an issue where only one PDF file could be imported [#4422](https://github.com/JabRef/jabref/issues/4422)
 
 
 

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
@@ -45,13 +45,14 @@ public class PdfContentImporter extends Importer {
     // input lines into several lines
     private String[] lines;
     // current index in lines
-    private int i;
+    private int lineIndex;
     private String curString;
     private String year;
 
 
     public PdfContentImporter(ImportFormatPreferences importFormatPreferences) {
         this.importFormatPreferences = importFormatPreferences;
+
     }
     /**
      * Removes all non-letter characters at the end
@@ -225,17 +226,19 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
             //   the different lines are joined into one and thereby separated by " "
             lines = firstPageContents.split(System.lineSeparator());
 
+            lineIndex = 0; //to prevent array index out of bounds exception on second run we need to reset i to zero
+
             proceedToNextNonEmptyLine();
-            if (i >= lines.length) {
+            if (lineIndex >= lines.length) {
                 // PDF could not be parsed or is empty
                 // return empty list
                 return new ParserResult();
             }
 
             // we start at the current line
-            curString = lines[i];
+            curString = lines[lineIndex];
             // i might get incremented later and curString modified, too
-            i = i + 1;
+            lineIndex = lineIndex + 1;
 
             String author;
             String editor = null;
@@ -279,10 +282,10 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
 
             // after title: authors
             author = null;
-            while ((i < lines.length) && !"".equals(lines[i])) {
+            while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
                 // author names are unlikely to be lines among different lines
                 // treat them line by line
-                curString = streamlineNames(lines[i]);
+                curString = streamlineNames(lines[lineIndex]);
                 if (author == null) {
                     author = curString;
                 } else {
@@ -292,38 +295,38 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
                         author = author.concat(" and ").concat(curString);
                     }
                 }
-                i++;
+                lineIndex++;
             }
             curString = "";
-            i++;
+            lineIndex++;
 
             // then, abstract and keywords follow
-            while (i < lines.length) {
-                curString = lines[i];
+            while (lineIndex < lines.length) {
+                curString = lines[lineIndex];
                 if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                     if (curString.length() == "Abstract".length()) {
                         // only word "abstract" found -- skip line
                         curString = "";
                     } else {
                         curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                     }
-                    i++;
+                    lineIndex++;
                     // fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
                     // whereas we need linebreak as separator
-                    while ((i < lines.length) && !"".equals(lines[i])) {
-                        curString = curString.concat(lines[i]).concat(System.lineSeparator());
-                        i++;
+                    while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
+                        curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator());
+                        lineIndex++;
                     }
                     abstractT = curString.trim();
-                    i++;
+                    lineIndex++;
                 } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                     if (curString.length() == "Keywords".length()) {
                         // only word "Keywords" found -- skip line
                         curString = "";
                     } else {
                         curString = curString.substring("Keywords".length() + 1).trim();
                     }
-                    i++;
+                    lineIndex++;
                     fillCurStringWithNonEmptyLines();
                     keywords = removeNonLettersAtEnd(curString);
                 } else {
@@ -340,18 +343,18 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
                         }
                     }
 
-                    i++;
+                    lineIndex++;
                     proceedToNextNonEmptyLine();
                 }
             }
 
-            i = lines.length - 1;
+            lineIndex = lines.length - 1;
 
             // last block: DOI, detailed information
             // sometimes, this information is in the third last block etc...
             // therefore, read until the beginning of the file
 
-            while (i >= 0) {
+            while (lineIndex >= 0) {
                 readLastBlock();
                 // i now points to the block before or is -1
                 // curString contains the last block, separated by " "
@@ -522,8 +525,8 @@ private void extractYear() {
      * proceed to next non-empty line
      */
     private void proceedToNextNonEmptyLine() {
-        while ((i < lines.length) && "".equals(lines[i].trim())) {
-            i++;
+        while ((lineIndex < lines.length) && "".equals(lines[lineIndex].trim())) {
+            lineIndex++;
         }
     }
 
@@ -540,16 +543,16 @@ private void proceedToNextNonEmptyLine() {
     private void fillCurStringWithNonEmptyLines() {
         // ensure that curString does not end with " "
         curString = curString.trim();
-        while ((i < lines.length) && !"".equals(lines[i])) {
-            String curLine = lines[i].trim();
+        while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
+            String curLine = lines[lineIndex].trim();
             if (!"".equals(curLine)) {
                 if (!curString.isEmpty()) {
                     // insert separating space if necessary
                     curString = curString.concat(" ");
                 }
-                curString = curString.concat(lines[i]);
+                curString = curString.concat(lines[lineIndex]);
             }
-            i++;
+            lineIndex++;
         }
 
         proceedToNextNonEmptyLine();
@@ -563,22 +566,22 @@ private void fillCurStringWithNonEmptyLines() {
      * invariant before/after: i points to line before the last handled block
      */
     private void readLastBlock() {
-        while ((i >= 0) && "".equals(lines[i].trim())) {
-            i--;
+        while ((lineIndex >= 0) && "".equals(lines[lineIndex].trim())) {
+            lineIndex--;
         }
         // i is now at the end of a block
 
-        int end = i;
+        int end = lineIndex;
 
         // find beginning
-        while ((i >= 0) && !"".equals(lines[i])) {
-            i--;
+        while ((lineIndex >= 0) && !"".equals(lines[lineIndex])) {
+            lineIndex--;
         }
         // i is now the line before the beginning of the block
         // this fulfills the invariant
 
         curString = "";
-        for (int j = i + 1; j <= end; j++) {
+        for (int j = lineIndex + 1; j <= end; j++) {
             curString = curString.concat(lines[j].trim());
             if (j != end) {
                 curString = curString.concat(" ");

diff --git a/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java b/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java
@@ -1,6 +1,5 @@
 package org.jabref.logic.importer.fileformat;
 
-import java.net.URISyntaxException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -10,6 +9,8 @@
 import org.jabref.logic.importer.ImportFormatPreferences;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.BibtexEntryTypes;
+import org.jabref.model.entry.FieldName;
 
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -34,14 +35,30 @@ public void testsGetExtensions() {
     @Test
     public void testGetDescription() {
         assertEquals(
-                "PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
-                importer.getDescription());
+                     "PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
+                     importer.getDescription());
     }
 
     @Test
-    public void doesNotHandleEncryptedPdfs() throws URISyntaxException {
+    public void doesNotHandleEncryptedPdfs() throws Exception {
         Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/encrypted.pdf").toURI());
         List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
         assertEquals(Collections.emptyList(), result);
     }
+
+    @Test
+    public void importTwiceWorksAsExpected() throws Exception {
+        Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/minimal.pdf").toURI());
+        List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
+
+        BibEntry expected = new BibEntry(BibtexEntryTypes.INPROCEEDINGS);
+        expected.setField(FieldName.AUTHOR, "1 ");
+        expected.setField(FieldName.TITLE, "Hello World");
+
+        List<BibEntry> resultSecondImport = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
+        assertEquals(Collections.singletonList(expected), result);
+        assertEquals(Collections.singletonList(expected), resultSecondImport);
+
+    }
+
 }