From 583c61fba671cb15c46d8d3284982793e4b0812d Mon Sep 17 00:00:00 2001 From: Christoph Date: Tue, 30 Oct 2018 09:34:54 +0100 Subject: [PATCH] Fix ArrayIndexOutOfBoundsException on second pdf import (#4426) * Fix ArrayIndexOutOfBoundsException on second pdf import The variable formally known as i is a global variable which had -1 after the first run and therefore threw an exception * add changelog and fix * add test --- CHANGELOG.md | 6 +- .../fileformat/PdfContentImporter.java | 65 ++++++++++--------- .../fileformat/PdfContentImporterTest.java | 25 +++++-- 3 files changed, 58 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9848a45705f..bab3e63099a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - Files without a defined external file type are now directly opened with the default application of the operating system - We streamlined the process to rename and move files by removing the confirmation dialogs. - We removed the redundant new lines of markings and wrapped the summary in the File annotation tab. [#3823](https://github.com/JabRef/jabref/issues/3823) -- We add auto url formatting when user paste link to URL field in entry editor. [#254](https://github.com/koppor/jabref/issues/254) +- We add auto url formatting when user paste link to URL field in entry editor. [koppor#254](https://github.com/koppor/jabref/issues/254) - We added a minimal height for the entry editor so that it can no longer be hidden by accident. [#4279](https://github.com/JabRef/jabref/issues/4279) - We added a new keyboard shortcut so that the entry editor could be closed by Ctrl + E. [#4222] (https://github.com/JabRef/jabref/issues/4222) - We added an option in the preference dialog box, that allows user to pick the dark or light theme option. [#4130] (https://github.com/JabRef/jabref/issues/4130) @@ -74,8 +74,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - We fixed an issue where files added via the "Attach file" contextmenu of an entry were not made relative. [#4201](https://github.com/JabRef/jabref/issues/4201) and [#4241](https://github.com/JabRef/jabref/issues/4241) - We fixed an issue where author list parser can't generate bibtex for Chinese author. [#4169](https://github.com/JabRef/jabref/issues/4169) - We fixed an issue where the list of XMP Exclusion fields in the preferences was not be saved [#4072](https://github.com/JabRef/jabref/issues/4072) -- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [#4367](https://github.com/JabRef/jabref/pull/4367) - +- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [koppor#328](https://github.com/koppor/jabref/issues/328) +- We fixed an issue where only one PDF file could be imported [#4422](https://github.com/JabRef/jabref/issues/4422) diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java index 67020c721fc..73d5f0544fc 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java @@ -45,13 +45,14 @@ public class PdfContentImporter extends Importer { // input lines into several lines private String[] lines; // current index in lines - private int i; + private int lineIndex; private String curString; private String year; public PdfContentImporter(ImportFormatPreferences importFormatPreferences) { this.importFormatPreferences = importFormatPreferences; + } /** * Removes all non-letter characters at the end @@ -225,17 +226,19 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) { // the different lines are joined into one and thereby separated by " " lines = firstPageContents.split(System.lineSeparator()); + lineIndex = 0; //to prevent array index out of bounds exception on second run we need to reset i to zero + proceedToNextNonEmptyLine(); - if (i >= lines.length) { + if (lineIndex >= lines.length) { // PDF could not be parsed or is empty // return empty list return new ParserResult(); } // we start at the current line - curString = lines[i]; + curString = lines[lineIndex]; // i might get incremented later and curString modified, too - i = i + 1; + lineIndex = lineIndex + 1; String author; String editor = null; @@ -279,10 +282,10 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) { // after title: authors author = null; - while ((i < lines.length) && !"".equals(lines[i])) { + while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) { // author names are unlikely to be lines among different lines // treat them line by line - curString = streamlineNames(lines[i]); + curString = streamlineNames(lines[lineIndex]); if (author == null) { author = curString; } else { @@ -292,14 +295,14 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) { author = author.concat(" and ").concat(curString); } } - i++; + lineIndex++; } curString = ""; - i++; + lineIndex++; // then, abstract and keywords follow - while (i < lines.length) { - curString = lines[i]; + while (lineIndex < lines.length) { + curString = lines[lineIndex]; if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) { if (curString.length() == "Abstract".length()) { // only word "abstract" found -- skip line @@ -307,15 +310,15 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) { } else { curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator()); } - i++; + lineIndex++; // fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator // whereas we need linebreak as separator - while ((i < lines.length) && !"".equals(lines[i])) { - curString = curString.concat(lines[i]).concat(System.lineSeparator()); - i++; + while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) { + curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator()); + lineIndex++; } abstractT = curString.trim(); - i++; + lineIndex++; } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) { if (curString.length() == "Keywords".length()) { // only word "Keywords" found -- skip line @@ -323,7 +326,7 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) { } else { curString = curString.substring("Keywords".length() + 1).trim(); } - i++; + lineIndex++; fillCurStringWithNonEmptyLines(); keywords = removeNonLettersAtEnd(curString); } else { @@ -340,18 +343,18 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) { } } - i++; + lineIndex++; proceedToNextNonEmptyLine(); } } - i = lines.length - 1; + lineIndex = lines.length - 1; // last block: DOI, detailed information // sometimes, this information is in the third last block etc... // therefore, read until the beginning of the file - while (i >= 0) { + while (lineIndex >= 0) { readLastBlock(); // i now points to the block before or is -1 // curString contains the last block, separated by " " @@ -522,8 +525,8 @@ private void extractYear() { * proceed to next non-empty line */ private void proceedToNextNonEmptyLine() { - while ((i < lines.length) && "".equals(lines[i].trim())) { - i++; + while ((lineIndex < lines.length) && "".equals(lines[lineIndex].trim())) { + lineIndex++; } } @@ -540,16 +543,16 @@ private void proceedToNextNonEmptyLine() { private void fillCurStringWithNonEmptyLines() { // ensure that curString does not end with " " curString = curString.trim(); - while ((i < lines.length) && !"".equals(lines[i])) { - String curLine = lines[i].trim(); + while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) { + String curLine = lines[lineIndex].trim(); if (!"".equals(curLine)) { if (!curString.isEmpty()) { // insert separating space if necessary curString = curString.concat(" "); } - curString = curString.concat(lines[i]); + curString = curString.concat(lines[lineIndex]); } - i++; + lineIndex++; } proceedToNextNonEmptyLine(); @@ -563,22 +566,22 @@ private void fillCurStringWithNonEmptyLines() { * invariant before/after: i points to line before the last handled block */ private void readLastBlock() { - while ((i >= 0) && "".equals(lines[i].trim())) { - i--; + while ((lineIndex >= 0) && "".equals(lines[lineIndex].trim())) { + lineIndex--; } // i is now at the end of a block - int end = i; + int end = lineIndex; // find beginning - while ((i >= 0) && !"".equals(lines[i])) { - i--; + while ((lineIndex >= 0) && !"".equals(lines[lineIndex])) { + lineIndex--; } // i is now the line before the beginning of the block // this fulfills the invariant curString = ""; - for (int j = i + 1; j <= end; j++) { + for (int j = lineIndex + 1; j <= end; j++) { curString = curString.concat(lines[j].trim()); if (j != end) { curString = curString.concat(" "); diff --git a/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java b/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java index db7396d047c..b82795865d6 100644 --- a/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java +++ b/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java @@ -1,6 +1,5 @@ package org.jabref.logic.importer.fileformat; -import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.Paths; @@ -10,6 +9,8 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.util.StandardFileType; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BibtexEntryTypes; +import org.jabref.model.entry.FieldName; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -34,14 +35,30 @@ public void testsGetExtensions() { @Test public void testGetDescription() { assertEquals( - "PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.", - importer.getDescription()); + "PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.", + importer.getDescription()); } @Test - public void doesNotHandleEncryptedPdfs() throws URISyntaxException { + public void doesNotHandleEncryptedPdfs() throws Exception { Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/encrypted.pdf").toURI()); List result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); assertEquals(Collections.emptyList(), result); } + + @Test + public void importTwiceWorksAsExpected() throws Exception { + Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/minimal.pdf").toURI()); + List result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); + + BibEntry expected = new BibEntry(BibtexEntryTypes.INPROCEEDINGS); + expected.setField(FieldName.AUTHOR, "1 "); + expected.setField(FieldName.TITLE, "Hello World"); + + List resultSecondImport = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries(); + assertEquals(Collections.singletonList(expected), result); + assertEquals(Collections.singletonList(expected), resultSecondImport); + + } + }