Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ArrayIndexOutOfBoundsException on second pdf import #4426

Merged
merged 3 commits into from
Oct 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Files without a defined external file type are now directly opened with the default application of the operating system
- We streamlined the process to rename and move files by removing the confirmation dialogs.
- We removed the redundant new lines of markings and wrapped the summary in the File annotation tab. [#3823](https://github.com/JabRef/jabref/issues/3823)
- We add auto url formatting when user paste link to URL field in entry editor. [#254](https://github.com/koppor/jabref/issues/254)
- We add auto url formatting when user paste link to URL field in entry editor. [koppor#254](https://github.com/koppor/jabref/issues/254)
- We added a minimal height for the entry editor so that it can no longer be hidden by accident. [#4279](https://github.com/JabRef/jabref/issues/4279)
- We added a new keyboard shortcut so that the entry editor could be closed by <kbd>Ctrl<kbd> + <kbd>E<kbd>. [#4222] (https://github.com/JabRef/jabref/issues/4222)
- We added an option in the preference dialog box, that allows user to pick the dark or light theme option. [#4130] (https://github.com/JabRef/jabref/issues/4130)
Expand Down Expand Up @@ -74,8 +74,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- We fixed an issue where files added via the "Attach file" contextmenu of an entry were not made relative. [#4201](https://github.com/JabRef/jabref/issues/4201) and [#4241](https://github.com/JabRef/jabref/issues/4241)
- We fixed an issue where author list parser can't generate bibtex for Chinese author. [#4169](https://github.com/JabRef/jabref/issues/4169)
- We fixed an issue where the list of XMP Exclusion fields in the preferences was not be saved [#4072](https://github.com/JabRef/jabref/issues/4072)
- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [#4367](https://github.com/JabRef/jabref/pull/4367)

- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [koppor#328](https://github.com/koppor/jabref/issues/328)
- We fixed an issue where only one PDF file could be imported [#4422](https://github.com/JabRef/jabref/issues/4422)



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ public class PdfContentImporter extends Importer {
// input lines into several lines
private String[] lines;
// current index in lines
private int i;
private int lineIndex;
private String curString;
private String year;


public PdfContentImporter(ImportFormatPreferences importFormatPreferences) {
this.importFormatPreferences = importFormatPreferences;

}
/**
* Removes all non-letter characters at the end
Expand Down Expand Up @@ -225,17 +226,19 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
// the different lines are joined into one and thereby separated by " "
lines = firstPageContents.split(System.lineSeparator());

lineIndex = 0; //to prevent array index out of bounds exception on second run we need to reset i to zero

proceedToNextNonEmptyLine();
if (i >= lines.length) {
if (lineIndex >= lines.length) {
// PDF could not be parsed or is empty
// return empty list
return new ParserResult();
}

// we start at the current line
curString = lines[i];
curString = lines[lineIndex];
// i might get incremented later and curString modified, too
i = i + 1;
lineIndex = lineIndex + 1;

String author;
String editor = null;
Expand Down Expand Up @@ -279,10 +282,10 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {

// after title: authors
author = null;
while ((i < lines.length) && !"".equals(lines[i])) {
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
// author names are unlikely to be lines among different lines
// treat them line by line
curString = streamlineNames(lines[i]);
curString = streamlineNames(lines[lineIndex]);
if (author == null) {
author = curString;
} else {
Expand All @@ -292,38 +295,38 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
author = author.concat(" and ").concat(curString);
}
}
i++;
lineIndex++;
}
curString = "";
i++;
lineIndex++;

// then, abstract and keywords follow
while (i < lines.length) {
curString = lines[i];
while (lineIndex < lines.length) {
curString = lines[lineIndex];
if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
if (curString.length() == "Abstract".length()) {
// only word "abstract" found -- skip line
curString = "";
} else {
curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
}
i++;
lineIndex++;
// fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
// whereas we need linebreak as separator
while ((i < lines.length) && !"".equals(lines[i])) {
curString = curString.concat(lines[i]).concat(System.lineSeparator());
i++;
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator());
lineIndex++;
}
abstractT = curString.trim();
i++;
lineIndex++;
} else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
if (curString.length() == "Keywords".length()) {
// only word "Keywords" found -- skip line
curString = "";
} else {
curString = curString.substring("Keywords".length() + 1).trim();
}
i++;
lineIndex++;
fillCurStringWithNonEmptyLines();
keywords = removeNonLettersAtEnd(curString);
} else {
Expand All @@ -340,18 +343,18 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
}
}

i++;
lineIndex++;
proceedToNextNonEmptyLine();
}
}

i = lines.length - 1;
lineIndex = lines.length - 1;

// last block: DOI, detailed information
// sometimes, this information is in the third last block etc...
// therefore, read until the beginning of the file

while (i >= 0) {
while (lineIndex >= 0) {
readLastBlock();
// i now points to the block before or is -1
// curString contains the last block, separated by " "
Expand Down Expand Up @@ -522,8 +525,8 @@ private void extractYear() {
* proceed to next non-empty line
*/
private void proceedToNextNonEmptyLine() {
while ((i < lines.length) && "".equals(lines[i].trim())) {
i++;
while ((lineIndex < lines.length) && "".equals(lines[lineIndex].trim())) {
lineIndex++;
}
}

Expand All @@ -540,16 +543,16 @@ private void proceedToNextNonEmptyLine() {
private void fillCurStringWithNonEmptyLines() {
// ensure that curString does not end with " "
curString = curString.trim();
while ((i < lines.length) && !"".equals(lines[i])) {
String curLine = lines[i].trim();
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
String curLine = lines[lineIndex].trim();
if (!"".equals(curLine)) {
if (!curString.isEmpty()) {
// insert separating space if necessary
curString = curString.concat(" ");
}
curString = curString.concat(lines[i]);
curString = curString.concat(lines[lineIndex]);
}
i++;
lineIndex++;
}

proceedToNextNonEmptyLine();
Expand All @@ -563,22 +566,22 @@ private void fillCurStringWithNonEmptyLines() {
* invariant before/after: i points to line before the last handled block
*/
private void readLastBlock() {
while ((i >= 0) && "".equals(lines[i].trim())) {
i--;
while ((lineIndex >= 0) && "".equals(lines[lineIndex].trim())) {
lineIndex--;
}
// i is now at the end of a block

int end = i;
int end = lineIndex;

// find beginning
while ((i >= 0) && !"".equals(lines[i])) {
i--;
while ((lineIndex >= 0) && !"".equals(lines[lineIndex])) {
lineIndex--;
}
// i is now the line before the beginning of the block
// this fulfills the invariant

curString = "";
for (int j = i + 1; j <= end; j++) {
for (int j = lineIndex + 1; j <= end; j++) {
curString = curString.concat(lines[j].trim());
if (j != end) {
curString = curString.concat(" ");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.logic.importer.fileformat;

import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -10,6 +9,8 @@
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.util.StandardFileType;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibtexEntryTypes;
import org.jabref.model.entry.FieldName;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -34,14 +35,30 @@ public void testsGetExtensions() {
@Test
public void testGetDescription() {
assertEquals(
"PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
importer.getDescription());
"PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
importer.getDescription());
}

@Test
public void doesNotHandleEncryptedPdfs() throws URISyntaxException {
public void doesNotHandleEncryptedPdfs() throws Exception {
Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/encrypted.pdf").toURI());
List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
assertEquals(Collections.emptyList(), result);
}

@Test
public void importTwiceWorksAsExpected() throws Exception {
Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/minimal.pdf").toURI());
List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();

BibEntry expected = new BibEntry(BibtexEntryTypes.INPROCEEDINGS);
expected.setField(FieldName.AUTHOR, "1 ");
expected.setField(FieldName.TITLE, "Hello World");

List<BibEntry> resultSecondImport = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
assertEquals(Collections.singletonList(expected), result);
assertEquals(Collections.singletonList(expected), resultSecondImport);

}

}