Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logic for parsing references from last page of PDF #11156

Merged
merged 19 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/main/java/org/jabref/gui/actions/SimpleCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ public abstract class SimpleCommand extends CommandBase {

protected ReadOnlyStringWrapper statusMessage = new ReadOnlyStringWrapper("");

/**
* Returns the current status message of this command.
*
* @return the string currently held by {@code statusMessage}
*/
public String getStatusMessage() {
return statusMessage.get();
}

/**
* Exposes the status message as a property.
*
* @return the read-only property obtained from the {@code statusMessage} wrapper
*/
public ReadOnlyStringProperty statusMessageProperty() {
return statusMessage.getReadOnlyProperty();
}
Expand Down
141 changes: 120 additions & 21 deletions src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package org.jabref.gui.maintable;

import java.nio.file.Path;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.gui.DialogService;
import org.jabref.gui.StateManager;
Expand All @@ -13,13 +17,22 @@
import org.jabref.gui.util.BackgroundTask;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.BibliopgraphyFromPdfImporter;
import org.jabref.logic.importer.util.GrobidService;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.entry.field.StandardField;
import org.jabref.preferences.PreferencesService;

import org.jspecify.annotations.NonNull;
import org.jspecify.annotations.Nullable;

/**
* SIDE EFFECT: Sets the "cites" field of the entry having the linked files
*/
public class ExtractReferencesAction extends SimpleCommand {
/** Number of linked files above which the user is asked for confirmation before processing starts. */
private static final int FILES_LIMIT = 10;

Expand All @@ -29,6 +42,7 @@ public class ExtractReferencesAction extends SimpleCommand {
private final BibEntry entry;
private final LinkedFile linkedFile;
private final TaskExecutor taskExecutor;
private final BibliopgraphyFromPdfImporter bibliopgraphyFromPdfImporter;

public ExtractReferencesAction(DialogService dialogService,
StateManager stateManager,
Expand All @@ -37,24 +51,30 @@ public ExtractReferencesAction(DialogService dialogService,
this(dialogService, stateManager, preferencesService, null, null, taskExecutor);
}

public ExtractReferencesAction(DialogService dialogService,
StateManager stateManager,
PreferencesService preferencesService,
BibEntry entry,
LinkedFile linkedFile,
TaskExecutor taskExecutor) {
/**
* Can be used to bind the action on a context menu in the linked file view (future work)
*
* @param entry the entry to handle (can be null)
* @param linkedFile the linked file (can be null)
*/
private ExtractReferencesAction(@NonNull DialogService dialogService,
@NonNull StateManager stateManager,
@NonNull PreferencesService preferencesService,
@Nullable BibEntry entry,
@Nullable LinkedFile linkedFile,
@NonNull TaskExecutor taskExecutor) {
this.dialogService = dialogService;
this.stateManager = stateManager;
this.preferencesService = preferencesService;
this.entry = entry;
this.linkedFile = linkedFile;
this.taskExecutor = taskExecutor;
bibliopgraphyFromPdfImporter = new BibliopgraphyFromPdfImporter(preferencesService.getCitationKeyPatternPreferences());

if (this.linkedFile == null) {
this.executable.bind(
ActionHelper.needsEntriesSelected(stateManager)
.and(ActionHelper.hasLinkedFileForSelectedEntries(stateManager))
.and(this.preferencesService.getGrobidPreferences().grobidEnabledProperty())
);
} else {
this.setExecutable(true);
Expand All @@ -68,34 +88,113 @@ public void execute() {

private void extractReferences() {
stateManager.getActiveDatabase().ifPresent(databaseContext -> {
List<BibEntry> selectedEntries = new LinkedList<>();
boolean online = this.preferencesService.getGrobidPreferences().isGrobidEnabled();

List<BibEntry> selectedEntries;
if (entry == null) {
selectedEntries = stateManager.getSelectedEntries();
} else {
selectedEntries.add(entry);
selectedEntries = List.of(entry);
}

List<Path> fileList = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences()));
if (fileList.size() > FILES_LIMIT) {
boolean continueOpening = dialogService.showConfirmationDialogAndWait(Localization.lang("Processing a large number of files"),
Localization.lang("You are about to process %0 files. Continue?", fileList.size()),
Localization.lang("Continue"), Localization.lang("Cancel"));
if (!continueOpening) {
Callable<ParserResult> parserResultCallable;
if (online) {
parserResultCallable = getParserResultCallableOnline(databaseContext, selectedEntries);
if (parserResultCallable == null) {
return;
}
} else {
parserResultCallable = getParserResultCallableOffline(databaseContext, selectedEntries);
}

Callable<ParserResult> parserResultCallable = () -> new ParserResult(
new GrobidService(this.preferencesService.getGrobidPreferences()).processReferences(fileList, preferencesService.getImportFormatPreferences())
);
BackgroundTask<ParserResult> task = BackgroundTask.wrap(parserResultCallable)
.withInitialMessage(Localization.lang("Processing PDF(s)"));

task.onFailure(dialogService::showErrorDialogAndWait);

ImportEntriesDialog dialog = new ImportEntriesDialog(stateManager.getActiveDatabase().get(), task);
dialog.setTitle(Localization.lang("Extract References"));
String title;
if (online) {
title = Localization.lang("Extract References (online)");
} else {
title = Localization.lang("Extract References (offline)");
}
dialog.setTitle(title);
dialogService.showCustomDialogAndWait(dialog);
});
}

/**
 * Creates the callable performing the offline extraction using the BibliopgraphyFromPdfImporter.
 * <p>
 * The linked files are resolved entry by entry, so that each file is parsed exactly once and is handled
 * together with the entry it is linked to. Entries without any resolvable linked file are skipped.
 *
 * @param databaseContext the library providing the file directories used to resolve the linked files
 * @param selectedEntries the entries whose linked files should be parsed
 * @return a callable returning a single ParserResult with the entries of all parsed files;
 *         the result contains no entries if none of the entries has a resolvable linked file
 */
private @NonNull Callable<ParserResult> getParserResultCallableOffline(BibDatabaseContext databaseContext, List<BibEntry> selectedEntries) {
    return () -> {
        // We need to have ParserResult handled at the importer, because it imports the meta data (library type, encoding, ...)
        // Therefore, the result of the first parsed file is used as base and all other results are appended to it.
        ParserResult result = null;

        for (BibEntry currentEntry : selectedEntries) {
            List<Path> fileList = FileUtil.getListOfLinkedFiles(List.of(currentEntry), databaseContext.getFileDirectories(preferencesService.getFilePreferences()));
            Iterator<Path> fileListIterator = fileList.iterator();
            if (!fileListIterator.hasNext()) {
                // No file could be resolved for this entry - nothing to parse
                continue;
            }

            if (result == null) {
                result = bibliopgraphyFromPdfImporter.importDatabase(fileListIterator.next());
            }

            // remaining files of the current entry are appended to result
            extractReferences(fileListIterator, result, currentEntry);
        }

        if (result == null) {
            // Neither an entry nor a resolvable file was available
            return new ParserResult(List.of());
        }
        return result;
    };
}

/** Matches a leading reference number in square brackets (e.g., "[12]") and captures the number. */
private static final Pattern REFERENCE_NUMBER_PATTERN = Pattern.compile("^\\[(\\d+)\\]");

/**
 * Parses the given files, appends the parsed entries to the given result, and stores the citation keys of the
 * entries contained in the result in the "cites" field of the given entry.
 * <p>
 * Entries without citation key get a generated one: the citation key of the given entry ("unknown" if it has none),
 * followed by "-" and the reference number found at the beginning of the comment field. If there is no such number,
 * the position of the entry in the result is used instead.
 * <p>
 * NOTE(review): the citation keys are collected from ALL entries of the result, including entries added by earlier
 * calls for other source entries - confirm whether that is intended.
 *
 * @param fileListIterator the files still to be parsed
 * @param result           the result the parsed entries are appended to
 * @param currentEntry     the entry whose "cites" field is set
 */
private void extractReferences(Iterator<Path> fileListIterator, ParserResult result, BibEntry currentEntry) {
    while (fileListIterator.hasNext()) {
        result.getDatabase().insertEntries(bibliopgraphyFromPdfImporter.importDatabase(fileListIterator.next()).getDatabase().getEntries());
    }

    String sourceCitationKey = currentEntry.getCitationKey().orElse("unknown");

    StringJoiner cites = new StringJoiner(",");
    int count = 0;
    for (BibEntry importedEntry : result.getDatabase().getEntries()) {
        count++;
        Optional<String> citationKey = importedEntry.getCitationKey();
        // Deliberately a plain if/else instead of Optional#map/orElseGet: the generated key has to be added
        // to the list, too, and its creation needs the non-final counter.
        if (citationKey.isPresent()) {
            cites.add(citationKey.get());
        } else {
            // Could happen if no author and no year is present
            // We use the number of the comment field (because there is no other way to get the number reliably)
            Matcher matcher = REFERENCE_NUMBER_PATTERN.matcher(importedEntry.getField(StandardField.COMMENT).orElse(""));
            String newCitationKey;
            // find() performs the match; hasMatch() only reports the outcome of a previous match operation
            if (matcher.find()) {
                newCitationKey = sourceCitationKey + "-" + matcher.group(1);
            } else {
                newCitationKey = sourceCitationKey + "-" + count;
            }
            importedEntry.setCitationKey(newCitationKey);
            cites.add(newCitationKey);
        }
    }
    currentEntry.setField(StandardField.CITES, cites.toString());
}

/**
 * Creates the callable performing the online extraction using the GrobidService.
 * <p>
 * If more than FILES_LIMIT linked files would be processed, the user is asked for confirmation first.
 *
 * @param databaseContext the library providing the file directories used to resolve the linked files
 * @param selectedEntries the entries whose linked files should be processed
 * @return the callable wrapping the references returned by the GrobidService into a ParserResult,
 *         or null if the user declined to process the files
 */
private @Nullable Callable<ParserResult> getParserResultCallableOnline(BibDatabaseContext databaseContext, List<BibEntry> selectedEntries) {
    List<Path> linkedFiles = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences()));

    // The dialog is shown only if the limit is exceeded (short-circuit evaluation)
    boolean limitExceeded = linkedFiles.size() > FILES_LIMIT;
    if (limitExceeded && !dialogService.showConfirmationDialogAndWait(
            Localization.lang("Processing a large number of files"),
            Localization.lang("You are about to process %0 files. Continue?", linkedFiles.size()),
            Localization.lang("Continue"),
            Localization.lang("Cancel"))) {
        return null;
    }

    return () -> {
        GrobidService grobidService = new GrobidService(this.preferencesService.getGrobidPreferences());
        return new ParserResult(grobidService.processReferences(linkedFiles, preferencesService.getImportFormatPreferences()));
    };
}
}
11 changes: 0 additions & 11 deletions src/main/java/org/jabref/gui/menus/ChangeEntryTypeAction.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import javax.swing.undo.UndoManager;

import javafx.beans.property.ReadOnlyStringProperty;
import javafx.beans.property.ReadOnlyStringWrapper;

import org.jabref.gui.EntryTypeView;
Expand Down Expand Up @@ -36,14 +35,4 @@ public void execute() {
.ifPresent(change -> compound.addEdit(new UndoableChangeType(change))));
undoManager.addEdit(compound);
}

/**
* Returns the current status message of this action.
*
* @return the string currently held by {@code statusMessage}
*/
@Override
public String getStatusMessage() {
return statusMessage.get();
}

/**
* Exposes the status message as a property.
*
* @return the read-only property obtained from {@code statusMessageProperty}
*/
@Override
public ReadOnlyStringProperty statusMessageProperty() {
return statusMessageProperty.getReadOnlyProperty();
}
}
32 changes: 16 additions & 16 deletions src/main/java/org/jabref/logic/importer/Importer.java
Original file line number Diff line number Diff line change
Expand Up @@ -168,22 +168,6 @@ public static BufferedReader getReader(InputStream stream) {
return new BufferedReader(reader);
}

/**
* Returns the name of this import format.
*
* <p>The name must be unique.</p>
*
* @return format name, must be unique and not <code>null</code>
*/
public abstract String getName();

/**
* Returns the type of files that this importer can read
*
* @return {@link FileType} corresponding to the importer
*/
public abstract FileType getFileType();

/**
* Returns a one-word ID which identifies this importer. Used for example, to identify the importer when used from
* the command line.
Expand All @@ -202,6 +186,15 @@ public String getId() {
return result.toString();
}

/**
* Returns the name of this import format.
*
* <p>The name must be unique.</p>
*
* @return format name, must be unique and not <code>null</code>
*/
public abstract String getName();

/**
* Returns the description of the import format.
* <p>
Expand All @@ -216,6 +209,13 @@ public String getId() {
*/
public abstract String getDescription();

/**
* Returns the type of files that this importer can read
*
* @return {@link FileType} corresponding to the importer
*/
public abstract FileType getFileType();

@Override
public int hashCode() {
return getName().hashCode();
Expand Down
Loading
Loading