Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check duplicate DOI #6333

Merged
merged 21 commits into from
May 15, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We added support for basic markdown in custom formatted previews [#6194](https://github.com/JabRef/jabref/issues/6194)
- We now show the number of items found and selected to import in the online search dialog. [#6248](https://github.com/JabRef/jabref/pull/6248)
- We created a new install screen for macOS. [#5759](https://github.com/JabRef/jabref/issues/5759)
- We added a new integrity check for duplicate DOIs. [koppor#339](https://github.com/koppor/jabref/issues/339)

### Changed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ protected List<IntegrityMessage> call() {
List<IntegrityMessage> result = new ArrayList<>();

ObservableList<BibEntry> entries = database.getDatabase().getEntries();
result.addAll(check.checkDatabase(database.getDatabase()));
for (int i = 0; i < entries.size(); i++) {
if (isCancelled()) {
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.List;
import java.util.Map;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
Expand All @@ -17,7 +16,6 @@ public class BibStringChecker implements Checker {
// Detect # if it doesn't have a \ in front of it or if it starts the string
private static final Pattern UNESCAPED_HASH = Pattern.compile("(?<!\\\\)#|^#");


/**
* Checks, if there is an even number of unescaped #
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.util.Collections;
import java.util.List;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.InternalField;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.List;
import java.util.Optional;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.InternalField;
Expand All @@ -21,7 +20,7 @@ public List<IntegrityMessage> check(BibEntry entry) {
Optional<String> author = entry.getField(StandardField.AUTHOR);
Optional<String> title = entry.getField(StandardField.TITLE);
Optional<String> year = entry.getField(StandardField.YEAR);
if (!author.isPresent() || !title.isPresent() || !year.isPresent()) {
if (author.isEmpty() || title.isEmpty() || year.isEmpty()) {
return Collections.emptyList();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import org.jabref.logic.bibtexkeypattern.BibtexKeyGenerator;
import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternPreferences;
import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
Expand All @@ -26,7 +25,7 @@ public BibtexkeyDeviationChecker(BibDatabaseContext bibDatabaseContext, BibtexKe
@Override
public List<IntegrityMessage> check(BibEntry entry) {
Optional<String> valuekey = entry.getCiteKeyOptional();
if (!valuekey.isPresent()) {
if (valuekey.isEmpty()) {
return Collections.emptyList();
}

Expand Down
10 changes: 10 additions & 0 deletions src/main/java/org/jabref/logic/integrity/Checker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.jabref.logic.integrity;

import java.util.List;

import org.jabref.model.entry.BibEntry;

/**
 * A check that is applied to a single {@link BibEntry} and reports its findings as
 * {@link IntegrityMessage}s.
 */
@FunctionalInterface
public interface Checker {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to EntryChecker?

/**
 * Checks the given entry.
 *
 * @param entry the entry to inspect
 * @return the messages produced for the entry
 */
List<IntegrityMessage> check(BibEntry entry);
}
10 changes: 10 additions & 0 deletions src/main/java/org/jabref/logic/integrity/DatabaseChecker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.jabref.logic.integrity;

import java.util.List;

import org.jabref.model.database.BibDatabase;

/**
 * A check that is applied to a whole {@link BibDatabase} instead of a single entry and reports
 * its findings as {@link IntegrityMessage}s.
 */
@FunctionalInterface
public interface DatabaseChecker {
/**
 * Checks the given database.
 *
 * @param database the database to inspect
 * @return the messages produced for the database
 */
List<IntegrityMessage> check(BibDatabase database);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.jabref.logic.integrity;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import javafx.collections.ObservableList;

import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

/**
 * Database-level integrity check that reports entries which share their DOI with at least one
 * other entry of the same database.
 */
public class DoiDuplicationChecker implements DatabaseChecker {

/**
 * Collects the entries of the database per DOI and reports every entry whose DOI occurs more
 * than once. Entries for which {@code getDOI()} yields no value are ignored.
 *
 * @param database the database whose entries are inspected
 * @return one message per entry that belongs to a group of two or more entries with the same
 *         DOI, each attached to {@link StandardField#DOI}
 */
@Override
public List<IntegrityMessage> check(BibDatabase database) {
ObservableList<BibEntry> bibEntries = database.getEntries();
// NOTE(review): the value lists are modified after they were put into the BiMap, and only the
// lists themselves are read back below (via inverse().keySet()). A plain Map<DOI, List<BibEntry>>
// looks sufficient here - confirm whether the BiMap is needed.
BiMap<DOI, List<BibEntry>> duplicateMap = HashBiMap.create(bibEntries.size());
for (BibEntry bibEntry : bibEntries) {
// Append the entry to the list kept for its DOI, creating that list on first use.
bibEntry.getDOI().ifPresent(doi ->
duplicateMap.computeIfAbsent(doi, absentDoi -> new ArrayList<>()).add(bibEntry));
}

// The keys of the inverse view are the per-DOI entry lists; a list with more than one entry
// means that the DOI is used by several entries, and each of them gets its own message.
return duplicateMap.inverse().keySet().stream()
.filter(list -> list.size() > 1)
.flatMap(list -> list.stream())
.map(item -> new IntegrityMessage(Localization.lang("Unique DOI used in multiple entries"), item, StandardField.DOI))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think "The same DOI is used..." is better

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just "Same DOI..." will do.

.collect(Collectors.toList());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.strings.StringUtil;

public class DOIValidityChecker implements ValueChecker {

public class DoiValidityChecker implements ValueChecker {
@Override
public Optional<String> checkValue(String value) {
if (StringUtil.isBlank(value)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import java.util.Objects;
import java.util.Set;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
Expand All @@ -28,14 +27,14 @@ public List<IntegrityMessage> check(BibEntry entry) {
for (Entry<Field, String> field : entry.getFieldMap().entrySet()) {
Set<FieldProperty> properties = field.getKey().getProperties();
if (properties.contains(FieldProperty.SINGLE_ENTRY_LINK)) {
if (!database.getEntryByKey(field.getValue()).isPresent()) {
if (database.getEntryByKey(field.getValue()).isEmpty()) {
result.add(new IntegrityMessage(Localization.lang("Referenced BibTeX key does not exist"), entry,
field.getKey()));
}
} else if (properties.contains(FieldProperty.MULTIPLE_ENTRY_LINK)) {
List<String> keys = new ArrayList<>(Arrays.asList(field.getValue().split(",")));
for (String key : keys) {
if (!database.getEntryByKey(key).isPresent()) {
if (database.getEntryByKey(key).isEmpty()) {
result.add(new IntegrityMessage(
Localization.lang("Referenced BibTeX key does not exist") + ": " + key, entry,
field.getKey()));
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/integrity/FieldChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import org.jabref.model.entry.field.Field;
import org.jabref.model.util.OptionalUtil;

public class FieldChecker implements IntegrityCheck.Checker {
public class FieldChecker implements Checker {
protected final Field field;
private final ValueChecker checker;

Expand All @@ -21,7 +21,7 @@ public FieldChecker(Field field, ValueChecker checker) {
@Override
public List<IntegrityMessage> check(BibEntry entry) {
Optional<String> value = entry.getField(field);
if (!value.isPresent()) {
if (value.isEmpty()) {
return Collections.emptyList();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ private static Multimap<Field, ValueChecker> getAllMap(BibDatabaseContext databa
fieldCheckers.put(StandardField.BOOKTITLE, new BooktitleChecker());
fieldCheckers.put(StandardField.TITLE, new BracketChecker());
fieldCheckers.put(StandardField.TITLE, new TitleChecker(databaseContext));
fieldCheckers.put(StandardField.DOI, new DOIValidityChecker());
fieldCheckers.put(StandardField.DOI, new DoiValidityChecker());
fieldCheckers.put(StandardField.EDITION, new EditionChecker(databaseContext, allowIntegerEdition));
fieldCheckers.put(StandardField.FILE, new FileChecker(databaseContext, filePreferences));
fieldCheckers.put(StandardField.HOWPUBLISHED, new HowPublishedChecker(databaseContext));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
Expand Down
85 changes: 44 additions & 41 deletions src/main/java/org/jabref/logic/integrity/IntegrityCheck.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package org.jabref.logic.integrity;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternPreferences;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand All @@ -14,75 +15,77 @@
public class IntegrityCheck {

private final BibDatabaseContext bibDatabaseContext;
private final FilePreferences filePreferences;
private final BibtexKeyPatternPreferences bibtexKeyPatternPreferences;
private final JournalAbbreviationRepository journalAbbreviationRepository;
private final boolean enforceLegalKey;
private final boolean allowIntegerEdition;
private final FieldCheckers fieldCheckers;
private final List<Checker> entryCheckers;

/**
 * Creates the integrity check and builds the field checkers and entry checkers once, so that
 * the same checker instances are reused for every entry that is checked.
 *
 * @param bibDatabaseContext            the database context to check; must not be {@code null}
 * @param filePreferences               preferences handed to the field checkers; must not be {@code null}
 * @param bibtexKeyPatternPreferences   preferences handed to the key deviation checker; must not be {@code null}
 * @param journalAbbreviationRepository repository handed to the journal abbreviation checkers; must not be {@code null}
 * @param enforceLegalKey               passed through to the field checkers
 * @param allowIntegerEdition           passed through to the field checkers
 * @throws NullPointerException if one of the object arguments is {@code null}
 */
public IntegrityCheck(BibDatabaseContext bibDatabaseContext,
                      FilePreferences filePreferences,
                      BibtexKeyPatternPreferences bibtexKeyPatternPreferences,
                      JournalAbbreviationRepository journalAbbreviationRepository,
                      boolean enforceLegalKey,
                      boolean allowIntegerEdition) {
    // Every final field is assigned exactly once; the context keeps its null check instead of
    // being assigned a second time without one.
    this.bibDatabaseContext = Objects.requireNonNull(bibDatabaseContext);
    this.filePreferences = Objects.requireNonNull(filePreferences);
    this.bibtexKeyPatternPreferences = Objects.requireNonNull(bibtexKeyPatternPreferences);
    this.journalAbbreviationRepository = Objects.requireNonNull(journalAbbreviationRepository);
    this.enforceLegalKey = enforceLegalKey;
    this.allowIntegerEdition = allowIntegerEdition;

    fieldCheckers = new FieldCheckers(bibDatabaseContext,
            filePreferences,
            journalAbbreviationRepository,
            enforceLegalKey,
            allowIntegerEdition);

    // Checkers that apply regardless of the database mode. The list is mutable because the
    // mode-specific checkers are appended below.
    entryCheckers = new ArrayList<>(List.of(
            new BibtexKeyChecker(),
            new TypeChecker(),
            new BibStringChecker(),
            new HTMLCharacterChecker(),
            new EntryLinkChecker(bibDatabaseContext.getDatabase()),
            new BibtexkeyDeviationChecker(bibDatabaseContext, bibtexKeyPatternPreferences),
            new BibtexKeyDuplicationChecker(bibDatabaseContext.getDatabase())
    ));

    if (bibDatabaseContext.isBiblatexMode()) {
        // biblatex keeps the journal name in the journaltitle field
        entryCheckers.add(new JournalInAbbreviationListChecker(StandardField.JOURNALTITLE, journalAbbreviationRepository));
    } else {
        // BibTeX only checkers
        entryCheckers.addAll(List.of(
                new JournalInAbbreviationListChecker(StandardField.JOURNAL, journalAbbreviationRepository),
                new ASCIICharacterChecker(),
                new NoBibtexFieldChecker(),
                new BibTeXEntryTypeChecker())
        );
    }
}

public List<IntegrityMessage> checkDatabase() {
List<IntegrityMessage> executeAllCheckers() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Name it simply check?

List<IntegrityMessage> result = new ArrayList<>();

for (BibEntry entry : bibDatabaseContext.getDatabase().getEntries()) {
BibDatabase database = bibDatabaseContext.getDatabase();

for (BibEntry entry : database.getEntries()) {
result.addAll(checkEntry(entry));
}

result.addAll(checkDatabase(database));

return result;
}

public List<IntegrityMessage> checkEntry(BibEntry entry) {
List<IntegrityMessage> result = new ArrayList<>();

if (entry == null) {
return result;
}

FieldCheckers fieldCheckers = new FieldCheckers(bibDatabaseContext,
filePreferences,
journalAbbreviationRepository,
enforceLegalKey,
allowIntegerEdition);
for (FieldChecker checker : fieldCheckers.getAll()) {
result.addAll(checker.check(entry));
for (FieldChecker fieldChecker : fieldCheckers.getAll()) {
result.addAll(fieldChecker.check(entry));
}

if (!bibDatabaseContext.isBiblatexMode()) {
// BibTeX only checkers
result.addAll(new ASCIICharacterChecker().check(entry));
result.addAll(new NoBibtexFieldChecker().check(entry));
result.addAll(new BibTeXEntryTypeChecker().check(entry));
result.addAll(new JournalInAbbreviationListChecker(StandardField.JOURNAL, journalAbbreviationRepository).check(entry));
} else {
result.addAll(new JournalInAbbreviationListChecker(StandardField.JOURNALTITLE, journalAbbreviationRepository).check(entry));
for (Checker entryChecker : entryCheckers) {
result.addAll(entryChecker.check(entry));
}

result.addAll(new BibtexKeyChecker().check(entry));
result.addAll(new TypeChecker().check(entry));
result.addAll(new BibStringChecker().check(entry));
result.addAll(new HTMLCharacterChecker().check(entry));
result.addAll(new EntryLinkChecker(bibDatabaseContext.getDatabase()).check(entry));
result.addAll(new BibtexkeyDeviationChecker(bibDatabaseContext, bibtexKeyPatternPreferences).check(entry));
result.addAll(new BibtexKeyDuplicationChecker(bibDatabaseContext.getDatabase()).check(entry));

return result;
}

@FunctionalInterface
public interface Checker {
List<IntegrityMessage> check(BibEntry entry);
/**
 * Runs the database-level checks on the given database. At present this delegates to a newly
 * created {@link DoiDuplicationChecker} only.
 *
 * @param database the database to check
 * @return the messages reported by the DOI duplication check
 */
public List<IntegrityMessage> checkDatabase(BibDatabase database) {
return new DoiDuplicationChecker().check(database);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import org.jabref.model.entry.field.Field;

public final class IntegrityMessage implements Cloneable {

private final BibEntry entry;

private final Field field;
private final String message;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
Expand All @@ -24,7 +23,7 @@ public JournalInAbbreviationListChecker(Field field, JournalAbbreviationReposito
@Override
public List<IntegrityMessage> check(BibEntry entry) {
Optional<String> value = entry.getField(field);
if (!value.isPresent()) {
if (value.isEmpty()) {
return Collections.emptyList();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.Set;
import java.util.stream.Collectors;

import org.jabref.logic.integrity.IntegrityCheck.Checker;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.BibField;
Expand Down
Loading