Skip to content

Commit

Permalink
decode-DOI-contains-masked-characters#8787
Browse files Browse the repository at this point in the history
Modify DOI.java and DoiCleanup.java to decode DOIs that contain masked (percent-encoded) characters.
Add DoiDecodeCleanupTest.java
  • Loading branch information
fly-ing-fish committed May 17, 2022
1 parent 6a2332f commit 5edaf43
Show file tree
Hide file tree
Showing 3 changed files with 323 additions and 32 deletions.
49 changes: 46 additions & 3 deletions src/main/java/org/jabref/logic/cleanup/DoiCleanup.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jabref.logic.cleanup;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand All @@ -14,23 +16,57 @@
import org.jabref.model.entry.identifier.DOI;

/**
* Formats the DOI (e.g. removes the http part) and also moves DOIs from the note, url or ee field to the doi field.
*/
public class DoiCleanup implements CleanupJob {

/**
* Fields to check for DOIs.
*/
private static final List<Field> FIELDS = Arrays.asList(StandardField.NOTE, StandardField.URL, new UnknownField("ee"));

@Override
public List<FieldChange> cleanup(BibEntry entry) {

List<FieldChange> changes = new ArrayList<>();

// First check if the Doi Field is empty
if (entry.hasField(StandardField.DOI)) {
String doiFieldValue = entry.getField(StandardField.DOI).orElse(null);

String decodeDoiFieldValue = "";
try {
decodeDoiFieldValue = URLDecoder.decode(doiFieldValue, "UTF-8");
} catch (
UnsupportedEncodingException e) {
decodeDoiFieldValue = doiFieldValue;
}
doiFieldValue = decodeDoiFieldValue;

Optional<DOI> doi = DOI.parse(doiFieldValue);

if (doi.isPresent()) {
Expand All @@ -45,7 +81,7 @@ public List<FieldChange> cleanup(BibEntry entry) {
// Doi field seems to contain Doi -> cleanup note, url, ee field
for (Field field : FIELDS) {
entry.getField(field).flatMap(DOI::parse)
.ifPresent(unused -> removeFieldValue(entry, field, changes));
.ifPresent(unused -> removeFieldValue(entry, field, changes));
}
}
} else {
Expand All @@ -68,4 +104,11 @@ private void removeFieldValue(BibEntry entry, Field field, List<FieldChange> cha
CleanupJob eraser = new FieldFormatterCleanup(field, new ClearFormatter());
changes.addAll(eraser.cleanup(entry));
}

/**
 * Decodes percent-encoded ("masked") characters in a DOI value, e.g. {@code %2F} becomes {@code /}.
 *
 * <p>A literal {@code +} is preserved: {@link URLDecoder} follows form-decoding rules and would
 * otherwise turn it into a space. A value containing a malformed percent sequence (for example a
 * trailing {@code %}) is treated as not being URL-encoded and is returned unchanged.
 *
 * @param doiValue the raw DOI field value, may be {@code null}
 * @return the decoded value, the unchanged input if it cannot be decoded, or {@code null} if the
 *         input was {@code null}
 * @throws UnsupportedEncodingException if the UTF-8 charset is not available on this JVM
 */
private String decodeDoi(String doiValue) throws UnsupportedEncodingException {
    if (doiValue == null) {
        return null;
    }
    try {
        // Escape '+' first so the decoder restores it as '+' instead of converting it to ' '.
        return URLDecoder.decode(doiValue.replace("+", "%2B"), "UTF-8");
    } catch (IllegalArgumentException e) {
        // Malformed escape sequence: the value is not validly percent-encoded, so keep it as is.
        return doiValue;
    }
}
}
Loading

0 comments on commit 5edaf43

Please sign in to comment.