-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #99 from JabRef/doi-parser
Rewrite DOI parsing
- Loading branch information
Showing
12 changed files
with
281 additions
and
268 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
package net.sf.jabref.util; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.util.Objects; | ||
import java.util.Optional; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
public class DOI { | ||
private static final Log LOGGER = LogFactory.getLog(DOI.class); | ||
|
||
// DOI resolver | ||
public static final URI RESOLVER = URI.create("http://doi.org"); | ||
|
||
// Regex | ||
// (see http://www.doi.org/doi_handbook/2_Numbering.html) | ||
private static final String DOI_EXP = "" | ||
+ "(?:urn:)?" // optional urn | ||
+ "(?:doi:)?" // optional doi | ||
+ "(" // begin group \1 | ||
+ "10" // directory indicator | ||
+ "(?:\\.[0-9]+)+" // registrant codes | ||
+ "[/:]" // divider | ||
+ "(?:.+)" // suffix alphanumeric string | ||
+ ")"; // end group \1 | ||
|
||
private static final String HTTP_EXP = "https?://[^\\s]+?" + DOI_EXP; | ||
// Pattern | ||
private static final Pattern DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + DOI_EXP + "$", Pattern.CASE_INSENSITIVE); | ||
|
||
/** | ||
* Creates an Optional<DOI> from various schemes including URL, URN, and plain DOIs. | ||
* | ||
* Useful for suppressing the <c>IllegalArgumentException</c> of the Constructor | ||
* and checking for Optional.isPresent() instead. | ||
* | ||
* @param doi the DOI string | ||
* @return an Optional containing the DOI or an empty Optional | ||
*/ | ||
public static Optional<DOI> build(String doi) { | ||
try { | ||
return Optional.of(new DOI(doi)); | ||
} catch(NullPointerException | IllegalArgumentException e) { | ||
return Optional.empty(); | ||
} | ||
} | ||
|
||
// DOI | ||
private final String doi; | ||
|
||
/** | ||
* Creates a DOI from various schemes including URL, URN, and plain DOIs. | ||
* | ||
* @param doi the DOI string | ||
* @throws NullPointerException if DOI is null | ||
* @throws IllegalArgumentException if doi does not include a valid DOI | ||
* @return an instance of the DOI class | ||
*/ | ||
public DOI(String doi) { | ||
Objects.requireNonNull(doi); | ||
|
||
// Remove whitespace | ||
doi = doi.trim(); | ||
|
||
// HTTP URL decoding | ||
if(doi.matches(HTTP_EXP)) { | ||
try { | ||
// decodes path segment | ||
URI url = new URI(doi); | ||
doi = url.getScheme() + "://" + url.getHost() + url.getPath(); | ||
} catch(URISyntaxException e) { | ||
throw new IllegalArgumentException(doi + " is not a valid HTTP DOI."); | ||
} | ||
} | ||
|
||
// Extract DOI | ||
Matcher matcher = DOI_PATT.matcher(doi); | ||
if (matcher.find()) { | ||
// match only group \1 | ||
this.doi = matcher.group(1); | ||
} else { | ||
throw new IllegalArgumentException(doi + " is not a valid DOI."); | ||
} | ||
} | ||
|
||
/** | ||
* Return the plain DOI | ||
* | ||
* @return the plain DOI value. | ||
*/ | ||
public String getDOI() { | ||
return doi; | ||
} | ||
|
||
/** | ||
* Return a URL presentation for the DOI | ||
* | ||
* @return an encoded URL representation of the DOI | ||
*/ | ||
public String getURL() { | ||
try { | ||
URI uri = new URI(RESOLVER.getScheme(), RESOLVER.getHost(), "/" + doi, null); | ||
return uri.toASCIIString(); | ||
} catch(URISyntaxException e) { | ||
// should never happen | ||
LOGGER.error(doi + " could not be encoded as URL."); | ||
return ""; | ||
} | ||
} | ||
} |
Oops, something went wrong.