From 020cc975800a65cf2c3466b57ed91aa4502f554c Mon Sep 17 00:00:00 2001 From: Dominik Voigt Date: Mon, 28 Dec 2020 17:30:19 +0100 Subject: [PATCH] Improved detection of long DOI's within text (#7260) * Improved detection of long DOI's within text. fixes #7256. * Fix checkstyle Signed-off-by: Dominik Voigt Co-authored-by: Nikolaus Koopmann --- .../java/org/jabref/model/entry/identifier/DOI.java | 2 +- .../org/jabref/model/entry/identifier/DOITest.java | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/model/entry/identifier/DOI.java b/src/main/java/org/jabref/model/entry/identifier/DOI.java index 428930db187..ad18670b4f6 100644 --- a/src/main/java/org/jabref/model/entry/identifier/DOI.java +++ b/src/main/java/org/jabref/model/entry/identifier/DOI.java @@ -45,7 +45,7 @@ public class DOI implements Identifier { + "10" // directory indicator + "(?:\\.[0-9]+)+" // registrant codes + "[/:]" // divider - + "(?:[^\\s]+)" // suffix alphanumeric without space + + "(?:[^\\s,;]+[^,;(\\.\\s)])" // suffix alphanumeric without " "/","/";" and not ending on "."/","/";" + ")"; // end group \1 // Regex (Short DOI) diff --git a/src/test/java/org/jabref/model/entry/identifier/DOITest.java b/src/test/java/org/jabref/model/entry/identifier/DOITest.java index cc35b55fd5f..b1f9e4e5762 100644 --- a/src/test/java/org/jabref/model/entry/identifier/DOITest.java +++ b/src/test/java/org/jabref/model/entry/identifier/DOITest.java @@ -165,12 +165,23 @@ private static Stream testData() { // findDoiInsideArbitraryText Arguments.of("10.1006/jmbi.1998.2354", DOI.findInText("other stuff 10.1006/jmbi.1998.2354 end").get().getDOI()), + Arguments.of("10.1007/s10549-018-4743-9", + DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9. ").get().getDOI()), + Arguments.of("10.1007/s10549-018-4743-9", + DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9, ").get().getDOI()), + Arguments.of("10.1007/s10549-018-4743-9", + DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9;something else").get().getDOI()), + Arguments.of("10.1007/s10549-018-4743-9.1234", + DOI.findInText("bla doi:10.1007/s10549-018-4743-9.1234 with . in doi").get().getDOI()), // findShortDoiInsideArbitraryText Arguments.of("10/12ab", DOI.findInText("other stuff doi:10/12ab end").get().getDOI()), Arguments.of("10/12ab", DOI.findInText("other stuff /urn:doi:10/12ab end").get().getDOI()), Arguments.of("10%12ab", DOI.findInText("other stuff doi:10%12ab end").get().getDOI()), Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab end").get().getDOI()), + Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab, end").get().getDOI()), + Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab. end").get().getDOI()), + Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab; end").get().getDOI()), Arguments.of("10/1234", DOI.findInText("10/B(C)/15 \n" + " \n" + "10:51 \n" +