Skip to content

Commit

Permalink
Improved detection of long DOIs within text (#7260)
Browse files Browse the repository at this point in the history
* Improved detection of long DOIs within text. Fixes #7256.

* Fix checkstyle

Signed-off-by: Dominik Voigt <Dominik.ingo.voigt@gmail.com>

Co-authored-by: Nikolaus Koopmann <nikolauskoopmann@gmail.com>
  • Loading branch information
DominikVoigt and PremKolar authored Dec 28, 2020
1 parent 27864e9 commit 020cc97
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public class DOI implements Identifier {
+ "10" // directory indicator
+ "(?:\\.[0-9]+)+" // registrant codes
+ "[/:]" // divider
+ "(?:[^\\s]+)" // suffix alphanumeric without space
+ "(?:[^\\s,;]+[^,;(\\.\\s)])" // suffix alphanumeric without " "/","/";" and not ending on "."/","/";"
+ ")"; // end group \1

// Regex (Short DOI)
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/org/jabref/model/entry/identifier/DOITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,23 @@ private static Stream<Arguments> testData() {
// findDoiInsideArbitraryText
Arguments.of("10.1006/jmbi.1998.2354",
DOI.findInText("other stuff 10.1006/jmbi.1998.2354 end").get().getDOI()),
Arguments.of("10.1007/s10549-018-4743-9",
DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9. ").get().getDOI()),
Arguments.of("10.1007/s10549-018-4743-9",
DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9, ").get().getDOI()),
Arguments.of("10.1007/s10549-018-4743-9",
DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9;something else").get().getDOI()),
Arguments.of("10.1007/s10549-018-4743-9.1234",
DOI.findInText("bla doi:10.1007/s10549-018-4743-9.1234 with . in doi").get().getDOI()),

// findShortDoiInsideArbitraryText
Arguments.of("10/12ab", DOI.findInText("other stuff doi:10/12ab end").get().getDOI()),
Arguments.of("10/12ab", DOI.findInText("other stuff /urn:doi:10/12ab end").get().getDOI()),
Arguments.of("10%12ab", DOI.findInText("other stuff doi:10%12ab end").get().getDOI()),
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab end").get().getDOI()),
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab, end").get().getDOI()),
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab. end").get().getDOI()),
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab; end").get().getDOI()),
Arguments.of("10/1234", DOI.findInText("10/B(C)/15 \n" +
" \n" +
"10:51 \n" +
Expand Down

0 comments on commit 020cc97

Please sign in to comment.