From c5399a77668bf2bb1dd121b0ff646015b20e4285 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Tue, 26 Sep 2023 16:20:05 +0200 Subject: [PATCH 1/7] Exclude check for ampersand (&) at verbatim fields --- .../logic/integrity/AmpersandChecker.java | 5 +++++ .../logic/integrity/AmpersandCheckerTest.java | 14 ++++++++++++++ .../integrity/HTMLCharacterCheckerTest.java | 19 +++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java index 9da189890c0..560e77fbb13 100644 --- a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -9,11 +9,13 @@ import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.FieldProperty; import com.google.common.base.CharMatcher; /** * Checks if the BibEntry contains unescaped ampersands. + * This is done in nonverbatim fields. Similar to {@link HTMLCharacterChecker} */ public class AmpersandChecker implements EntryChecker { // matches for an & preceded by any number of \ @@ -24,6 +26,9 @@ public List check(BibEntry entry) { List results = new ArrayList<>(); for (Map.Entry field : entry.getFieldMap().entrySet()) { + if (field.getKey().getProperties().contains(FieldProperty.VERBATIM)) { + continue; + } // counts the number of even \ occurrences preceding an & long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) .results() diff --git a/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java b/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java index 81d9942c162..1efdd5ae82d 100644 --- a/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java +++ b/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java @@ -65,4 +65,18 @@ void entryWithMultipleEscapedAndUnescapedAmpersands() { entry.setField(StandardField.AFTERWORD, "May the force be with you & live long \\\\& prosper \\& to infinity \\\\\\& beyond & assemble \\\\\\\\& excelsior!"); assertEquals(List.of(new IntegrityMessage("Found 4 unescaped '&'", entry, StandardField.AFTERWORD)), checker.check(entry)); } + + static Stream entryWithVerabitmFieldsNotCausingMessages() { + return Stream.of( + Arguments.of(StandardField.FILE, "one & another.pdf"), + Arguments.of(StandardField.URL, "https://example.org?key=value&key2=value2") + ); + } + + @ParameterizedTest + @MethodSource + void entryWithVerabitmFieldsNotCausingMessages(Field field, String value) { + entry.setField(field, value); + assertEquals(List.of(), checker.check(entry)); + } } diff --git a/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java b/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java index 3a08d49af93..f185132a29c 100644 --- a/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java +++ b/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java @@ -2,11 +2,16 @@ import java.util.Collections; import java.util.List; +import java.util.stream.Stream; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.StandardField; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -59,4 +64,18 @@ void journalDoesNotAcceptHTMLEncodedCharacters() { entry.setField(StandardField.JOURNAL, "Ärling Ström for – ‱"); assertEquals(List.of(new IntegrityMessage("HTML encoded character found", entry, StandardField.JOURNAL)), checker.check(entry)); } + + static Stream entryWithVerabitmFieldsNotCausingMessages() { + return Stream.of( + Arguments.of(StandardField.FILE, "one & another.pdf"), + Arguments.of(StandardField.URL, "https://example.org?key=value&key2=value2") + ); + } + + @ParameterizedTest + @MethodSource + void entryWithVerabitmFieldsNotCausingMessages(Field field, String value) { + entry.setField(field, value); + assertEquals(List.of(), checker.check(entry)); + } } From a9b1015a16255be0af3794a038ce7e9bc017c3b6 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Tue, 26 Sep 2023 16:22:28 +0200 Subject: [PATCH 2/7] Add CHANGELOG.md entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67deb76ec92..82774c9369e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - It is possible again to use "current table sort order" for the order of entries when saving. [#9869](https://github.com/JabRef/jabref/issues/9869) - Passwords can be stored in GNOME key ring. [#10274](https://github.com/JabRef/jabref/issues/10274) +- The ampersand checker skips verbatim fields (`file`, `url`, ...). [#10419](https://github.com/JabRef/jabref/pull/10419) - We fixed an issue where groups based on an aux file could not be created due to an exception [#10350](https://github.com/JabRef/jabref/issues/10350) - We fixed an issue where the JabRef browser extension could not communicate with JabRef under macOS due to missing files. You should use the `.pkg` for the first installation as it updates all necessary files for the extension [#10308](https://github.com/JabRef/jabref/issues/10308) - We fixed an issue where the ISBN fetcher returned the entrytype `misc` for certain ISBN numbers [#10348](https://github.com/JabRef/jabref/issues/10348) From dc82d445c8bbc1ac5126947cb895e062bf6bf6b5 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Tue, 26 Sep 2023 16:44:13 +0200 Subject: [PATCH 3/7] Switch from classic for to streaming --- .../logic/integrity/AmpersandChecker.java | 41 +++++++++---------- .../logic/integrity/HTMLCharacterChecker.java | 22 +++------- 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java index 560e77fbb13..48df6ddfcf6 100644 --- a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -1,17 +1,16 @@ package org.jabref.logic.integrity; -import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.regex.MatchResult; import java.util.regex.Pattern; +import java.util.stream.Stream; import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.FieldProperty; import com.google.common.base.CharMatcher; +import org.jooq.lambda.tuple.Tuple2; /** * Checks if the BibEntry contains unescaped ampersands. @@ -23,24 +22,22 @@ public class AmpersandChecker implements EntryChecker { @Override public List check(BibEntry entry) { - List results = new ArrayList<>(); - - for (Map.Entry field : entry.getFieldMap().entrySet()) { - if (field.getKey().getProperties().contains(FieldProperty.VERBATIM)) { - continue; - } - // counts the number of even \ occurrences preceding an & - long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) - .results() - .map(MatchResult::group) - .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) - .count(); - - if (unescapedAmpersands > 0) { - results.add(new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", unescapedAmpersands), entry, field.getKey())); - // note: when changing the message - also do so in tests - } - } - return results; + return entry.getFieldMap().entrySet().stream() + .filter(field -> !field.getKey().getProperties().contains(FieldProperty.VERBATIM)) + // We use "flatMap" instead of filtering later, because we assume there won't be that much error messages - and construction of Stream.empty() is faster than construction of a new Tuple2 (including lifting long to Long) + .flatMap(field -> { + // counts the number of even \ occurrences preceding an & + long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) + .results() + .map(MatchResult::group) + .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) + .count(); + if (unescapedAmpersands == 0) { + return Stream.empty(); + } + return Stream.of(new Tuple2<>(field.getKey(), unescapedAmpersands)); + }) + .map(tuple -> new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", tuple.v2()), entry, tuple.v1())) + .toList(); } } diff --git a/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java b/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java index 267cf2435b4..874a9c79c8d 100644 --- a/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java +++ b/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java @@ -1,14 +1,10 @@ package org.jabref.logic.integrity; -import java.util.ArrayList; import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.FieldProperty; /** @@ -20,18 +16,10 @@ public class HTMLCharacterChecker implements EntryChecker { @Override public List check(BibEntry entry) { - List results = new ArrayList<>(); - for (Map.Entry field : entry.getFieldMap().entrySet()) { - if (field.getKey().getProperties().contains(FieldProperty.VERBATIM)) { - continue; - } - - Matcher characterMatcher = HTML_CHARACTER_PATTERN.matcher(field.getValue()); - if (characterMatcher.find()) { - results.add( - new IntegrityMessage(Localization.lang("HTML encoded character found"), entry, field.getKey())); - } - } - return results; + return entry.getFieldMap().entrySet().stream() + .filter(field -> !field.getKey().getProperties().contains(FieldProperty.VERBATIM)) + .filter(field -> HTML_CHARACTER_PATTERN.matcher(field.getValue()).find()) + .map(field -> new IntegrityMessage(Localization.lang("HTML encoded character found"), entry, field.getKey())) + .toList(); } } From e18d1994e6d1f672f22a348d914cd901d9b7af0f Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Tue, 26 Sep 2023 19:00:29 +0200 Subject: [PATCH 4/7] extract method and use Pair --- .../logic/integrity/AmpersandChecker.java | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java index 48df6ddfcf6..0645c685c1a 100644 --- a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -1,16 +1,20 @@ package org.jabref.logic.integrity; import java.util.List; +import java.util.Map; +import java.util.function.Function; import java.util.regex.MatchResult; import java.util.regex.Pattern; import java.util.stream.Stream; +import javafx.util.Pair; + import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.FieldProperty; import com.google.common.base.CharMatcher; -import org.jooq.lambda.tuple.Tuple2; /** * Checks if the BibEntry contains unescaped ampersands. @@ -25,19 +29,23 @@ public List check(BibEntry entry) { return entry.getFieldMap().entrySet().stream() .filter(field -> !field.getKey().getProperties().contains(FieldProperty.VERBATIM)) // We use "flatMap" instead of filtering later, because we assume there won't be that much error messages - and construction of Stream.empty() is faster than construction of a new Tuple2 (including lifting long to Long) - .flatMap(field -> { - // counts the number of even \ occurrences preceding an & - long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) - .results() - .map(MatchResult::group) - .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) - .count(); - if (unescapedAmpersands == 0) { - return Stream.empty(); - } - return Stream.of(new Tuple2<>(field.getKey(), unescapedAmpersands)); - }) - .map(tuple -> new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", tuple.v2()), entry, tuple.v1())) + .flatMap(getUnescapedAmpersandsWithCount()) + .map(tuple -> new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", tuple.getValue()), entry, tuple.getKey())) .toList(); } + + private static Function, Stream>> getUnescapedAmpersandsWithCount() { + return field -> { + // counts the number of even \ occurrences preceding an & + long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) + .results() + .map(MatchResult::group) + .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) + .count(); + if (unescapedAmpersands == 0) { + return Stream.empty(); + } + return Stream.of(new Pair<>(field.getKey(), unescapedAmpersands)); + }; + } } From 812ee1a6c78716f3c88dd2bf70ba7b0266548a13 Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Tue, 26 Sep 2023 19:02:07 +0200 Subject: [PATCH 5/7] move changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 82774c9369e..6f069f7dd8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,12 +24,12 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - The export formats `listrefs`, `tablerefs`, `tablerefsabsbib`, now use the ISO date format in the footer [#10383](https://github.com/JabRef/jabref/pull/10383). - When searching for an identifier in the "Web search", the title of the search window is now "Identifier-based Web Search". [#10391](https://github.com/JabRef/jabref/pull/10391) +- The ampersand checker now skips verbatim fields (`file`, `url`, ...). [#10419](https://github.com/JabRef/jabref/pull/10419) ### Fixed - It is possible again to use "current table sort order" for the order of entries when saving. [#9869](https://github.com/JabRef/jabref/issues/9869) - Passwords can be stored in GNOME key ring. [#10274](https://github.com/JabRef/jabref/issues/10274) -- The ampersand checker skips verbatim fields (`file`, `url`, ...). [#10419](https://github.com/JabRef/jabref/pull/10419) - We fixed an issue where groups based on an aux file could not be created due to an exception [#10350](https://github.com/JabRef/jabref/issues/10350) - We fixed an issue where the JabRef browser extension could not communicate with JabRef under macOS due to missing files. You should use the `.pkg` for the first installation as it updates all necessary files for the extension [#10308](https://github.com/JabRef/jabref/issues/10308) - We fixed an issue where the ISBN fetcher returned the entrytype `misc` for certain ISBN numbers [#10348](https://github.com/JabRef/jabref/issues/10348) From 9ec11128f53123fea3c7b465474600d067a50633 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Thu, 28 Sep 2023 20:38:52 +0200 Subject: [PATCH 6/7] More "old tyle" Java --- .../logic/integrity/AmpersandChecker.java | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java index 0645c685c1a..9c08ad52e82 100644 --- a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -29,23 +29,21 @@ public List check(BibEntry entry) { return entry.getFieldMap().entrySet().stream() .filter(field -> !field.getKey().getProperties().contains(FieldProperty.VERBATIM)) // We use "flatMap" instead of filtering later, because we assume there won't be that much error messages - and construction of Stream.empty() is faster than construction of a new Tuple2 (including lifting long to Long) - .flatMap(getUnescapedAmpersandsWithCount()) - .map(tuple -> new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", tuple.getValue()), entry, tuple.getKey())) + .flatMap(AmpersandChecker::getUnescapedAmpersandsWithCount) + .map(pair -> new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", pair.getValue()), entry, pair.getKey())) .toList(); } - private static Function, Stream>> getUnescapedAmpersandsWithCount() { - return field -> { - // counts the number of even \ occurrences preceding an & - long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) - .results() - .map(MatchResult::group) - .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) - .count(); - if (unescapedAmpersands == 0) { - return Stream.empty(); - } - return Stream.of(new Pair<>(field.getKey(), unescapedAmpersands)); - }; + private static Stream> getUnescapedAmpersandsWithCount(Map.Entry entry) { + // counts the number of even \ occurrences preceding an & + long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(entry.getValue()) + .results() + .map(MatchResult::group) + .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) + .count(); + if (unescapedAmpersands == 0) { + return Stream.empty(); + } + return Stream.of(new Pair<>(entry.getKey(), unescapedAmpersands)); } } From 03619c371fedca2ff322ee832dbcdfd9fd83c4e0 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Thu, 28 Sep 2023 20:42:34 +0200 Subject: [PATCH 7/7] Fix checkstyle --- src/main/java/org/jabref/logic/integrity/AmpersandChecker.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java index 9c08ad52e82..243fbbddea4 100644 --- a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -2,7 +2,6 @@ import java.util.List; import java.util.Map; -import java.util.function.Function; import java.util.regex.MatchResult; import java.util.regex.Pattern; import java.util.stream.Stream;