From 909afe490adacfd57ad3339757ab5121aa344890 Mon Sep 17 00:00:00 2001 From: Loay Ghreeb Date: Wed, 28 Aug 2024 07:33:54 +0300 Subject: [PATCH] Fix searching for Non-ASCII characters --- .../search/indexing/BibFieldsIndexer.java | 8 +++- .../org/jabref/model/search/SearchQuery.java | 10 ++--- .../logic/search/LuceneQueryParserTest.java | 38 +++++++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) create mode 100644 src/test/java/org/jabref/logic/search/LuceneQueryParserTest.java diff --git a/src/main/java/org/jabref/logic/search/indexing/BibFieldsIndexer.java b/src/main/java/org/jabref/logic/search/indexing/BibFieldsIndexer.java index 5a94d152959..f3d76248c7f 100644 --- a/src/main/java/org/jabref/logic/search/indexing/BibFieldsIndexer.java +++ b/src/main/java/org/jabref/logic/search/indexing/BibFieldsIndexer.java @@ -5,7 +5,9 @@ import java.util.Map; import org.jabref.gui.util.BackgroundTask; +import org.jabref.logic.cleanup.Formatter; import org.jabref.logic.l10n.Localization; +import org.jabref.logic.layout.format.LatexToUnicodeFormatter; import org.jabref.logic.util.HeadlessExecutorService; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; @@ -27,6 +29,7 @@ public class BibFieldsIndexer implements LuceneIndexer { private static final Logger LOGGER = LoggerFactory.getLogger(BibFieldsIndexer.class); + private static final Formatter FORMATTER = new LatexToUnicodeFormatter(); private final BibDatabaseContext databaseContext; private final String libraryName; private final Directory indexDirectory; @@ -82,8 +85,9 @@ private void addToIndex(BibEntry bibEntry) { StringBuilder allFields = new StringBuilder(bibEntry.getType().getName()); for (Map.Entry mapEntry : bibEntry.getFieldMap().entrySet()) { - document.add(new TextField(mapEntry.getKey().getName(), mapEntry.getValue(), storeDisabled)); - allFields.append('\n').append(mapEntry.getValue()); + String value = FORMATTER.format(mapEntry.getValue()); + 
document.add(new TextField(mapEntry.getKey().getName(), value, storeDisabled)); + allFields.append('\n').append(value); } document.add(new TextField(SearchFieldConstants.DEFAULT_FIELD.toString(), allFields.toString(), storeDisabled)); indexWriter.addDocument(document); diff --git a/src/main/java/org/jabref/model/search/SearchQuery.java b/src/main/java/org/jabref/model/search/SearchQuery.java index 4ef1f6a0d46..674c502d3df 100644 --- a/src/main/java/org/jabref/model/search/SearchQuery.java +++ b/src/main/java/org/jabref/model/search/SearchQuery.java @@ -12,16 +12,17 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import org.jabref.logic.cleanup.Formatter; +import org.jabref.logic.layout.format.LatexToUnicodeFormatter; + import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.QueryTermExtractor; import org.apache.lucene.search.highlight.WeightedTerm; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class SearchQuery { - + private static final Formatter FORMATTER = new LatexToUnicodeFormatter(); /** * The mode of escaping special characters in regular expressions */ @@ -59,7 +60,6 @@ String format(String regex) { abstract String format(String regex); } - private final static Logger LOGGER = LoggerFactory.getLogger(SearchQuery.class); protected final String query; protected Query parsedQuery; protected String parseError; @@ -87,7 +87,7 @@ public SearchQuery(String query, EnumSet searchFlags) { queryParser.setAllowLeadingWildcard(true); try { - parsedQuery = queryParser.parse(query); + parsedQuery = queryParser.parse(FORMATTER.format(query)); parseError = null; } catch (ParseException e) { parsedQuery = null; diff --git a/src/test/java/org/jabref/logic/search/LuceneQueryParserTest.java b/src/test/java/org/jabref/logic/search/LuceneQueryParserTest.java new file mode 100644 index 
00000000000..9a471e305bc
--- /dev/null
+++ b/src/test/java/org/jabref/logic/search/LuceneQueryParserTest.java
@@ -0,0 +1,54 @@
+package org.jabref.logic.search;
+
+import java.util.EnumSet;
+import java.util.stream.Stream;
+
+import org.jabref.model.search.SearchFlags;
+import org.jabref.model.search.SearchQuery;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Verifies the parsed form of {@link SearchQuery} for queries containing non-ASCII characters,
+ * given either as Unicode characters or as LaTeX escape sequences.
+ */
+public class LuceneQueryParserTest {
+
+    /**
+     * Provides the argument pairs for {@link #searchQuires(String, String)}.
+     *
+     * @return pairs of (term expected in the parsed query, raw query text)
+     */
+    public static Stream<Arguments> searchQuires() {
+        return Stream.of(
+                // unicode
+                Arguments.of("preissinger", "preißinger"),
+                Arguments.of("jesus", "jesús"),
+                Arguments.of("breitenbucher", "breitenbücher"),
+
+                // latex
+                Arguments.of("preissinger", "prei{\\ss}inger"),
+                Arguments.of("jesus", "jes{\\'{u}}s"),
+                Arguments.of("breitenbucher", "breitenb{\\\"{u}}cher")
+        );
+    }
+
+    /**
+     * Parses {@code query} with no search flags set and compares the string form of the parsed query.
+     *
+     * @param expected the term expected inside the parsed query
+     * @param query    the raw query text passed to {@link SearchQuery}
+     */
+    @ParameterizedTest
+    @MethodSource
+    void searchQuires(String expected, String query) {
+        // Full string form the parsed query is expected to have for this term.
+        String expectedQuery = "(all:" + expected + ")^4.0";
+        SearchQuery searchQuery = new SearchQuery(query, EnumSet.noneOf(SearchFlags.class));
+        assertEquals(expectedQuery, searchQuery.getParsedQuery().toString());
+    }
+}