Skip to content

Commit

Permalink
Fix searching for Non-ASCII characters
Browse files: browse the repository at this point in the history
  • Loading branch information
LoayGhreeb committed Aug 28, 2024
1 parent cc3c002 commit 909afe4
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import java.util.Map;

import org.jabref.gui.util.BackgroundTask;
import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.util.HeadlessExecutorService;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
Expand All @@ -27,6 +29,7 @@

public class BibFieldsIndexer implements LuceneIndexer {
private static final Logger LOGGER = LoggerFactory.getLogger(BibFieldsIndexer.class);
private static final Formatter FORMATTER = new LatexToUnicodeFormatter();
private final BibDatabaseContext databaseContext;
private final String libraryName;
private final Directory indexDirectory;
Expand Down Expand Up @@ -82,8 +85,9 @@ private void addToIndex(BibEntry bibEntry) {

StringBuilder allFields = new StringBuilder(bibEntry.getType().getName());
for (Map.Entry<Field, String> mapEntry : bibEntry.getFieldMap().entrySet()) {
document.add(new TextField(mapEntry.getKey().getName(), mapEntry.getValue(), storeDisabled));
allFields.append('\n').append(mapEntry.getValue());
String value = FORMATTER.format(mapEntry.getValue());
document.add(new TextField(mapEntry.getKey().getName(), value, storeDisabled));
allFields.append('\n').append(value);
}
document.add(new TextField(SearchFieldConstants.DEFAULT_FIELD.toString(), allFields.toString(), storeDisabled));
indexWriter.addDocument(document);
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/org/jabref/model/search/SearchQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;

import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.QueryTermExtractor;
import org.apache.lucene.search.highlight.WeightedTerm;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SearchQuery {

private static final Formatter FORMATTER = new LatexToUnicodeFormatter();
/**
* The mode of escaping special characters in regular expressions
*/
Expand Down Expand Up @@ -59,7 +60,6 @@ String format(String regex) {
abstract String format(String regex);
}

private final static Logger LOGGER = LoggerFactory.getLogger(SearchQuery.class);
protected final String query;
protected Query parsedQuery;
protected String parseError;
Expand Down Expand Up @@ -87,7 +87,7 @@ public SearchQuery(String query, EnumSet<SearchFlags> searchFlags) {
queryParser.setAllowLeadingWildcard(true);

try {
parsedQuery = queryParser.parse(query);
parsedQuery = queryParser.parse(FORMATTER.format(query));
parseError = null;
} catch (ParseException e) {
parsedQuery = null;
Expand Down
38 changes: 38 additions & 0 deletions src/test/java/org/jabref/logic/search/LuceneQueryParserTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.jabref.logic.search;

import java.util.EnumSet;
import java.util.stream.Stream;

import org.jabref.model.search.SearchFlags;
import org.jabref.model.search.SearchQuery;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Parameterized checks on the string form of the Lucene query that
 * {@link SearchQuery} produces for search terms containing non-ASCII
 * characters, written either as Unicode or as LaTeX commands.
 */
public class LuceneQueryParserTest {

    /**
     * Supplies the test cases as {@code (expected term, raw query)} pairs.
     * Each expected term is the plain-ASCII spelling that should appear in the
     * parsed query for the given raw input.
     *
     * NOTE(review): "Quires" is presumably a typo for "Queries". The name is kept
     * because the parameterless {@code @MethodSource} on the test below looks up
     * its factory by the test method's own name, so both must be renamed together.
     *
     * @return stream of argument pairs consumed by {@link #searchQuires(String, String)}
     */
    public static Stream<Arguments> searchQuires() {
        return Stream.of(
                // Queries typed with Unicode characters
                Arguments.of("preissinger", "preißinger"),
                Arguments.of("jesus", "jesús"),
                Arguments.of("breitenbucher", "breitenbücher"),

                // The same queries typed with LaTeX escapes
                Arguments.of("preissinger", "prei{\\ss}inger"),
                Arguments.of("jesus", "jes{\\'{u}}s"),
                Arguments.of("breitenbucher", "breitenb{\\\"{u}}cher")
        );
    }

    /**
     * Parses {@code query} with no search flags set and compares the parsed
     * query's {@code toString()} with {@code (all:<expected>)^4.0}.
     *
     * @param expected the term expected inside the parsed query
     * @param query    the raw user query handed to {@link SearchQuery}
     */
    @ParameterizedTest
    @MethodSource
    void searchQuires(String expected, String query) {
        EnumSet<SearchFlags> noFlags = EnumSet.noneOf(SearchFlags.class);
        String actualParsed = new SearchQuery(query, noFlags).getParsedQuery().toString();

        // Built in a separate local so the incoming parameter is left untouched.
        String expectedParsed = "(all:" + expected + ")^4.0";
        assertEquals(expectedParsed, actualParsed);
    }
}

0 comments on commit 909afe4

Please sign in to comment.