Skip to content

Commit

Permalink
Fixes #290
Browse files Browse the repository at this point in the history
  • Loading branch information
Shazwazza committed Sep 7, 2022
1 parent 908721b commit 3516bba
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 2 deletions.
22 changes: 20 additions & 2 deletions src/Examine.Lucene/Analyzers/CultureInvariantStandardAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,27 @@ namespace Examine.Lucene.Analyzers
public sealed class CultureInvariantStandardAnalyzer : Analyzer
{
private readonly CharArraySet _stopWordsSet;
private readonly bool _caseInsensitive;
private readonly bool _ignoreLanguageAccents;

/// <summary>
/// Creates an analyzer that uses the supplied stop words, with lower-casing
/// and ASCII folding of accented characters both enabled.
/// </summary>
/// <param name="stopWords">The stop words handed to the stop filter.</param>
public CultureInvariantStandardAnalyzer(CharArraySet stopWords)
    : this(stopWords, true, true)
{
    // The chained constructor already stores stopWords, so nothing is assigned here.
}

/// <summary>
/// Creates an analyzer that uses <see cref="StandardAnalyzer.STOP_WORDS_SET"/>
/// as its stop words.
/// </summary>
public CultureInvariantStandardAnalyzer() : this(StandardAnalyzer.STOP_WORDS_SET) { }

/// <summary>
/// Creates an analyzer with explicit control over case and accent handling.
/// </summary>
/// <param name="stopWords">The stop words handed to the stop filter.</param>
/// <param name="caseInsensitive">
/// When <c>true</c>, a lower-case filter is added to the token stream.
/// </param>
/// <param name="ignoreLanguageAccents">
/// When <c>true</c>, an ASCII folding filter is added to the token stream.
/// </param>
public CultureInvariantStandardAnalyzer(
    CharArraySet stopWords,
    bool caseInsensitive,
    bool ignoreLanguageAccents)
{
    _ignoreLanguageAccents = ignoreLanguageAccents;
    _caseInsensitive = caseInsensitive;
    _stopWordsSet = stopWords;
}

protected override TokenStreamComponents CreateComponents(
string fieldName,
TextReader reader)
Expand All @@ -37,7 +47,15 @@ protected override TokenStreamComponents CreateComponents(

TokenStream result = new StandardFilter(LuceneInfo.CurrentVersion, tokenizer);

result = new LowerCaseFilter(LuceneInfo.CurrentVersion, result);
if (_caseInsensitive)
{
result = new LowerCaseFilter(LuceneInfo.CurrentVersion, result);
}

if (_ignoreLanguageAccents)
{
result = new ASCIIFoldingFilter(result ?? tokenizer);
}

result = new StopFilter(LuceneInfo.CurrentVersion, result, _stopWordsSet);

Expand Down
70 changes: 70 additions & 0 deletions src/Examine.Test/Examine.Lucene/Search/AnalyzerTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
using Examine.Lucene.Analyzers;
using Examine.Lucene.Providers;
using NUnit.Framework;

namespace Examine.Test.Examine.Lucene.Search
{
    /// <summary>
    /// Verifies that the culture invariant analyzers match a term regardless of
    /// whether it is searched with or without its accented character
    /// ("rød" vs. "rod").
    /// </summary>
    [TestFixture]
    public class AnalyzerTests : ExamineBaseTest
    {
        /// <summary>
        /// Indexes "rød" in <c>bodyText</c> using the whitespace analyzer and
        /// expects both the plain and the accented search term to find it.
        /// </summary>
        [Test]
        public void Given_CultureInvariantWhitespaceAnalyzer_When_SearchingBothCharVariants_Then_BothAreFound()
        {
            var analyzer = new CultureInvariantWhitespaceAnalyzer();
            using (var luceneDir = new RandomIdRAMDirectory())
            using (var indexer = GetTestIndex(luceneDir, analyzer))
            {
                // Only item 1 has a bodyText field; item 2 stores its text in
                // nodeName, so a bodyText query can match at most one item.
                indexer.IndexItems(new[] {
                    ValueSet.FromObject(1.ToString(), "content",
                        new { bodyText = "Something rød something"}),
                    ValueSet.FromObject(2.ToString(), "content",
                        new { nodeName = "Something rod something"})
                });

                var searcher = (BaseLuceneSearcher)indexer.Searcher;

                var query1 = searcher
                    .CreateQuery("content")
                    .Field("bodyText", "rod");
                var results1 = query1.Execute();

                var query2 = searcher
                    .CreateQuery("content")
                    .Field("bodyText", "rød");
                // Execute the accented query itself, not query1 a second time.
                var results2 = query2.Execute();

                Assert.AreEqual(1, results1.TotalItemCount);
                Assert.AreEqual(1, results2.TotalItemCount);
            }
        }

        /// <summary>
        /// Indexes "rød" in <c>bodyText</c> using the standard analyzer and
        /// expects both the plain and the accented search term to find it.
        /// </summary>
        [Test]
        public void Given_CultureInvariantStandardAnalyzer_When_SearchingBothCharVariants_Then_BothAreFound()
        {
            var analyzer = new CultureInvariantStandardAnalyzer();
            using (var luceneDir = new RandomIdRAMDirectory())
            using (var indexer = GetTestIndex(luceneDir, analyzer))
            {
                // Only item 1 has a bodyText field; item 2 stores its text in
                // nodeName, so a bodyText query can match at most one item.
                indexer.IndexItems(new[] {
                    ValueSet.FromObject(1.ToString(), "content",
                        new { bodyText = "Something rød something"}),
                    ValueSet.FromObject(2.ToString(), "content",
                        new { nodeName = "Something rod something"})
                });

                var searcher = (BaseLuceneSearcher)indexer.Searcher;

                var query1 = searcher
                    .CreateQuery("content")
                    .Field("bodyText", "rod");
                var results1 = query1.Execute();

                var query2 = searcher
                    .CreateQuery("content")
                    .Field("bodyText", "rød");
                // Execute the accented query itself, not query1 a second time.
                var results2 = query2.Execute();

                Assert.AreEqual(1, results1.TotalItemCount);
                Assert.AreEqual(1, results2.TotalItemCount);
            }
        }
    }
}

0 comments on commit 3516bba

Please sign in to comment.