Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

skip take & select fields #184

Merged
merged 15 commits into from
Sep 25, 2020
161 changes: 161 additions & 0 deletions src/Examine.Test/Search/FluentApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1873,6 +1873,167 @@ public void Category()
// BooleanQuery.MaxClauseCount = 1024;
// }
//}
[Test]
public void Select_Field()
{
    // Verifies that SelectField restricts the loaded result values to the single requested field.
    var standardAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
    using (var directory = new RandomIdRAMDirectory())
    using (var index = new TestIndex(directory, standardAnalyzer))
    {
        var items = new[]
        {
            new ValueSet("1", "content",
                new Dictionary<string, object>
                {
                    { "id", "1" },
                    { "nodeName", "my name 1" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,789" }
                }),
            new ValueSet("2", "content",
                new Dictionary<string, object>
                {
                    { "id", "2" },
                    { "nodeName", "my name 2" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,987" }
                })
        };
        index.IndexItems(items);

        var searcher = index.GetSearcher();
        var query = searcher.CreateQuery("content")
            .Field("nodeName", "my name 1")
            .And().SelectField("__Path");

        var results = query.Execute();
        var expectedKeys = new string[] { "__Path" };
        var actualKeys = results.First().Values.Keys.ToArray();

        // The two containment checks together assert set equality of the key collections.
        Assert.True(actualKeys.All(key => expectedKeys.Contains(key)));
        Assert.True(expectedKeys.All(key => actualKeys.Contains(key)));
    }
}
[Test]
public void Select_FirstField()
{
    // Verifies that SelectFirstFieldOnly loads only the first stored field of each document.
    var standardAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
    using (var directory = new RandomIdRAMDirectory())
    using (var index = new TestIndex(directory, standardAnalyzer))
    {
        var items = new[]
        {
            new ValueSet("1", "content",
                new Dictionary<string, object>
                {
                    { "id", "1" },
                    { "nodeName", "my name 1" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,789" }
                }),
            new ValueSet("2", "content",
                new Dictionary<string, object>
                {
                    { "id", "2" },
                    { "nodeName", "my name 2" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,987" }
                })
        };
        index.IndexItems(items);

        var searcher = index.GetSearcher();
        var query = searcher.CreateQuery("content")
            .Field("nodeName", "my name 1")
            .And().SelectFirstFieldOnly();

        var results = query.Execute();
        // The index's first stored field is expected to be __NodeId.
        var expectedKeys = new string[] { "__NodeId" };
        var actualKeys = results.First().Values.Keys.ToArray();

        // The two containment checks together assert set equality of the key collections.
        Assert.True(actualKeys.All(key => expectedKeys.Contains(key)));
        Assert.True(expectedKeys.All(key => actualKeys.Contains(key)));
    }
}

[Test]
public void Select_Fields()
{
    // Verifies that the params-array SelectFields overload loads exactly the named fields.
    var standardAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
    using (var directory = new RandomIdRAMDirectory())
    using (var index = new TestIndex(directory, standardAnalyzer))
    {
        var items = new[]
        {
            new ValueSet("1", "content",
                new Dictionary<string, object>
                {
                    { "id", "1" },
                    { "nodeName", "my name 1" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,789" }
                }),
            new ValueSet("2", "content",
                new Dictionary<string, object>
                {
                    { "id", "2" },
                    { "nodeName", "my name 2" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,987" }
                })
        };
        index.IndexItems(items);

        var searcher = index.GetSearcher();
        var query = searcher.CreateQuery("content")
            .Field("nodeName", "my name 1")
            .And().SelectFields("nodeName", "bodyText", "id", "__NodeId");

        var results = query.Execute();
        var expectedKeys = new string[] { "nodeName", "bodyText", "id", "__NodeId" };
        var actualKeys = results.First().Values.Keys.ToArray();

        // The two containment checks together assert set equality of the key collections.
        Assert.True(actualKeys.All(key => expectedKeys.Contains(key)));
        Assert.True(expectedKeys.All(key => actualKeys.Contains(key)));
    }
}

[Test]
public void Select_Fields_HashSet()
{
    // Verifies that the ISet-based SelectFields overload loads exactly the named fields.
    var standardAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
    using (var directory = new RandomIdRAMDirectory())
    using (var index = new TestIndex(directory, standardAnalyzer))
    {
        var items = new[]
        {
            new ValueSet("1", "content",
                new Dictionary<string, object>
                {
                    { "id", "1" },
                    { "nodeName", "my name 1" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,789" }
                }),
            new ValueSet("2", "content",
                new Dictionary<string, object>
                {
                    { "id", "2" },
                    { "nodeName", "my name 2" },
                    { "bodyText", "lorem ipsum" },
                    { "__Path", "-1,123,456,987" }
                })
        };
        index.IndexItems(items);

        var searcher = index.GetSearcher();
        var query = searcher.CreateQuery("content")
            .Field("nodeName", "my name 1")
            .And().SelectFields(new HashSet<string> { "nodeName", "bodyText" });

        var results = query.Execute();
        var expectedKeys = new string[] { "nodeName", "bodyText" };
        var actualKeys = results.First().Values.Keys.ToArray();

        // The two containment checks together assert set equality of the key collections.
        Assert.True(actualKeys.All(key => expectedKeys.Contains(key)));
        Assert.True(expectedKeys.All(key => actualKeys.Contains(key)));
    }
}

}
}
104 changes: 92 additions & 12 deletions src/Examine/LuceneEngine/LuceneSearchResults.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,88 @@ public static ISearchResults Empty()

public TopDocs TopDocs { get; private set; }

public FieldSelector FieldSelector { get; }

internal LuceneSearchResults(Query query, IEnumerable<SortField> sortField, Searcher searcher, int maxResults)
/// <summary>
/// Creates results for a query capped at a maximum number of hits, optionally restricting
/// which stored fields are loaded per document via the field selector (null loads all fields).
/// </summary>
internal LuceneSearchResults(Query query, IEnumerable<SortField> sortField, Searcher searcher, int maxResults, FieldSelector fieldSelector)
{
    LuceneQuery = query;
    LuceneSearcher = searcher;
    FieldSelector = fieldSelector;

    DoSearch(query, sortField, maxResults);
}
/// <summary>
/// Creates results for a paged query: skips <paramref name="skip"/> hits and takes up to
/// <paramref name="take"/> (a null take returns all remaining hits). The optional
/// <paramref name="fieldSelector"/> restricts which stored fields are loaded per document.
/// </summary>
internal LuceneSearchResults(Query query, IEnumerable<SortField> sortField, Searcher searcher, int skip, int? take = null, FieldSelector fieldSelector = null)
{
    LuceneQuery = query;
    LuceneSearcher = searcher;
    // Assign before searching (matching the maxResults constructor) so the instance is
    // fully initialized by the time DoSearch runs.
    FieldSelector = fieldSelector;

    DoSearch(query, sortField, skip, take);
}

/// <summary>
/// Executes the query, skipping <paramref name="skip"/> hits and optionally limiting the
/// page to <paramref name="take"/> hits (null or negative take returns all remaining hits).
/// Sets TopDocs and TotalItemCount.
/// </summary>
private void DoSearch(Query query, IEnumerable<SortField> sortField, int skip, int? take = null)
{
    // The collector must gather skip + take hits in total; clamp via long arithmetic so a
    // large take/skip combination cannot overflow int.
    int maxResults = take != null ? (int)Math.Min((long)take.Value + skip, int.MaxValue) : int.MaxValue;

    //This try catch is because analyzers strip out stop words and sometimes leave the query
    //with null values. This simply tries to extract terms, if it fails with a null
    //reference then its an invalid null query, NotSupporteException occurs when the query is
    //valid but the type of query can't extract terms.
    //This IS a work-around, theoretically Lucene itself should check for null query parameters
    //before throwing exceptions.
    try
    {
        var set = new HashSet<Term>();
        query.ExtractTerms(set);
    }
    catch (NullReferenceException)
    {
        //this means that an analyzer has stripped out stop words and now there are
        //no words left to search on

        //it could also mean that potentially a IIndexFieldValueType is throwing a null ref
        TotalItemCount = 0;
        return;
    }
    catch (NotSupportedException)
    {
        //swallow this exception, we should continue if this occurs.
    }

    maxResults = maxResults >= 1 ? Math.Min(maxResults, LuceneSearcher.MaxDoc) : LuceneSearcher.MaxDoc;

    Collector topDocsCollector;
    var sortFields = sortField as SortField[] ?? sortField.ToArray();
    if (sortFields.Length > 0)
    {
        topDocsCollector = TopFieldCollector.Create(
            new Sort(sortFields), maxResults, false, false, false, false);
    }
    else
    {
        topDocsCollector = TopScoreDocCollector.Create(maxResults, true);
    }

    LuceneSearcher.Search(query, topDocsCollector);

    // FIX: the previous condition `sortFields.Length > 0 && take == null || take.Value < 0`
    // parsed as `(A && B) || C`, which (1) dereferenced take.Value when take was null on an
    // unsorted query, and (2) cast a TopScoreDocCollector to TopFieldCollector when take was
    // negative without sorting. Branch on the collector type first, then on whether a
    // usable take was supplied.
    bool hasTake = take != null && take.Value >= 0;
    if (sortFields.Length > 0)
    {
        var fieldCollector = (TopFieldCollector)topDocsCollector;
        TopDocs = hasTake ? fieldCollector.TopDocs(skip, take.Value) : fieldCollector.TopDocs(skip);
    }
    else
    {
        var scoreCollector = (TopScoreDocCollector)topDocsCollector;
        TopDocs = hasTake ? scoreCollector.TopDocs(skip, take.Value) : scoreCollector.TopDocs(skip);
    }

    TotalItemCount = TopDocs.TotalHits;
}


private void DoSearch(Query query, IEnumerable<SortField> sortField, int maxResults)
{
//This try catch is because analyzers strip out stop words and sometimes leave the query
Expand Down Expand Up @@ -157,13 +229,13 @@ private SearchResult PrepareSearchResult(float score, Document doc)

return resultVals;
});

return sr;
}

//NOTE: If we moved this logic inside of the 'Skip' method like it used to be then we get the Code Analysis barking
// at us because of Linq requirements and 'MoveNext()'. This method is to work around this behavior.

private SearchResult CreateFromDocumentItem(int i)
{
// I have seen IndexOutOfRangeException here which is strange as this is only called in one place
Expand All @@ -175,14 +247,22 @@ private SearchResult CreateFromDocumentItem(int i)
var scoreDoc = TopDocs.ScoreDocs[i];

var docId = scoreDoc.Doc;
var doc = LuceneSearcher.Doc(docId);
Document doc;
if(FieldSelector != null)
{
doc = LuceneSearcher.Doc(docId, FieldSelector);
}
else
{
doc = LuceneSearcher.Doc(docId);
}
var score = scoreDoc.Score;
var result = CreateSearchResult(doc, score);
return result;
}

//NOTE: This is totally retarded but it is required for medium trust as I cannot put this code inside the Skip method... wtf

private int GetScoreDocsLength()
{
if (TopDocs?.ScoreDocs == null)
Expand All @@ -200,7 +280,7 @@ private int GetScoreDocsLength()
/// </remarks>
/// <param name="skip">The number of items in the results to skip.</param>
/// <returns>A collection of the search results</returns>

public IEnumerable<ISearchResult> Skip(int skip)
{
for (int i = skip, n = GetScoreDocsLength(); i < n; i++)
Expand All @@ -209,7 +289,7 @@ public IEnumerable<ISearchResult> Skip(int skip)
if (!Docs.ContainsKey(i))
{
var r = CreateFromDocumentItem(i);
if (r == null)
if (r == null)
continue;

Docs.Add(i, r);
Expand All @@ -232,7 +312,7 @@ private struct DecrementReaderResult : IEnumerator<ISearchResult>
private readonly IEnumerator<ISearchResult> _baseEnumerator;
private readonly IndexSearcher _searcher;


public DecrementReaderResult(IEnumerator<ISearchResult> baseEnumerator, Searcher searcher)
{
_baseEnumerator = baseEnumerator;
Expand All @@ -241,7 +321,7 @@ public DecrementReaderResult(IEnumerator<ISearchResult> baseEnumerator, Searcher
_searcher?.IndexReader.IncRef();
}


public void Dispose()
{
_baseEnumerator.Dispose();
Expand All @@ -268,7 +348,7 @@ public void Reset()
/// Gets the enumerator starting at position 0
/// </summary>
/// <returns>A collection of the search results</returns>

public IEnumerator<ISearchResult> GetEnumerator()
{
return new DecrementReaderResult(
Expand Down
4 changes: 4 additions & 0 deletions src/Examine/LuceneEngine/Search/LuceneBooleanOperation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,9 @@ public LuceneBooleanOperation(LuceneSearchQuery search)
#endregion

public override string ToString() => _search.ToString();

// Pages results. NOTE(review): the parameter order here is (take, skip) while the delegate
// target is ExecuteWithSkip(skip, take) — the arguments are deliberately transposed, which
// is easy to get wrong at call sites; confirm callers pass (take, skip).
public override ISearchResults Execute(int take, int skip) => _search.ExecuteWithSkip(skip, take);

// Delegates paged execution to the underlying LuceneSearchQuery; a null take returns all
// hits after the skipped ones.
public override ISearchResults ExecuteWithSkip(int skip, int? take = null) => _search.ExecuteWithSkip(skip, take);
}
}
3 changes: 3 additions & 0 deletions src/Examine/LuceneEngine/Search/LuceneBooleanOperationBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ protected internal LuceneBooleanOperationBase Op(
}

// Executes the query returning at most maxResults hits.
public abstract ISearchResults Execute(int maxResults = 500);
// Executes with paging. NOTE(review): parameter order is (take, skip), the reverse of
// ExecuteWithSkip(skip, take) below — confirm this ordering is intentional.
public abstract ISearchResults Execute(int take, int skip);
public abstract IOrdering OrderBy(params SortableField[] fields);
public abstract IOrdering OrderByDescending(params SortableField[] fields);

// Executes the query skipping the first 'skip' hits; a null take returns all remaining hits.
public abstract ISearchResults ExecuteWithSkip(int skip, int? take = null);
nzdev marked this conversation as resolved.
Show resolved Hide resolved
}
}
10 changes: 9 additions & 1 deletion src/Examine/LuceneEngine/Search/LuceneQuery.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public IBooleanOperation RangeQuery<T>(string[] fields, T? min, T? max, bool min

public string Category => _search.Category;

public IBooleanOperation NativeQuery(string query) => _search.NativeQuery(query);
// Runs a native Lucene query string. NOTE(review): loadedFieldNames is a new optional set
// restricting which stored fields are loaded for results; passing null presumably keeps the
// previous load-all behavior — verify against _search.NativeQuery.
public IBooleanOperation NativeQuery(string query, ISet<string> loadedFieldNames = null) => _search.NativeQuery(query, loadedFieldNames);

/// <inheritdoc />
public IBooleanOperation Group(Func<INestedQuery, INestedBooleanOperation> inner, BooleanOperation defaultOp = BooleanOperation.Or)
Expand Down Expand Up @@ -103,6 +103,14 @@ INestedBooleanOperation INestedQuery.GroupedNot(IEnumerable<string> fields, para
INestedBooleanOperation INestedQuery.RangeQuery<T>(string[] fields, T? min, T? max, bool minInclusive, bool maxInclusive)
=> _search.RangeQueryInternal(fields, min, max, minInclusive: minInclusive, maxInclusive: maxInclusive);

// Restricts the stored fields loaded into search results to the given names (delegates to the wrapped query).
public IBooleanOperation SelectFields(params string[] fieldNames) => _search.SelectFields(fieldNames);

// Set-based overload of SelectFields; same behavior as the params overload.
public IBooleanOperation SelectFields(ISet<string> fieldNames) => _search.SelectFields(fieldNames);

// Loads only a single named stored field into the search results.
public IBooleanOperation SelectField(string fieldName) => _search.SelectField(fieldName);

// Loads only the first stored field of each document — per the tests this is expected to be __NodeId; TODO confirm.
public IBooleanOperation SelectFirstFieldOnly() => _search.SelectFirstFieldOnly();

// Loads all stored fields — presumably restores the default (no field selector); verify in LuceneSearchQuery.
public IBooleanOperation SelectAllFields() => _search.SelectAllFields();
}
}
Loading