diff --git a/.gitignore b/.gitignore index 7b482db063..ce84baa865 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,13 @@ release/ .tools/ # NUnit test result file produced by nunit3-console.exe -[Tt]est[Rr]esult.xml \ No newline at end of file +[Tt]est[Rr]esult.xml +websites/**/_site/* +websites/**/tools/* +websites/**/_exported_templates/* +websites/**/api/.manifest +websites/**/docfx.log +websites/**/lucenetemplate/plugins/* +websites/apidocs/api/**/*.yml +websites/apidocs/api/**/*.manifest +!websites/apidocs/api/toc.yml \ No newline at end of file diff --git a/Lucene.Net.sln b/Lucene.Net.sln index d80ff6124e..1991479c8a 100644 --- a/Lucene.Net.sln +++ b/Lucene.Net.sln @@ -112,6 +112,15 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lucene.Net.Tests.Join", "sr EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lucene.Net.Tests.Memory", "src\Lucene.Net.Tests.Memory\Lucene.Net.Tests.Memory.csproj", "{3BE7B6EA-8DBC-45E2-947C-1CA7E63B5603}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "apidocs", "apidocs", "{58FD6E39-F30F-4566-90E5-B7C9D6BC0660}" + ProjectSection(SolutionItems) = preProject + apidocs\docfx.filter.yml = apidocs\docfx.filter.yml + apidocs\docfx.json = apidocs\docfx.json + apidocs\docs.ps1 = apidocs\docs.ps1 + apidocs\index.md = apidocs\index.md + apidocs\toc.yml = apidocs\toc.yml + EndProjectSection +EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lucene.Net.Tests.Misc", "src\Lucene.Net.Tests.Misc\Lucene.Net.Tests.Misc.csproj", "{F8DDC5B7-A621-4B67-AB4B-BBE083C05BB8}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lucene.Net.Tests.Queries", "src\Lucene.Net.Tests.Queries\Lucene.Net.Tests.Queries.csproj", "{AC750DC0-05A3-4F96-8CC5-CFC8FD01D4CF}" @@ -357,8 +366,8 @@ Global HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution - {EFB2E31A-5917-49D5-A808-FE5061A550B4} = {8CA61D33-3590-4024-A304-7B1F75B50653} {4DF7EACE-2B25-43F6-B558-8520BF20BD76} = {8CA61D33-3590-4024-A304-7B1F75B50653} + {EFB2E31A-5917-49D5-A808-FE5061A550B4} = {8CA61D33-3590-4024-A304-7B1F75B50653} {119BBACD-D4DB-4E3B-922F-3DA83E0B29E2} = {4DF7EACE-2B25-43F6-B558-8520BF20BD76} {CF3A74CA-FEFD-4F41-961B-CC8CF8D96286} = {8CA61D33-3590-4024-A304-7B1F75B50653} {4B054831-5275-44E2-A4D4-CA0B19BEE19A} = {8CA61D33-3590-4024-A304-7B1F75B50653} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/package.md index b4b5e73271..c5bb917787 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/package.md @@ -16,7 +16,7 @@ limitations under the License. --> - + Analyzer for Chinese, Japanese, and Korean, which indexes bigrams. This analyzer generates bigram terms, which are overlapping groups of two adjacent Han, Hiragana, Katakana, or Hangul characters. diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Cn/package.md index 50a3555371..51fbfdc159 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/package.md @@ -16,7 +16,7 @@ limitations under the License. --> - + Analyzer for Chinese, which indexes unigrams (individual chinese characters). 
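For orientation, here is a minimal C# sketch of consuming the bigram output of the analyzer described above (assuming the Lucene.NET 4.8 `CJKAnalyzer` and token-attribute APIs; the field name and sample text are illustrative only):

```csharp
using System;
using System.IO;
using Lucene.Net.Analysis.Cjk;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

// Prints the overlapping bigrams CJKAnalyzer produces for adjacent
// Han/Hiragana/Katakana/Hangul characters.
var analyzer = new CJKAnalyzer(LuceneVersion.LUCENE_48);
using (var ts = analyzer.GetTokenStream("body", new StringReader("こんにちは世界")))
{
    var term = ts.AddAttribute<ICharTermAttribute>();
    ts.Reset();                            // required before the first IncrementToken()
    while (ts.IncrementToken())
        Console.WriteLine(term.ToString()); // overlapping two-character terms
    ts.End();                              // finalizes offsets
}
```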
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Compound/package.md index 77585b4e9e..c807b87f31 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/package.md @@ -74,8 +74,8 @@ filter available: #### HyphenationCompoundWordTokenFilter -The [](xref:Lucene.Net.Analysis.Compound.HyphenationCompoundWordTokenFilter -HyphenationCompoundWordTokenFilter) uses hyphenation grammars to find +The [ +HyphenationCompoundWordTokenFilter](xref:Lucene.Net.Analysis.Compound.HyphenationCompoundWordTokenFilter) uses hyphenation grammars to find potential subwords that a worth to check against the dictionary. It can be used without a dictionary as well but then produces a lot of "nonword" tokens. The quality of the output tokens is directly connected to the quality of the @@ -101,8 +101,8 @@ Credits for the hyphenation code go to the #### DictionaryCompoundWordTokenFilter -The [](xref:Lucene.Net.Analysis.Compound.DictionaryCompoundWordTokenFilter -DictionaryCompoundWordTokenFilter) uses a dictionary-only approach to +The [ +DictionaryCompoundWordTokenFilter](xref:Lucene.Net.Analysis.Compound.DictionaryCompoundWordTokenFilter) uses a dictionary-only approach to find subwords in a compound word. It is much slower than the one that uses the hyphenation grammars. You can use it as a first start to see if your dictionary is good or not because it is much simpler in design. diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/package.md index bf1ec16dda..dc5c944cb8 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/package.md @@ -15,11 +15,8 @@ See the License for the specific language governing permissions and limitations under the License. --> - - - org.apache.lucene.analysis.payloads - - + + + Provides various convenience classes for creating payloads on Tokens. - - \ No newline at end of file + diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/package.md index d9b4794ce6..4e89cd46ab 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/package.md @@ -15,13 +15,10 @@ See the License for the specific language governing permissions and limitations under the License. --> - - - org.apache.lucene.analysis.sinks - - -[](xref:Lucene.Net.Analysis.Sinks.TeeSinkTokenFilter) and implementations -of [](xref:Lucene.Net.Analysis.Sinks.TeeSinkTokenFilter.SinkFilter) that + + + + and implementations +of that might be useful. - - \ No newline at end of file + diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/package.md index fc93a1dac6..48ae57e8f5 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/package.md @@ -16,7 +16,7 @@ limitations under the License. --> -[](xref:Lucene.Net.Analysis.TokenFilter) and [](xref:Lucene.Net.Analysis.Analyzer) implementations that use Snowball + and implementations that use Snowball stemmers. This project provides pre-compiled version of the Snowball stemmers based on revision 500 of the Tartarus Snowball repository, together with classes integrating them with the Lucene search engine. 
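As a sketch of how the pre-compiled stemmers described above are typically wired in, the following custom analyzer chains a `SnowballFilter` behind a `StandardTokenizer` (a minimal sketch assuming the Lucene.NET 4.8 `Analyzer.CreateComponents` override pattern; the analyzer class itself is hypothetical):

```csharp
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Snowball;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Util;

public sealed class EnglishSnowballAnalyzer : Analyzer
{
    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        var source = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);
        // "English" selects one of the pre-compiled Tartarus Snowball stemmers.
        TokenStream result = new SnowballFilter(source, "English");
        return new TokenStreamComponents(source, result);
    }
}
```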
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/package.md index aaee44bec2..7d67974ea6 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Backwards-compatible implementation to match [](xref:Lucene.Net.Util.Version.LUCENE_31) \ No newline at end of file +Backwards-compatible implementation to match [#LUCENE_31](xref:Lucene.Net.Util.Version) \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/package.md index 0417d2457f..4f5fe5fc41 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Backwards-compatible implementation to match [](xref:Lucene.Net.Util.Version.LUCENE_34) \ No newline at end of file +Backwards-compatible implementation to match [#LUCENE_34](xref:Lucene.Net.Util.Version) \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/package.md index ee550da586..a4be333d4a 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Backwards-compatible implementation to match [](xref:Lucene.Net.Util.Version.LUCENE_36) \ No newline at end of file +Backwards-compatible implementation to match [#LUCENE_36](xref:Lucene.Net.Util.Version) \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/package.md index 038f829155..78c2c19fe0 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Backwards-compatible implementation to match [](xref:Lucene.Net.Util.Version.LUCENE_40) \ No newline at end of file +Backwards-compatible implementation to match [#LUCENE_40](xref:Lucene.Net.Util.Version) \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/package.md b/src/Lucene.Net.Analysis.Common/Analysis/Standard/package.md index fa2696c9fc..10033d4b7f 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/package.md +++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/package.md @@ -20,7 +20,7 @@ The `org.apache.lucene.analysis.standard` package contains three fast grammar-based tokenizers constructed with JFlex: -* [](xref:Lucene.Net.Analysis.Standard.StandardTokenizer): +* : as of Lucene 3.1, implements the Word Break rules from the Unicode Text Segmentation algorithm, as specified in [Unicode Standard Annex #29](http://unicode.org/reports/tr29/). @@ -28,32 +28,32 @@ The `org.apache.lucene.analysis.standard` package contains three fast grammar-ba **not** tokenized as single tokens, but are instead split up into tokens according to the UAX#29 word break rules. 
- [](xref:Lucene.Net.Analysis.Standard.StandardAnalyzer StandardAnalyzer) includes - [](xref:Lucene.Net.Analysis.Standard.StandardTokenizer StandardTokenizer), - [](xref:Lucene.Net.Analysis.Standard.StandardFilter StandardFilter), - [](xref:Lucene.Net.Analysis.Core.LowerCaseFilter LowerCaseFilter) - and [](xref:Lucene.Net.Analysis.Core.StopFilter StopFilter). + [StandardAnalyzer](xref:Lucene.Net.Analysis.Standard.StandardAnalyzer) includes + [StandardTokenizer](xref:Lucene.Net.Analysis.Standard.StandardTokenizer), + [StandardFilter](xref:Lucene.Net.Analysis.Standard.StandardFilter), + [LowerCaseFilter](xref:Lucene.Net.Analysis.Core.LowerCaseFilter) + and [StopFilter](xref:Lucene.Net.Analysis.Core.StopFilter). When the `Version` specified in the constructor is lower than - 3.1, the [](xref:Lucene.Net.Analysis.Standard.ClassicTokenizer ClassicTokenizer) + 3.1, the [ClassicTokenizer](xref:Lucene.Net.Analysis.Standard.ClassicTokenizer) implementation is invoked. -* [](xref:Lucene.Net.Analysis.Standard.ClassicTokenizer ClassicTokenizer): +* [ClassicTokenizer](xref:Lucene.Net.Analysis.Standard.ClassicTokenizer): this class was formerly (prior to Lucene 3.1) named `StandardTokenizer`. (Its tokenization rules are not based on the Unicode Text Segmentation algorithm.) - [](xref:Lucene.Net.Analysis.Standard.ClassicAnalyzer ClassicAnalyzer) includes - [](xref:Lucene.Net.Analysis.Standard.ClassicTokenizer ClassicTokenizer), - [](xref:Lucene.Net.Analysis.Standard.StandardFilter StandardFilter), - [](xref:Lucene.Net.Analysis.Core.LowerCaseFilter LowerCaseFilter) - and [](xref:Lucene.Net.Analysis.Core.StopFilter StopFilter). + [ClassicAnalyzer](xref:Lucene.Net.Analysis.Standard.ClassicAnalyzer) includes + [ClassicTokenizer](xref:Lucene.Net.Analysis.Standard.ClassicTokenizer), + [StandardFilter](xref:Lucene.Net.Analysis.Standard.StandardFilter), + [LowerCaseFilter](xref:Lucene.Net.Analysis.Core.LowerCaseFilter) + and [StopFilter](xref:Lucene.Net.Analysis.Core.StopFilter). -* [](xref:Lucene.Net.Analysis.Standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer): +* [UAX29URLEmailTokenizer](xref:Lucene.Net.Analysis.Standard.UAX29URLEmailTokenizer): implements the Word Break rules from the Unicode Text Segmentation algorithm, as specified in [Unicode Standard Annex #29](http://unicode.org/reports/tr29/). URLs and email addresses are also tokenized according to the relevant RFCs. - [](xref:Lucene.Net.Analysis.Standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer) includes - [](xref:Lucene.Net.Analysis.Standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer), - [](xref:Lucene.Net.Analysis.Standard.StandardFilter StandardFilter), - [](xref:Lucene.Net.Analysis.Core.LowerCaseFilter LowerCaseFilter) - and [](xref:Lucene.Net.Analysis.Core.StopFilter StopFilter). \ No newline at end of file + [UAX29URLEmailAnalyzer](xref:Lucene.Net.Analysis.Standard.UAX29URLEmailAnalyzer) includes + [UAX29URLEmailTokenizer](xref:Lucene.Net.Analysis.Standard.UAX29URLEmailTokenizer), + [StandardFilter](xref:Lucene.Net.Analysis.Standard.StandardFilter), + [LowerCaseFilter](xref:Lucene.Net.Analysis.Core.LowerCaseFilter) + and [StopFilter](xref:Lucene.Net.Analysis.Core.StopFilter). 
\ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/package.md b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/package.md index 1fcb461821..1a702a6491 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/package.md +++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Custom [](xref:Lucene.Net.Util.AttributeImpl) for indexing collation keys as index terms. \ No newline at end of file +Custom for indexing collation keys as index terms. \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Collation/package.md b/src/Lucene.Net.Analysis.Common/Collation/package.md index 7d4f8441da..cca82e8440 100644 --- a/src/Lucene.Net.Analysis.Common/Collation/package.md +++ b/src/Lucene.Net.Analysis.Common/Collation/package.md @@ -28,8 +28,8 @@ very slow.) * Effective Locale-specific normalization (case differences, diacritics, etc.). - ([](xref:Lucene.Net.Analysis.Core.LowerCaseFilter) and - [](xref:Lucene.Net.Analysis.Miscellaneous.ASCIIFoldingFilter) provide these services + ( and + provide these services in a generic way that doesn't take into account locale-specific needs.) ## Example Usages diff --git a/src/Lucene.Net.Analysis.Common/overview.md b/src/Lucene.Net.Analysis.Common/overview.md index bd1a57a985..7d8c3cf5f2 100644 --- a/src/Lucene.Net.Analysis.Common/overview.md +++ b/src/Lucene.Net.Analysis.Common/overview.md @@ -1,4 +1,9 @@ - -Custom [](xref:Lucene.Net.Util.AttributeImpl) for indexing collation keys as index terms. \ No newline at end of file +Custom for indexing collation keys as index terms. \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.ICU/overview.md b/src/Lucene.Net.Analysis.ICU/overview.md index 28005138dc..c0f1c6d3cb 100644 --- a/src/Lucene.Net.Analysis.ICU/overview.md +++ b/src/Lucene.Net.Analysis.ICU/overview.md @@ -1,4 +1,9 @@ - - - - Apache Lucene ICU integration module - + + This module exposes functionality from [ICU](http://site.icu-project.org/) to Apache Lucene. ICU4J is a Java @@ -27,7 +30,7 @@ library that enhances Java's internationalization support by improving performance, keeping current with the Unicode Standard, and providing richer APIs. -For an introduction to Lucene's analysis API, see the [](xref:Lucene.Net.Analysis) package documentation. +For an introduction to Lucene's analysis API, see the package documentation. This module exposes the following functionality: @@ -84,8 +87,8 @@ For an introduction to Lucene's analysis API, see the [](xref:Lucene.Net.Analysi very slow.) * Effective Locale-specific normalization (case differences, diacritics, etc.). - ([](xref:Lucene.Net.Analysis.Core.LowerCaseFilter) and - [](xref:Lucene.Net.Analysis.Miscellaneous.ASCIIFoldingFilter) provide these services + ( and + provide these services in a generic way that doesn't take into account locale-specific needs.) ## Example Usages @@ -266,7 +269,7 @@ For an introduction to Lucene's analysis API, see the [](xref:Lucene.Net.Analysi # [Backwards Compatibility]() - This module exists to provide up-to-date Unicode functionality that supports the most recent version of Unicode (currently 6.3). However, some users who wish for stronger backwards compatibility can restrict [](xref:Lucene.Net.Analysis.Icu.ICUNormalizer2Filter) to operate on only a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}. 
+ This module exists to provide up-to-date Unicode functionality that supports the most recent version of Unicode (currently 6.3). However, some users who wish for stronger backwards compatibility can restrict to operate on only a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}. ## Example Usages diff --git a/src/Lucene.Net.Analysis.Kuromoji/overview.md b/src/Lucene.Net.Analysis.Kuromoji/overview.md index 99acca2a85..8e5bcb1048 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/overview.md +++ b/src/Lucene.Net.Analysis.Kuromoji/overview.md @@ -1,4 +1,9 @@ - - - Apache Lucene Kuromoji Analyzer - + Kuromoji is a morphological analyzer for Japanese text. This module provides support for Japanese text analysis, including features such as part-of-speech tagging, lemmatization, and compound word analysis. - For an introduction to Lucene's analysis API, see the [](xref:Lucene.Net.Analysis) package documentation. \ No newline at end of file + For an introduction to Lucene's analysis API, see the package documentation. \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Phonetic/overview.md b/src/Lucene.Net.Analysis.Phonetic/overview.md index 77bee89ca6..164ece7a44 100644 --- a/src/Lucene.Net.Analysis.Phonetic/overview.md +++ b/src/Lucene.Net.Analysis.Phonetic/overview.md @@ -1,4 +1,9 @@ - - - analyzers-phonetic - + Analysis for indexing phonetic signatures (for sounds-alike search) - For an introduction to Lucene's analysis API, see the [](xref:Lucene.Net.Analysis) package documentation. + For an introduction to Lucene's analysis API, see the package documentation. This module provides analysis components (using encoders from [Apache Commons Codec](http://commons.apache.org/codec/)) that index and search phonetic signatures. \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/package.md b/src/Lucene.Net.Analysis.SmartCn/HHMM/package.md index eccb59d16a..d493f00b15 100644 --- a/src/Lucene.Net.Analysis.SmartCn/HHMM/package.md +++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/package.md @@ -16,7 +16,7 @@ limitations under the License. --> - + SmartChineseAnalyzer Hidden Markov Model package. @lucene.experimental \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.SmartCn/overview.md b/src/Lucene.Net.Analysis.SmartCn/overview.md index 0a7e1ff079..e844e2a1dc 100644 --- a/src/Lucene.Net.Analysis.SmartCn/overview.md +++ b/src/Lucene.Net.Analysis.SmartCn/overview.md @@ -15,10 +15,8 @@ limitations under the License. --> - - smartcn - + Analyzer for Simplified Chinese, which indexes words. - For an introduction to Lucene's analysis API, see the [](xref:Lucene.Net.Analysis) package documentation. \ No newline at end of file + For an introduction to Lucene's analysis API, see the package documentation. \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.SmartCn/package.md b/src/Lucene.Net.Analysis.SmartCn/package.md index 6afbed8c64..ad648d534d 100644 --- a/src/Lucene.Net.Analysis.SmartCn/package.md +++ b/src/Lucene.Net.Analysis.SmartCn/package.md @@ -1,4 +1,9 @@ - +--- +uid: Lucene.Net.Analysis.Smartcn +summary: *content +--- + + - + Analyzer for Simplified Chinese, which indexes words. 
@lucene.experimental diff --git a/src/Lucene.Net.Analysis.Stempel/overview.md b/src/Lucene.Net.Analysis.Stempel/overview.md index a31c1ae8bd..394ea91953 100644 --- a/src/Lucene.Net.Analysis.Stempel/overview.md +++ b/src/Lucene.Net.Analysis.Stempel/overview.md @@ -1,4 +1,9 @@ - - - - Benchmarking Lucene By Tasks - - + + + Benchmarking Lucene By Tasks.
@@ -495,5 +493,4 @@ Example: max.buffered=buf:10:10:100:100 -
 
- - \ No newline at end of file + diff --git a/src/Lucene.Net.Benchmark/overview.md b/src/Lucene.Net.Benchmark/overview.md index b786443914..2c2e6e1fb9 100644 --- a/src/Lucene.Net.Benchmark/overview.md +++ b/src/Lucene.Net.Benchmark/overview.md @@ -1,4 +1,9 @@ - - - benchmark - + benchmark \ No newline at end of file diff --git a/src/Lucene.Net.Benchmark/package.md b/src/Lucene.Net.Benchmark/package.md index b96f567b37..b9c74f9e9b 100644 --- a/src/Lucene.Net.Benchmark/package.md +++ b/src/Lucene.Net.Benchmark/package.md @@ -15,11 +15,9 @@ See the License for the specific language governing permissions and limitations under the License. --> - - - Lucene Benchmarking Package - - + + + The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora.
@@ -42,5 +40,4 @@ The benchmark contribution contains tools for benchmarking Lucene using standard The original code for these classes was donated by Andrzej Bialecki at http://issues.apache.org/jira/browse/LUCENE-675 and has been updated by Grant Ingersoll to make some parts of the code reusable in other benchmarkers
 
- - \ No newline at end of file + diff --git a/src/Lucene.Net.Classification/overview.md b/src/Lucene.Net.Classification/overview.md index fa0f140167..ecf2c144d1 100644 --- a/src/Lucene.Net.Classification/overview.md +++ b/src/Lucene.Net.Classification/overview.md @@ -1,4 +1,9 @@ - - - classification - + Provides a classification module which leverages Lucene index information. \ No newline at end of file diff --git a/src/Lucene.Net.Demo/overview.md b/src/Lucene.Net.Demo/overview.md index ad0bdd049b..4f87725d9b 100644 --- a/src/Lucene.Net.Demo/overview.md +++ b/src/Lucene.Net.Demo/overview.md @@ -1,4 +1,9 @@ - -Provides faceting capabilities over facets that were indexed with [](xref:Lucene.Net.Facet.Sortedset.SortedSetDocValuesFacetField). \ No newline at end of file +Provides faceting capabilities over facets that were indexed with . \ No newline at end of file diff --git a/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs b/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs index 2b907cb13d..c1301ae52f 100644 --- a/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs +++ b/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumFloatAssociations.cs @@ -32,7 +32,7 @@ namespace Lucene.Net.Facet.Taxonomy /// /// NOTE: This was TaxonomyFacetSumFloatAssociations in Lucene /// - /// @lucene.experimental + /// @lucene.experimental /// public class TaxonomyFacetSumSingleAssociations : SingleTaxonomyFacets { diff --git a/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs b/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs index 7702b25dba..68048182cf 100644 --- a/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs +++ b/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumIntAssociations.cs @@ -31,7 +31,7 @@ namespace Lucene.Net.Facet.Taxonomy /// /// NOTE: This was TaxonomyFacetSumIntAssociations in Lucene /// - /// @lucene.experimental + /// @lucene.experimental /// public class TaxonomyFacetSumInt32Associations : Int32TaxonomyFacets { diff --git a/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs b/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs index 438931d501..d362dfacb3 100644 --- a/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs +++ b/src/Lucene.Net.Facet/Taxonomy/TaxonomyFacetSumValueSource.cs @@ -37,7 +37,7 @@ namespace Lucene.Net.Facet.Taxonomy /// Aggregates sum of values from and , /// for each facet label. /// - /// @lucene.experimental + /// @lucene.experimental /// public class TaxonomyFacetSumValueSource : SingleTaxonomyFacets { diff --git a/src/Lucene.Net.Facet/package.md b/src/Lucene.Net.Facet/package.md index e3caf3ee25..d017190f4c 100644 --- a/src/Lucene.Net.Facet/package.md +++ b/src/Lucene.Net.Facet/package.md @@ -1,4 +1,9 @@ - -Support for grouping by [](xref:Lucene.Net.Queries.Function.ValueSource). \ No newline at end of file +Support for grouping by . \ No newline at end of file diff --git a/src/Lucene.Net.Grouping/Term/package.md b/src/Lucene.Net.Grouping/Term/package.md index f7dbcefb5f..3008b51bd9 100644 --- a/src/Lucene.Net.Grouping/Term/package.md +++ b/src/Lucene.Net.Grouping/Term/package.md @@ -15,4 +15,4 @@ limitations under the License. --> -Support for grouping by indexed terms via [](xref:Lucene.Net.Search.FieldCache). \ No newline at end of file +Support for grouping by indexed terms via . 
\ No newline at end of file diff --git a/src/Lucene.Net.Grouping/package.md b/src/Lucene.Net.Grouping/package.md index b5668efc82..4e85c1ea92 100644 --- a/src/Lucene.Net.Grouping/package.md +++ b/src/Lucene.Net.Grouping/package.md @@ -1,4 +1,9 @@ - - - Highlighter - + The highlight package contains classes to provide "keyword in context" features typically used to highlight search terms in the text of results pages. \ No newline at end of file diff --git a/src/Lucene.Net.Join/package.md b/src/Lucene.Net.Join/package.md index c21b5660e4..f79806d446 100644 --- a/src/Lucene.Net.Join/package.md +++ b/src/Lucene.Net.Join/package.md @@ -1,4 +1,9 @@ - - - memory - + memory \ No newline at end of file diff --git a/src/Lucene.Net.Memory/package.md b/src/Lucene.Net.Memory/package.md index f0b262c780..57a0b84f6c 100644 --- a/src/Lucene.Net.Memory/package.md +++ b/src/Lucene.Net.Memory/package.md @@ -1,4 +1,9 @@ - +--- +uid: Lucene.Net.Index.Memory +summary: *content +--- + + - - + + High-performance single-document main memory Apache Lucene fulltext search index. - - \ No newline at end of file + diff --git a/src/Lucene.Net.Misc/Index/Sorter/package.md b/src/Lucene.Net.Misc/Index/Sorter/package.md index b489a6e6ce..4d6056c3ce 100644 --- a/src/Lucene.Net.Misc/Index/Sorter/package.md +++ b/src/Lucene.Net.Misc/Index/Sorter/package.md @@ -22,10 +22,10 @@ reverse the order of the documents (by using SortField.Type.DOC in reverse). Multi-level sorts can be specified the same way you would when searching, by building Sort from multiple SortFields. -[](xref:Lucene.Net.Index.Sorter.SortingMergePolicy) can be used to + can be used to make Lucene sort segments before merging them. This will ensure that every segment resulting from a merge will be sorted according to the provided -[](xref:Lucene.Net.Search.Sort). This however makes merging and +. This however makes merging and thus indexing slower. Sorted segments allow for early query termination when the sort order diff --git a/src/Lucene.Net.Misc/overview.md b/src/Lucene.Net.Misc/overview.md index c47d76519b..f937f63a0b 100644 --- a/src/Lucene.Net.Misc/overview.md +++ b/src/Lucene.Net.Misc/overview.md @@ -1,4 +1,9 @@ - - - miscellaneous - + ## Misc Tools @@ -29,7 +32,7 @@ changing norms, finding high freq terms, and others. **NOTE**: This uses C++ sources (accessible via JNI), which you'll have to compile on your platform. -[](xref:Lucene.Net.Store.NativeUnixDirectory) is a Directory implementation that bypasses the + is a Directory implementation that bypasses the OS's buffer cache (using direct IO) for any IndexInput and IndexOutput used during merging of segments larger than a specified size (default 10 MB). This avoids evicting hot pages that are still in-use for diff --git a/src/Lucene.Net.Queries/overview.md b/src/Lucene.Net.Queries/overview.md index ba3f288b56..bf3d67edbe 100644 --- a/src/Lucene.Net.Queries/overview.md +++ b/src/Lucene.Net.Queries/overview.md @@ -1,4 +1,9 @@ - - - Queries - + Queries \ No newline at end of file diff --git a/src/Lucene.Net.QueryParser/Classic/package.md b/src/Lucene.Net.QueryParser/Classic/package.md index ee90202121..99f7d7094f 100644 --- a/src/Lucene.Net.QueryParser/Classic/package.md +++ b/src/Lucene.Net.QueryParser/Classic/package.md @@ -1,4 +1,9 @@ - +--- +uid: Lucene.Net.QueryParsers.Classic +summary: *content +--- + + - - QueryParsers - + Apache Lucene QueryParsers. 
@@ -53,7 +56,7 @@ This project contains the new Lucene query parser implementation, which matches the syntax of the core QueryParser but offers a more modular architecture to enable customization. - It's currently divided in 2 main packages: * [](xref:Lucene.Net.QueryParsers.Flexible.Core): it contains the query parser API classes, which should be extended by query parser implementations. * [](xref:Lucene.Net.QueryParsers.Flexible.Standard): it contains the current Lucene query parser implementation using the new query parser API. + It's currently divided in 2 main packages: * <xref:Lucene.Net.QueryParsers.Flexible.Core>: it contains the query parser API classes, which should be extended by query parser implementations. * <xref:Lucene.Net.QueryParsers.Flexible.Standard>: it contains the current Lucene query parser implementation using the new query parser API. ### Features @@ -88,8 +91,8 @@
QueryParser
This layer is the text parsing layer which simply transforms the -query text string into a [](xref:Lucene.Net.QueryParsers.Flexible.Core.Nodes.QueryNode) tree. Every text parser -must implement the interface [](xref:Lucene.Net.QueryParsers.Flexible.Core.Parser.SyntaxParser). +query text string into a <xref:Lucene.Net.QueryParsers.Flexible.Core.Nodes.QueryNode> tree. Every text parser +must implement the interface <xref:Lucene.Net.QueryParsers.Flexible.Core.Parser.SyntaxParser>. Lucene's default implementation implements it using JavaCC.
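A sketch of this first layer in isolation (the `StandardSyntaxParser` type exists in the flexible query parser package; the exact C# `Parse` signature is inferred from the Java `SyntaxParser` contract, so treat it as an assumption):

```csharp
using Lucene.Net.QueryParsers.Flexible.Core.Nodes;
using Lucene.Net.QueryParsers.Flexible.Standard.Parser;

// Layer 1 only: raw query text becomes a QueryNode tree; no processors
// or builders have run yet. Parse(query, defaultField) is assumed from
// the Java SyntaxParser interface.
var syntaxParser = new StandardSyntaxParser();
IQueryNode root = syntaxParser.Parse("title:lucene AND body:search", "body");
```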
@@ -103,7 +106,7 @@ terms.
QueryBuilder
-The third layer is a configurable map of builders, which map [](xref:Lucene.Net.QueryParsers.Flexible.Core.Nodes.QueryNode) types to its specific +The third layer is a configurable map of builders, which maps <xref:Lucene.Net.QueryParsers.Flexible.Core.Nodes.QueryNode> types to their specific builder that will transform the QueryNode into a Lucene Query object.
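Putting the three layers together, the C# equivalent of the `StandardQueryParser` usage shown in the next hunk would look roughly like this (Lucene.NET 4.8 API assumed):

```csharp
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers.Flexible.Standard;
using Lucene.Net.Search;
using Lucene.Net.Util;

// One facade over all three layers: syntax parser -> node processors -> builders.
var parser = new StandardQueryParser(new StandardAnalyzer(LuceneVersion.LUCENE_48));
Query query = parser.Parse("+lucene +\"query parser\"", "body");
```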
@@ -116,15 +119,15 @@ builder that will transform the QueryNode into Lucene Query object. ### StandardQueryParser and QueryParserWrapper The classic Lucene query parser is located under -[](xref:Lucene.Net.QueryParsers.Classic). +. To make it simpler to use the new query parser -the class [](xref:Lucene.Net.QueryParsers.Flexible.Standard.StandardQueryParser) may be helpful, +the class may be helpful, specially for people that do not want to extend the Query Parser. It uses the default Lucene query processors, text parser and builders, so you don't need to worry about dealing with those. -[](xref:Lucene.Net.QueryParsers.Flexible.Standard.StandardQueryParser) usage: + usage: StandardQueryParser qpHelper = new StandardQueryParser(); StandardQueryConfigHandler config = qpHelper.getQueryConfigHandler(); diff --git a/src/Lucene.Net.Replicator/overview.md b/src/Lucene.Net.Replicator/overview.md index f52ce85542..c9d7d78e70 100644 --- a/src/Lucene.Net.Replicator/overview.md +++ b/src/Lucene.Net.Replicator/overview.md @@ -15,8 +15,6 @@ limitations under the License. --> - - replicator - + Provides index files replication capabilities. \ No newline at end of file diff --git a/src/Lucene.Net.Replicator/package.md b/src/Lucene.Net.Replicator/package.md index a628226e81..caa47532f0 100644 --- a/src/Lucene.Net.Replicator/package.md +++ b/src/Lucene.Net.Replicator/package.md @@ -1,4 +1,9 @@ - - - Sandbox - + Sandbox \ No newline at end of file diff --git a/src/Lucene.Net.Spatial/overview.md b/src/Lucene.Net.Spatial/overview.md index ebf20d2161..51b196790b 100644 --- a/src/Lucene.Net.Spatial/overview.md +++ b/src/Lucene.Net.Spatial/overview.md @@ -1,4 +1,9 @@ - - - suggest - + Auto-suggest and spellchecking support. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Analysis/package.md b/src/Lucene.Net.TestFramework/Analysis/package.md index abbc244f07..981c62097e 100644 --- a/src/Lucene.Net.TestFramework/Analysis/package.md +++ b/src/Lucene.Net.TestFramework/Analysis/package.md @@ -18,4 +18,4 @@ Support for testing analysis components. - The main classes of interest are: * [](xref:Lucene.Net.Analysis.BaseTokenStreamTestCase): Highly recommended to use its helper methods, (especially in conjunction with [](xref:Lucene.Net.Analysis.MockAnalyzer) or [](xref:Lucene.Net.Analysis.MockTokenizer)), as it contains many assertions and checks to catch bugs. * [](xref:Lucene.Net.Analysis.MockTokenizer): Tokenizer for testing. Tokenizer that serves as a replacement for WHITESPACE, SIMPLE, and KEYWORD tokenizers. If you are writing a component such as a TokenFilter, its a great idea to test it wrapping this tokenizer instead for extra checks. * [](xref:Lucene.Net.Analysis.MockAnalyzer): Analyzer for testing. Analyzer that uses MockTokenizer for additional verification. If you are testing a custom component such as a queryparser or analyzer-wrapper that consumes analysis streams, its a great idea to test it with this analyzer instead. \ No newline at end of file + The main classes of interest are: * : Highly recommended to use its helper methods, (especially in conjunction with or ), as it contains many assertions and checks to catch bugs. * : Tokenizer for testing. Tokenizer that serves as a replacement for WHITESPACE, SIMPLE, and KEYWORD tokenizers. If you are writing a component such as a TokenFilter, its a great idea to test it wrapping this tokenizer instead for extra checks. * : Analyzer for testing. Analyzer that uses MockTokenizer for additional verification. 
If you are testing a custom component such as a queryparser or analyzer-wrapper that consumes analysis streams, its a great idea to test it with this analyzer instead. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/Compressing/package.md b/src/Lucene.Net.TestFramework/Codecs/Compressing/package.md index 08aeed5f18..6ddc8ae90c 100644 --- a/src/Lucene.Net.TestFramework/Codecs/Compressing/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/Compressing/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Support for testing [](xref:Lucene.Net.Codecs.Compressing.CompressingStoredFieldsFormat). \ No newline at end of file +Support for testing . \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene40/package.md b/src/Lucene.Net.TestFramework/Codecs/Lucene40/package.md index a98655beea..0fc2c8c1a9 100644 --- a/src/Lucene.Net.TestFramework/Codecs/Lucene40/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene40/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Support for testing [](xref:Lucene.Net.Codecs.Lucene40.Lucene40PostingsFormat). \ No newline at end of file +Support for testing . \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene41/package.md b/src/Lucene.Net.TestFramework/Codecs/Lucene41/package.md index 456fa6bbd0..1b35629140 100644 --- a/src/Lucene.Net.TestFramework/Codecs/Lucene41/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene41/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Support for testing [](xref:Lucene.Net.Codecs.Lucene41.Lucene41Codec). \ No newline at end of file +Support for testing . \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/package.md b/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/package.md index fb0ab32742..8d11e535cf 100644 --- a/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Codec for testing that supports [](xref:Lucene.Net.Index.TermsEnum.Ord()) \ No newline at end of file +Codec for testing that supports [#ord()](xref:Lucene.Net.Index.TermsEnum) \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene42/package.md b/src/Lucene.Net.TestFramework/Codecs/Lucene42/package.md index 51235604a6..4e10dfd90a 100644 --- a/src/Lucene.Net.TestFramework/Codecs/Lucene42/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene42/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Support for testing [](xref:Lucene.Net.Codecs.Lucene42.Lucene42Codec). \ No newline at end of file +Support for testing . \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene45/package.md b/src/Lucene.Net.TestFramework/Codecs/Lucene45/package.md index 588c2e8f72..31f4f8d53e 100644 --- a/src/Lucene.Net.TestFramework/Codecs/Lucene45/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/Lucene45/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Support for testing [](xref:Lucene.Net.Codecs.Lucene45.Lucene45Codec). \ No newline at end of file +Support for testing . 
\ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/MockSep/package.md b/src/Lucene.Net.TestFramework/Codecs/MockSep/package.md index caad729aa3..285255b0ff 100644 --- a/src/Lucene.Net.TestFramework/Codecs/MockSep/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/MockSep/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Very simple implementations of [](xref:Lucene.Net.Codecs.Sep) for testing. \ No newline at end of file +Very simple implementations of for testing. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Codecs/NestedPulsing/package.md b/src/Lucene.Net.TestFramework/Codecs/NestedPulsing/package.md index b9d4eb5836..e94d513659 100644 --- a/src/Lucene.Net.TestFramework/Codecs/NestedPulsing/package.md +++ b/src/Lucene.Net.TestFramework/Codecs/NestedPulsing/package.md @@ -16,4 +16,4 @@ limitations under the License. --> -Codec for testing that wraps [](xref:Lucene.Net.Codecs.Pulsing.PulsingPostingsFormat) with itself. \ No newline at end of file +Codec for testing that wraps with itself. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Index/package.md b/src/Lucene.Net.TestFramework/Index/package.md index 2d2255d790..71866dda8f 100644 --- a/src/Lucene.Net.TestFramework/Index/package.md +++ b/src/Lucene.Net.TestFramework/Index/package.md @@ -18,4 +18,4 @@ Support for testing of indexes. - The primary classes are: * [](xref:Lucene.Net.Index.RandomIndexWriter): Randomizes the indexing experience. [](xref:Lucene.Net.Index.MockRandomMergePolicy): MergePolicy that makes random decisions. \ No newline at end of file + The primary classes are: * : Randomizes the indexing experience. : MergePolicy that makes random decisions. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Search/package.md b/src/Lucene.Net.TestFramework/Search/package.md index 4a9f0da2ee..a8e59784d3 100644 --- a/src/Lucene.Net.TestFramework/Search/package.md +++ b/src/Lucene.Net.TestFramework/Search/package.md @@ -18,4 +18,4 @@ Support for testing search components. - The primary classes are: * [](xref:Lucene.Net.Search.QueryUtils): Useful methods for testing Query classes. [](xref:Lucene.Net.Search.ShardSearchingTestBase): Base class for simulating distributed search. \ No newline at end of file + The primary classes are: * : Useful methods for testing Query classes. : Base class for simulating distributed search. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Store/package.md b/src/Lucene.Net.TestFramework/Store/package.md index aa87950383..39fd766c2d 100644 --- a/src/Lucene.Net.TestFramework/Store/package.md +++ b/src/Lucene.Net.TestFramework/Store/package.md @@ -18,5 +18,5 @@ Support for testing store mechanisms. -The primary class is [](xref:Lucene.Net.Store.MockDirectoryWrapper), which +The primary class is , which wraps any Directory implementation and provides additional checks. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Util/Automaton/package.md b/src/Lucene.Net.TestFramework/Util/Automaton/package.md index c9888f2825..c21144f81f 100644 --- a/src/Lucene.Net.TestFramework/Util/Automaton/package.md +++ b/src/Lucene.Net.TestFramework/Util/Automaton/package.md @@ -16,5 +16,5 @@ limitations under the License. --> -Support for testing automata. The primary class is [](xref:Lucene.Net.Util.Automaton.AutomatonTestUtil), +Support for testing automata. The primary class is , which can generate random automata, has simplified implementations for testing, etc. 
\ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Util/package.md b/src/Lucene.Net.TestFramework/Util/package.md index 542b6a8979..2b3f06432f 100644 --- a/src/Lucene.Net.TestFramework/Util/package.md +++ b/src/Lucene.Net.TestFramework/Util/package.md @@ -16,5 +16,5 @@ limitations under the License. --> -General test support. The primary class is [](xref:Lucene.Net.Util.LuceneTestCase), +General test support. The primary class is , which extends JUnit with additional functionality. \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/overview.md b/src/Lucene.Net.TestFramework/overview.md index 429e1ceb4c..a29ad4c12c 100644 --- a/src/Lucene.Net.TestFramework/overview.md +++ b/src/Lucene.Net.TestFramework/overview.md @@ -1,4 +1,9 @@ - -API and code to convert text into indexable/searchable tokens. Covers [](xref:Lucene.Net.Analysis.Analyzer) and related classes. +API and code to convert text into indexable/searchable tokens. Covers and related classes. ## Parsing? Tokenization? Analysis! @@ -63,9 +68,9 @@ and proximity searches (though sentence identification is not provided by Lucene The analysis package provides the mechanism to convert Strings and Readers into tokens that can be indexed by Lucene. There are four main classes in the package from which all analysis processes are derived. These are: -* [](xref:Lucene.Net.Analysis.Analyzer) – An Analyzer is +* – An Analyzer is responsible for building a - [](xref:Lucene.Net.Analysis.TokenStream) which can be consumed + which can be consumed by the indexing and searching processes. See below for more information on implementing your own Analyzer. @@ -79,41 +84,41 @@ and proximity searches (though sentence identification is not provided by Lucene constructors and reset() methods accept a CharFilter. CharFilters may be chained to perform multiple pre-tokenization modifications. -* [](xref:Lucene.Net.Analysis.Tokenizer) – A Tokenizer is a - [](xref:Lucene.Net.Analysis.TokenStream) and is responsible for +* – A Tokenizer is a + and is responsible for breaking up incoming text into tokens. In most cases, an Analyzer will use a Tokenizer as the first step in the analysis process. However, to modify text prior to tokenization, use a CharStream subclass (see above). -* [](xref:Lucene.Net.Analysis.TokenFilter) – A TokenFilter is - also a [](xref:Lucene.Net.Analysis.TokenStream) and is responsible +* – A TokenFilter is + also a and is responsible for modifying tokens that have been created by the Tokenizer. Common modifications performed by a TokenFilter are: deletion, stemming, synonym injection, and down casing. Not all Analyzers require TokenFilters. ## Hints, Tips and Traps - The synergy between [](xref:Lucene.Net.Analysis.Analyzer) and [](xref:Lucene.Net.Analysis.Tokenizer) is sometimes confusing. To ease this confusion, some clarifications: + The synergy between and is sometimes confusing. To ease this confusion, some clarifications: -* The [](xref:Lucene.Net.Analysis.Analyzer) is responsible for the entire task of - creating tokens out of the input text, while the [](xref:Lucene.Net.Analysis.Tokenizer) +* The is responsible for the entire task of + creating tokens out of the input text, while the is only responsible for breaking the input text into tokens. Very likely, tokens created - by the [](xref:Lucene.Net.Analysis.Tokenizer) would be modified or even omitted - by the [](xref:Lucene.Net.Analysis.Analyzer) (via one or more - [](xref:Lucene.Net.Analysis.TokenFilter)s) before being returned. 
+ by the would be modified or even omitted + by the (via one or more + s) before being returned. -* [](xref:Lucene.Net.Analysis.Tokenizer) is a [](xref:Lucene.Net.Analysis.TokenStream), - but [](xref:Lucene.Net.Analysis.Analyzer) is not. +* is a , + but is not. -* [](xref:Lucene.Net.Analysis.Analyzer) is "field aware", but - [](xref:Lucene.Net.Analysis.Tokenizer) is not. +* is "field aware", but + is not. Lucene Java provides a number of analysis capabilities, the most commonly used one being the StandardAnalyzer. Many applications will have a long and industrious life with nothing more than the StandardAnalyzer. However, there are a few other classes/packages that are worth mentioning: 1. PerFieldAnalyzerWrapper – Most Analyzers perform the same operation on all - [](xref:Lucene.Net.Documents.Field)s. The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different - [](xref:Lucene.Net.Documents.Field)s. + s. The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different + s. 2. The analysis library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety of different problems related to searching. Many of the Analyzers are designed to analyze non-English languages. @@ -127,7 +132,7 @@ and proximity searches (though sentence identification is not provided by Lucene Applications usually do not invoke analysis – Lucene does it for them: * At indexing, as a consequence of - [](xref:Lucene.Net.Index.IndexWriter.AddDocument(Iterable) addDocument(doc)), + [AddDocument](xref:Lucene.Net.Index.IndexWriter#methods), the Analyzer in effect for indexing is invoked for each indexed field of the added document. * At search, a QueryParser may invoke the Analyzer during parsing. Note that for some queries, analysis does not @@ -143,7 +148,7 @@ and proximity searches (though sentence identification is not provided by Lucene try { ts.reset(); // Resets this stream to the beginning. (Required) while (ts.incrementToken()) { - // Use [](xref:Lucene.Net.Util.AttributeSource.ReflectAsString(boolean)) + // Use [#reflectAsString(boolean)](xref:Lucene.Net.Util.AttributeSource) // for token stream debugging. System.out.println("token: " + ts.reflectAsString(true)); @@ -167,13 +172,13 @@ and proximity searches (though sentence identification is not provided by Lucene ### Field Section Boundaries - When [](xref:Lucene.Net.Documents.Document.Add(Lucene.Net.Index.IndexableField) document.Add(field)) is called multiple times for the same field name, we could say that each such call creates a new section for that field in that document. In fact, a separate call to [](xref:Lucene.Net.Analysis.Analyzer.TokenStream(java.Lang.String, java.Io.Reader) tokenStream(field,reader)) would take place for each of these so called "sections". However, the default Analyzer behavior is to treat all these sections as one large section. This allows phrase search and proximity search to seamlessly cross boundaries between these "sections". In other words, if a certain field "f" is added like this: + When [Document.add](xref:Lucene.Net.Documents.Document#methods) is called multiple times for the same field name, we could say that each such call creates a new section for that field in that document. In fact, a separate call to [TokenStream](xref:Lucene.Net.Analysis.Analyzer#methods) would take place for each of these so called "sections". However, the default Analyzer behavior is to treat all these sections as one large section. 
This allows phrase search and proximity search to seamlessly cross boundaries between these "sections". In other words, if a certain field "f" is added like this: document.add(new Field("f","first ends",...); document.add(new Field("f","starts two",...); indexWriter.addDocument(document); - Then, a phrase search for "ends starts" would find that document. Where desired, this behavior can be modified by introducing a "position gap" between consecutive field "sections", simply by overriding [](xref:Lucene.Net.Analysis.Analyzer.GetPositionIncrementGap(java.Lang.String) Analyzer.GetPositionIncrementGap(fieldName)): + Then, a phrase search for "ends starts" would find that document. Where desired, this behavior can be modified by introducing a "position gap" between consecutive field "sections", simply by overriding [Analyzer.getPositionIncrementGap](xref:Lucene.Net.Analysis.Analyzer#methods): Version matchVersion = Version.LUCENE_XY; // Substitute desired Lucene version for XY Analyzer myAnalyzer = new StandardAnalyzer(matchVersion) { @@ -184,7 +189,7 @@ and proximity searches (though sentence identification is not provided by Lucene ### Token Position Increments - By default, all tokens created by Analyzers and Tokenizers have a [](xref:Lucene.Net.Analysis.TokenAttributes.PositionIncrementAttribute.GetPositionIncrement() position increment) of one. This means that the position stored for that token in the index would be one more than that of the previous token. Recall that phrase and proximity searches rely on position info. + By default, all tokens created by Analyzers and Tokenizers have a [Increment](xref:Lucene.Net.Analysis.TokenAttributes.PositionIncrementAttribute#methods) of one. This means that the position stored for that token in the index would be one more than that of the previous token. Recall that phrase and proximity searches rely on position info. If the selected analyzer filters the stop words "is" and "the", then for a document containing the string "blue is the sky", only the tokens "blue", "sky" are indexed, with position("sky") = 3 + position("blue"). Now, a phrase query "blue is the sky" would find that document, because the same analyzer filters the same stop words from that query. But the phrase query "blue sky" would not find that document because the position increment between "blue" and "sky" is only 1. @@ -229,7 +234,7 @@ and proximity searches (though sentence identification is not provided by Lucene ### Token Position Length - By default, all tokens created by Analyzers and Tokenizers have a [](xref:Lucene.Net.Analysis.TokenAttributes.PositionLengthAttribute.GetPositionLength() position length) of one. This means that the token occupies a single position. This attribute is not indexed and thus not taken into account for positional queries, but is used by eg. suggesters. + By default, all tokens created by Analyzers and Tokenizers have a [Length](xref:Lucene.Net.Analysis.TokenAttributes.PositionLengthAttribute#methods) of one. This means that the token occupies a single position. This attribute is not indexed and thus not taken into account for positional queries, but is used by eg. suggesters. The main use case for positions lengths is multi-word synonyms. With single-word synonyms, setting the position increment to 0 is enough to denote the fact that two words are synonyms, for example: @@ -264,17 +269,17 @@ and proximity searches (though sentence identification is not provided by Lucene * Tokens that have the same start position must have the same start offset. 
* Tokens that have the same end position (taking into account the position length) must have the same end offset. -* Tokenizers must call [](xref:Lucene.Net.Util.AttributeSource.ClearAttributes()) in +* Tokenizers must call [#clearAttributes()](xref:Lucene.Net.Util.AttributeSource) in incrementToken(). -* Tokenizers must override [](xref:Lucene.Net.Analysis.TokenStream.End()), and pass the final +* Tokenizers must override [#end()](xref:Lucene.Net.Analysis.TokenStream), and pass the final offset (the total number of input characters processed) to both - parameters of [](xref:Lucene.Net.Analysis.TokenAttributes.OffsetAttribute.SetOffset(int, int)). + parameters of [SetOffset(int, int)](xref:Lucene.Net.Analysis.TokenAttributes.OffsetAttribute#methods). Although these rules might seem easy to follow, problems can quickly happen when chaining badly implemented filters that play with positions and offsets, such as synonym or n-grams filters. Here are good practices for writing correct filters: * Token filters should not modify offsets. If you feel that your filter would need to modify offsets, then it should probably be implemented as a tokenizer. * Token filters should not insert positions. If a filter needs to add tokens, then they should all have a position increment of 0. -* When they add tokens, token filters should call [](xref:Lucene.Net.Util.AttributeSource.ClearAttributes()) first. +* When they add tokens, token filters should call [#clearAttributes()](xref:Lucene.Net.Util.AttributeSource) first. * When they remove tokens, token filters should increment the position increment of the following token. * Token filters should preserve position lengths. @@ -284,13 +289,13 @@ ### Attribute and AttributeSource - Classes [](xref:Lucene.Net.Util.Attribute) and [](xref:Lucene.Net.Util.AttributeSource) serve as the basis upon which the analysis elements of "Flexible Indexing" are implemented. An Attribute holds a particular piece of information about a text token. For example, [](xref:Lucene.Net.Analysis.TokenAttributes.CharTermAttribute) contains the term text of a token, and [](xref:Lucene.Net.Analysis.TokenAttributes.OffsetAttribute) contains the start and end character offsets of a token. An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of each attribute type. TokenStream now extends AttributeSource, which means that one can add Attributes to a TokenStream. Since TokenFilter extends TokenStream, all filters are also AttributeSources. + Classes <xref:Lucene.Net.Util.Attribute> and <xref:Lucene.Net.Util.AttributeSource> serve as the basis upon which the analysis elements of "Flexible Indexing" are implemented. An Attribute holds a particular piece of information about a text token. For example, <xref:Lucene.Net.Analysis.TokenAttributes.CharTermAttribute> contains the term text of a token, and <xref:Lucene.Net.Analysis.TokenAttributes.OffsetAttribute> contains the start and end character offsets of a token. An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of each attribute type. TokenStream now extends AttributeSource, which means that one can add Attributes to a TokenStream. Since TokenFilter extends TokenStream, all filters are also AttributeSources. Lucene provides seven Attributes out of the box:
-[](xref:Lucene.Net.Analysis.TokenAttributes.CharTermAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.CharTermAttribute> The term text of a token. Implements {@link java.lang.CharSequence} (providing methods length() and charAt(), and allowing e.g. for direct use with regular expression {@link java.util.regex.Matcher}s) and {@link java.lang.Appendable} (allowing the term text to be appended to).
-[](xref:Lucene.Net.Analysis.TokenAttributes.OffsetAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.OffsetAttribute> The start and end offset of a token in characters.
-[](xref:Lucene.Net.Analysis.TokenAttributes.PositionIncrementAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.PositionIncrementAttribute> See above for detailed information about position increment.
-[](xref:Lucene.Net.Analysis.TokenAttributes.PositionLengthAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.PositionLengthAttribute> The number of positions occupied by a token.
-[](xref:Lucene.Net.Analysis.TokenAttributes.PayloadAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.PayloadAttribute> The payload that a Token can optionally have.
-[](xref:Lucene.Net.Analysis.TokenAttributes.TypeAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.TypeAttribute> The type of the token. Default is 'word'.
-[](xref:Lucene.Net.Analysis.TokenAttributes.FlagsAttribute) +<xref:Lucene.Net.Analysis.TokenAttributes.FlagsAttribute> Optional flags a token can have.
[](xref:Lucene.Net.Analysis.TokenAttributes.KeywordAttribute) Keyword-aware TokenStreams/-Filters skip modification of tokens that return true from this attribute's isKeyword() method. @@ -343,48 +348,48 @@ The code fragment of the [analysis workflow protocol](#analysis-workflow) above shows a token stream being obtained, used, and then left for garbage. However, that does not mean that the components of that token stream will, in fact, be discarded. The default is just the -opposite. [](xref:Lucene.Net.Analysis.Analyzer) applies a reuse +opposite. applies a reuse strategy to the tokenizer and the token filters. It will reuse -them. For each new input, it calls [](xref:Lucene.Net.Analysis.Tokenizer.SetReader(java.Io.Reader)) +them. For each new input, it calls [#setReader(java.io.Reader)](xref:Lucene.Net.Analysis.Tokenizer) to set the input. Your components must be prepared for this scenario, as described below. #### Tokenizer -* You should create your tokenizer class by extending [](xref:Lucene.Net.Analysis.Tokenizer). +* You should create your tokenizer class by extending . * Your tokenizer must **never** make direct use of the {@link java.io.Reader} supplied to its constructor(s). (A future release of Apache Lucene may remove the reader parameters from the Tokenizer constructors.) - [](xref:Lucene.Net.Analysis.Tokenizer) wraps the reader in an + wraps the reader in an object that helps enforce that applications comply with the [analysis workflow](#analysis-workflow). Thus, your class should only reference the input via the protected 'input' field of Tokenizer. -* Your tokenizer **must** override [](xref:Lucene.Net.Analysis.TokenStream.End()). +* Your tokenizer **must** override [#end()](xref:Lucene.Net.Analysis.TokenStream). Your implementation **must** call `super.end()`. It must set a correct final offset into the offset attribute, and finish up and other attributes to reflect the end of the stream. -* If your tokenizer overrides [](xref:Lucene.Net.Analysis.TokenStream.Reset()) - or [](xref:Lucene.Net.Analysis.TokenStream.Close()), it +* If your tokenizer overrides [#reset()](xref:Lucene.Net.Analysis.TokenStream) + or [#close()](xref:Lucene.Net.Analysis.TokenStream), it **must** call the corresponding superclass method. #### Token Filter - You should create your token filter class by extending [](xref:Lucene.Net.Analysis.TokenFilter). - If your token filter overrides [](xref:Lucene.Net.Analysis.TokenStream.Reset()), - [](xref:Lucene.Net.Analysis.TokenStream.End()) - or [](xref:Lucene.Net.Analysis.TokenStream.Close()), it + You should create your token filter class by extending . + If your token filter overrides [#reset()](xref:Lucene.Net.Analysis.TokenStream), + [#end()](xref:Lucene.Net.Analysis.TokenStream) + or [#close()](xref:Lucene.Net.Analysis.TokenStream), it **must** call the corresponding superclass method. #### Creating delegates - Forwarding classes (those which extend [](xref:Lucene.Net.Analysis.Tokenizer) but delegate + Forwarding classes (those which extend but delegate selected logic to another tokenizer) must also set the reader to the delegate in the overridden - [](xref:Lucene.Net.Analysis.Tokenizer.Reset()) method, e.g.: + [#reset()](xref:Lucene.Net.Analysis.Tokenizer) method, e.g.: public class ForwardingTokenizer extends Tokenizer { private Tokenizer delegate; @@ -609,9 +614,9 @@ Now we're going to implement our own custom Attribute for part-of-speech tagging Now we also need to write the implementing class. 
The name of that class is important here: By default, Lucene checks if there is a class with the name of the Attribute with the suffix 'Impl'. In this example, we would consequently call the implementing class `PartOfSpeechAttributeImpl`. - This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions: [](xref:Lucene.Net.Util.AttributeSource.AttributeFactory). The factory accepts an Attribute interface as argument and returns an actual instance. You can implement your own factory if you need to change the default behavior. + This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions: <xref:Lucene.Net.Util.AttributeSource.AttributeFactory>. The factory accepts an Attribute interface as argument and returns an actual instance. You can implement your own factory if you need to change the default behavior. - Now here is the actual class that implements our new Attribute. Notice that the class has to extend [](xref:Lucene.Net.Util.AttributeImpl): + Now here is the actual class that implements our new Attribute. Notice that the class has to extend <xref:Lucene.Net.Util.AttributeImpl>: public final class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute { @@ -759,7 +764,7 @@ Analyzers take Java {@link java.io.Reader}s as input. Of course you can wrap you to manipulate content, but this would have the big disadvantage that character offsets might be inconsistent with your original text. -[](xref:Lucene.Net.Analysis.CharFilter) is designed to allow you to pre-process input like a FilterReader would, but also +<xref:Lucene.Net.Analysis.CharFilter> is designed to allow you to pre-process input like a FilterReader would, but also preserve the original offsets associated with those characters. This way mechanisms like highlighting still work correctly. CharFilters can be chained. diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsFormat.cs b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsFormat.cs index c88d8e7dfa..7dcda784b5 100644 --- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsFormat.cs +++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsFormat.cs @@ -48,7 +48,7 @@ public class CompressingStoredFieldsFormat : StoredFieldsFormat /// Create a new with an empty segment /// suffix. /// - /// + /// public CompressingStoredFieldsFormat(string formatName, CompressionMode compressionMode, int chunkSize) : this(formatName, "", compressionMode, chunkSize) { @@ -83,6 +83,7 @@ public CompressingStoredFieldsFormat(string formatName, CompressionMode compress /// to the size of your index). /// /// The name of the . + /// /// The to use. /// The minimum number of bytes of a single chunk of stored documents. /// diff --git a/src/Lucene.Net/Codecs/Compressing/package.md index 021601362a..9b50655906 100644 --- a/src/Lucene.Net/Codecs/Compressing/package.md +++ b/src/Lucene.Net/Codecs/Compressing/package.md @@ -1,4 +1,9 @@ - +--- uid: Lucene.Net.Codecs.Compressing summary: *content --- + + -The logical representation of a [](xref:Lucene.Net.Documents.Document) for indexing and searching. +The logical representation of a <xref:Lucene.Net.Documents.Document> for indexing and searching. -The document package provides the user level logical representation of content to be indexed and searched. The package also provides utilities for working with [](xref:Lucene.Net.Documents.Document)s and [](xref:Lucene.Net.Index.IndexableField)s. +The document package provides the user level logical representation of content to be indexed and searched.
The package also provides utilities for working with <xref:Lucene.Net.Documents.Document>s and <xref:Lucene.Net.Index.IndexableField>s. ## Document and IndexableField -A [](xref:Lucene.Net.Documents.Document) is a collection of [](xref:Lucene.Net.Index.IndexableField)s. A [](xref:Lucene.Net.Index.IndexableField) is a logical representation of a user's content that needs to be indexed or stored. [](xref:Lucene.Net.Index.IndexableField)s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized, stored, etc.) See the [](xref:Lucene.Net.Documents.Field) implementation of [](xref:Lucene.Net.Index.IndexableField) for specifics on these properties. +A <xref:Lucene.Net.Documents.Document> is a collection of <xref:Lucene.Net.Index.IndexableField>s. A <xref:Lucene.Net.Index.IndexableField> is a logical representation of a user's content that needs to be indexed or stored. <xref:Lucene.Net.Index.IndexableField>s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized, stored, etc.) See the <xref:Lucene.Net.Documents.Field> implementation of <xref:Lucene.Net.Index.IndexableField> for specifics on these properties. -Note: it is common to refer to [](xref:Lucene.Net.Documents.Document)s having [](xref:Lucene.Net.Documents.Field)s, even though technically they have [](xref:Lucene.Net.Index.IndexableField)s. +Note: it is common to refer to <xref:Lucene.Net.Documents.Document>s having <xref:Lucene.Net.Documents.Field>s, even though technically they have <xref:Lucene.Net.Index.IndexableField>s. ## Working with Documents -First and foremost, a [](xref:Lucene.Net.Documents.Document) is something created by the user application. It is your job to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.) How this is done is completely up to you. That being said, there are many tools available in other projects that can make the process of taking a file and converting it into a Lucene [](xref:Lucene.Net.Documents.Document). +First and foremost, a <xref:Lucene.Net.Documents.Document> is something created by the user application. It is your job to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.) How this is done is completely up to you. That being said, there are many tools available in other projects that can make the process of taking a file and converting it into a Lucene <xref:Lucene.Net.Documents.Document> easier. -The [](xref:Lucene.Net.Documents.DateTools) is a utility class to make dates and times searchable (remember, Lucene only searches text). [](xref:Lucene.Net.Documents.IntField), [](xref:Lucene.Net.Documents.LongField), [](xref:Lucene.Net.Documents.FloatField) and [](xref:Lucene.Net.Documents.DoubleField) are a special helper class to simplify indexing of numeric values (and also dates) for fast range range queries with [](xref:Lucene.Net.Search.NumericRangeQuery) (using a special sortable string representation of numeric values). \ No newline at end of file +The <xref:Lucene.Net.Documents.DateTools> is a utility class to make dates and times searchable (remember, Lucene only searches text). <xref:Lucene.Net.Documents.IntField>, <xref:Lucene.Net.Documents.LongField>, <xref:Lucene.Net.Documents.FloatField> and <xref:Lucene.Net.Documents.DoubleField> are special helper classes to simplify indexing of numeric values (and also dates) for fast range queries with <xref:Lucene.Net.Search.NumericRangeQuery> (using a special sortable string representation of numeric values). \ No newline at end of file diff --git a/src/Lucene.Net/Index/package.md index a1f0996261..9d299c3aa8 100644 --- a/src/Lucene.Net/Index/package.md +++ b/src/Lucene.Net/Index/package.md @@ -1,4 +1,9 @@ - +--- uid: Lucene.Net.Index summary: *content --- + + - - org.apache.lucene.search.payloads - - + + The payloads package provides Query mechanisms for finding and using payloads. - The following Query implementations are provided: 1.
[](xref:Lucene.Net.Search.Payloads.PayloadTermQuery PayloadTermQuery) -- Boost a term's score based on the value of the payload located at that term. 2. [](xref:Lucene.Net.Search.Payloads.PayloadNearQuery PayloadNearQuery) -- A [](xref:Lucene.Net.Search.Spans.SpanNearQuery SpanNearQuery) that factors in the value of the payloads located at each of the positions where the spans occur. + The following Query implementations are provided: 1. [PayloadTermQuery](xref:Lucene.Net.Search.Payloads.PayloadTermQuery) -- Boost a term's score based on the value of the payload located at that term. 2. [PayloadNearQuery](xref:Lucene.Net.Search.Payloads.PayloadNearQuery) -- A [SpanNearQuery](xref:Lucene.Net.Search.Spans.SpanNearQuery) that factors in the value of the payloads located at each of the positions where the spans occur. + - - \ No newline at end of file diff --git a/src/Lucene.Net/Search/Similarities/package.md index c655791ee6..242b1e0668 100644 --- a/src/Lucene.Net/Search/Similarities/package.md +++ b/src/Lucene.Net/Search/Similarities/package.md @@ -1,4 +1,9 @@ - +--- uid: Lucene.Net.Search.Similarities summary: *content --- + + This package contains the various ranking models that can be used in Lucene. The -abstract class [](xref:Lucene.Net.Search.Similarities.Similarity) serves +abstract class <xref:Lucene.Net.Search.Similarities.Similarity> serves as the base for ranking functions. For searching, users can employ the models already implemented or create their own by extending one of the classes in this package. @@ -28,28 +33,28 @@ package. ## Summary of the Ranking Methods -[](xref:Lucene.Net.Search.Similarities.DefaultSimilarity) is the original Lucene scoring function. It is based on a highly optimized [Vector Space Model](http://en.wikipedia.org/wiki/Vector_Space_Model). For more information, see [](xref:Lucene.Net.Search.Similarities.TFIDFSimilarity). +<xref:Lucene.Net.Search.Similarities.DefaultSimilarity> is the original Lucene scoring function. It is based on a highly optimized [Vector Space Model](http://en.wikipedia.org/wiki/Vector_Space_Model). For more information, see <xref:Lucene.Net.Search.Similarities.TFIDFSimilarity>. -[](xref:Lucene.Net.Search.Similarities.BM25Similarity) is an optimized implementation of the successful Okapi BM25 model. +<xref:Lucene.Net.Search.Similarities.BM25Similarity> is an optimized implementation of the successful Okapi BM25 model. -[](xref:Lucene.Net.Search.Similarities.SimilarityBase) provides a basic implementation of the Similarity contract and exposes a highly simplified interface, which makes it an ideal starting point for new ranking functions. Lucene ships the following methods built on [](xref:Lucene.Net.Search.Similarities.SimilarityBase): * Amati and Rijsbergen's {@linkplain org.apache.lucene.search.similarities.DFRSimilarity DFR} framework; * Clinchant and Gaussier's {@linkplain org.apache.lucene.search.similarities.IBSimilarity Information-based models} for IR; * The implementation of two {@linkplain org.apache.lucene.search.similarities.LMSimilarity language models} from Zhai and Lafferty's paper. Since [](xref:Lucene.Net.Search.Similarities.SimilarityBase) is not optimized to the same extent as [](xref:Lucene.Net.Search.Similarities.DefaultSimilarity) and [](xref:Lucene.Net.Search.Similarities.BM25Similarity), a difference in performance is to be expected when using the methods listed above. However, optimizations can always be implemented in subclasses; see [below](#changingSimilarity). +<xref:Lucene.Net.Search.Similarities.SimilarityBase> provides a basic implementation of the Similarity contract and exposes a highly simplified interface, which makes it an ideal starting point for new ranking functions.
Lucene ships the following methods built on <xref:Lucene.Net.Search.Similarities.SimilarityBase>: * Amati and Rijsbergen's {@linkplain org.apache.lucene.search.similarities.DFRSimilarity DFR} framework; * Clinchant and Gaussier's {@linkplain org.apache.lucene.search.similarities.IBSimilarity Information-based models} for IR; * The implementation of two {@linkplain org.apache.lucene.search.similarities.LMSimilarity language models} from Zhai and Lafferty's paper. Since <xref:Lucene.Net.Search.Similarities.SimilarityBase> is not optimized to the same extent as <xref:Lucene.Net.Search.Similarities.DefaultSimilarity> and <xref:Lucene.Net.Search.Similarities.BM25Similarity>, a difference in performance is to be expected when using the methods listed above. However, optimizations can always be implemented in subclasses; see [below](#changingSimilarity). ## Changing Similarity Chances are the available Similarities are sufficient for all your searching needs. However, in some applications it may be necessary to customize your [Similarity](Similarity.html) implementation. For instance, some applications do not need to distinguish between shorter and longer documents (see [a "fair" similarity](http://www.gossamer-threads.com/lists/lucene/java-user/38967#38967)). -To change [](xref:Lucene.Net.Search.Similarities.Similarity), one must do so for both indexing and searching, and the changes must happen before either of these actions take place. Although in theory there is nothing stopping you from changing mid-stream, it just isn't well-defined what is going to happen. +To change <xref:Lucene.Net.Search.Similarities.Similarity>, one must do so for both indexing and searching, and the changes must happen before either of these actions take place. Although in theory there is nothing stopping you from changing mid-stream, it just isn't well-defined what is going to happen. -To make this change, implement your own [](xref:Lucene.Net.Search.Similarities.Similarity) (likely you'll want to simply subclass an existing method, be it [](xref:Lucene.Net.Search.Similarities.DefaultSimilarity) or a descendant of [](xref:Lucene.Net.Search.Similarities.SimilarityBase)), and then register the new class by calling [](xref:Lucene.Net.Index.IndexWriterConfig.SetSimilarity(Similarity)) before indexing and [](xref:Lucene.Net.Search.IndexSearcher.SetSimilarity(Similarity)) before searching. +To make this change, implement your own <xref:Lucene.Net.Search.Similarities.Similarity> (likely you'll want to simply subclass an existing method, be it <xref:Lucene.Net.Search.Similarities.DefaultSimilarity> or a descendant of <xref:Lucene.Net.Search.Similarities.SimilarityBase>), and then register the new class by calling [#setSimilarity(Similarity)](xref:Lucene.Net.Index.IndexWriterConfig) before indexing and [#setSimilarity(Similarity)](xref:Lucene.Net.Search.IndexSearcher) before searching. ### Extending {@linkplain org.apache.lucene.search.similarities.SimilarityBase} - The easiest way to quickly implement a new ranking method is to extend [](xref:Lucene.Net.Search.Similarities.SimilarityBase), which provides basic implementations for the low level . Subclasses are only required to implement the [](xref:Lucene.Net.Search.Similarities.SimilarityBase.Score(BasicStats, float, float)) and [](xref:Lucene.Net.Search.Similarities.SimilarityBase.ToString()) methods. + The easiest way to quickly implement a new ranking method is to extend <xref:Lucene.Net.Search.Similarities.SimilarityBase>, which provides basic implementations for the low level . Subclasses are only required to implement the [Score(BasicStats, float, float)](xref:Lucene.Net.Search.Similarities.SimilarityBase#methods) and [#toString()](xref:Lucene.Net.Search.Similarities.SimilarityBase) methods. -Another option is to extend one of the [frameworks](#framework) based on [](xref:Lucene.Net.Search.Similarities.SimilarityBase). These Similarities are implemented modularly, e.g.
[](xref:Lucene.Net.Search.Similarities.DFRSimilarity) delegates computation of the three parts of its formula to the classes [](xref:Lucene.Net.Search.Similarities.BasicModel), [](xref:Lucene.Net.Search.Similarities.AfterEffect) and [](xref:Lucene.Net.Search.Similarities.Normalization). Instead of subclassing the Similarity, one can simply introduce a new basic model and tell [](xref:Lucene.Net.Search.Similarities.DFRSimilarity) to use it. +Another option is to extend one of the [frameworks](#framework) based on <xref:Lucene.Net.Search.Similarities.SimilarityBase>. These Similarities are implemented modularly, e.g. <xref:Lucene.Net.Search.Similarities.DFRSimilarity> delegates computation of the three parts of its formula to the classes <xref:Lucene.Net.Search.Similarities.BasicModel>, <xref:Lucene.Net.Search.Similarities.AfterEffect> and <xref:Lucene.Net.Search.Similarities.Normalization>. Instead of subclassing the Similarity, one can simply introduce a new basic model and tell <xref:Lucene.Net.Search.Similarities.DFRSimilarity> to use it. ### Changing {@linkplain org.apache.lucene.search.similarities.DefaultSimilarity} - If you are interested in use cases for changing your similarity, see the Lucene users's mailing list at [Overriding Similarity](http://www.gossamer-threads.com/lists/lucene/java-user/39125). In summary, here are a few use cases: 1.

The `SweetSpotSimilarity` in `org.apache.lucene.misc` gives small increases as the frequency increases a small amount and then greater increases when you hit the "sweet spot", i.e. where you think the frequency of terms is more significant.

2.

Overriding tf — In some applications, it doesn't matter what the score of a document is as long as a matching term occurs. In these cases people have overridden Similarity to return 1 from the tf() method.

3.

Changing Length Normalization — By overriding [](xref:Lucene.Net.Search.Similarities.Similarity.ComputeNorm(FieldInvertState state)), it is possible to discount how the length of a field contributes to a score. In [](xref:Lucene.Net.Search.Similarities.DefaultSimilarity), lengthNorm = 1 / (numTerms in field)^0.5, but if one changes this to be 1 / (numTerms in field), all fields will be treated ["fairly"](http://www.gossamer-threads.com/lists/lucene/java-user/38967#38967).

In general, Chris Hostetter sums it up best in saying (from [the Lucene users's mailing list](http://www.gossamer-threads.com/lists/lucene/java-user/39125#39125)): + If you are interested in use cases for changing your similarity, see the Lucene users' mailing list at [Overriding Similarity](http://www.gossamer-threads.com/lists/lucene/java-user/39125). In summary, here are a few use cases: 1.

The `SweetSpotSimilarity` in `org.apache.lucene.misc` gives small increases as the frequency increases a small amount and then greater increases when you hit the "sweet spot", i.e. where you think the frequency of terms is more significant.

2.

Overriding tf — In some applications, it doesn't matter what the score of a document is as long as a matching term occurs. In these cases people have overridden Similarity to return 1 from the tf() method.

3.

Changing Length Normalization — By overriding [ComputeNorm(FieldInvertState)](xref:Lucene.Net.Search.Similarities.Similarity#methods), it is possible to discount how the length of a field contributes to a score. In <xref:Lucene.Net.Search.Similarities.DefaultSimilarity>, lengthNorm = 1 / (numTerms in field)^0.5, but if one changes this to be 1 / (numTerms in field), all fields will be treated ["fairly"](http://www.gossamer-threads.com/lists/lucene/java-user/38967#38967). A sketch of this follows the list.

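To make use cases 2 and 3 above concrete, here is a minimal sketch in the Java Lucene 4.x idiom these package docs quote (the class name `FlatSimilarity` is hypothetical) of a DefaultSimilarity subclass that returns 1 from tf() and discounts field length linearly:

```java
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.similarities.DefaultSimilarity;

public class FlatSimilarity extends DefaultSimilarity {

    // Use case 2: any match counts the same, no matter how often the term occurs.
    @Override
    public float tf(float freq) {
        return freq > 0 ? 1.0f : 0.0f;
    }

    // Use case 3: 1 / numTerms instead of the default 1 / sqrt(numTerms),
    // so short and long fields are treated "fairly".
    @Override
    public float lengthNorm(FieldInvertState state) {
        return state.getBoost() * (1.0f / state.getLength());
    }
}
```

Register such a class at both index time and query time (see "Changing Similarity" above) so that norms are encoded and decoded consistently.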
In general, Chris Hostetter sums it up best in saying (from [the Lucene users' mailing list](http://www.gossamer-threads.com/lists/lucene/java-user/39125#39125)): > [One would override the Similarity in] ... any situation where you know more about your data than just that it's "text" is a situation where it *might* make sense to override your Similarity method. \ No newline at end of file diff --git a/src/Lucene.Net/Search/Spans/package.md index 4f49917960..db79d828d9 100644 --- a/src/Lucene.Net/Search/Spans/package.md +++ b/src/Lucene.Net/Search/Spans/package.md @@ -1,4 +1,9 @@ - +--- uid: Lucene.Net.Search.Spans summary: *content --- + + ## Query Classes #### - [](xref:Lucene.Net.Search.TermQuery TermQuery) + [TermQuery](xref:Lucene.Net.Search.TermQuery) -Of the various implementations of [](xref:Lucene.Net.Search.Query Query), the [](xref:Lucene.Net.Search.TermQuery TermQuery) is the easiest to understand and the most often used in applications. A [](xref:Lucene.Net.Search.TermQuery TermQuery) matches all the documents that contain the specified [](xref:Lucene.Net.Index.Term Term), which is a word that occurs in a certain [](xref:Lucene.Net.Documents.Field Field). Thus, a [](xref:Lucene.Net.Search.TermQuery TermQuery) identifies and scores all [](xref:Lucene.Net.Documents.Document Document)s that have a [](xref:Lucene.Net.Documents.Field Field) with the specified string in it. Constructing a [](xref:Lucene.Net.Search.TermQuery TermQuery) is as simple as: TermQuery tq = new TermQuery(new Term("fieldName", "term")); In this example, the [](xref:Lucene.Net.Search.Query Query) identifies all [](xref:Lucene.Net.Documents.Document Document)s that have the [](xref:Lucene.Net.Documents.Field Field) named "fieldName" containing the word "term". +Of the various implementations of [Query](xref:Lucene.Net.Search.Query), the [TermQuery](xref:Lucene.Net.Search.TermQuery) is the easiest to understand and the most often used in applications. A [TermQuery](xref:Lucene.Net.Search.TermQuery) matches all the documents that contain the specified [Term](xref:Lucene.Net.Index.Term), which is a word that occurs in a certain [Field](xref:Lucene.Net.Documents.Field). Thus, a [TermQuery](xref:Lucene.Net.Search.TermQuery) identifies and scores all [Document](xref:Lucene.Net.Documents.Document)s that have a [Field](xref:Lucene.Net.Documents.Field) with the specified string in it. Constructing a [TermQuery](xref:Lucene.Net.Search.TermQuery) is as simple as: TermQuery tq = new TermQuery(new Term("fieldName", "term")); In this example, the [Query](xref:Lucene.Net.Search.Query) identifies all [Document](xref:Lucene.Net.Documents.Document)s that have the [Field](xref:Lucene.Net.Documents.Field) named "fieldName" containing the word "term". #### - [](xref:Lucene.Net.Search.BooleanQuery BooleanQuery) + [BooleanQuery](xref:Lucene.Net.Search.BooleanQuery) -Things start to get interesting when one combines multiple [](xref:Lucene.Net.Search.TermQuery TermQuery) instances into a [](xref:Lucene.Net.Search.BooleanQuery BooleanQuery). A [](xref:Lucene.Net.Search.BooleanQuery BooleanQuery) contains multiple [](xref:Lucene.Net.Search.BooleanClause BooleanClause)s, where each clause contains a sub-query ([](xref:Lucene.Net.Search.Query Query) instance) and an operator (from [](xref:Lucene.Net.Search.BooleanClause.Occur BooleanClause.Occur)) describing how that sub-query is combined with the other clauses: 1.

[](xref:Lucene.Net.Search.BooleanClause.Occur.SHOULD SHOULD) — Use this operator when a clause can occur in the result set, but is not required. If a query is made up of all SHOULD clauses, then every document in the result set matches at least one of these clauses.

2.

[](xref:Lucene.Net.Search.BooleanClause.Occur.MUST MUST) — Use this operator when a clause is required to occur in the result set. Every document in the result set will match all such clauses.

3.

[](xref:Lucene.Net.Search.BooleanClause.Occur.MUST_NOT MUST NOT) — Use this operator when a clause must not occur in the result set. No document in the result set will match any such clauses.

Boolean queries are constructed by adding two or more [](xref:Lucene.Net.Search.BooleanClause BooleanClause) instances. If too many clauses are added, a [](xref:Lucene.Net.Search.BooleanQuery.TooManyClauses TooManyClauses) exception will be thrown during searching. This most often occurs when a [](xref:Lucene.Net.Search.Query Query) is rewritten into a [](xref:Lucene.Net.Search.BooleanQuery BooleanQuery) with many [](xref:Lucene.Net.Search.TermQuery TermQuery) clauses, for example by [](xref:Lucene.Net.Search.WildcardQuery WildcardQuery). The default setting for the maximum number of clauses 1024, but this can be changed via the static method [](xref:Lucene.Net.Search.BooleanQuery.SetMaxClauseCount(int)). +Things start to get interesting when one combines multiple [TermQuery](xref:Lucene.Net.Search.TermQuery) instances into a [BooleanQuery](xref:Lucene.Net.Search.BooleanQuery). A [BooleanQuery](xref:Lucene.Net.Search.BooleanQuery) contains multiple [BooleanClause](xref:Lucene.Net.Search.BooleanClause)s, where each clause contains a sub-query ([Query](xref:Lucene.Net.Search.Query) instance) and an operator (from [BooleanClause.Occur](xref:Lucene.Net.Search.BooleanClause.Occur)) describing how that sub-query is combined with the other clauses (a sketch follows this list): 1.

[SHOULD](xref:Lucene.Net.Search.BooleanClause.Occur#methods) — Use this operator when a clause can occur in the result set, but is not required. If a query is made up of all SHOULD clauses, then every document in the result set matches at least one of these clauses.

2.

[MUST](xref:Lucene.Net.Search.BooleanClause.Occur#methods) — Use this operator when a clause is required to occur in the result set. Every document in the result set will match all such clauses.

3.

[MUST_NOT](xref:Lucene.Net.Search.BooleanClause.Occur#methods) — Use this operator when a clause must not occur in the result set. No document in the result set will match any such clauses.

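As a concrete illustration of the three operators above, a minimal sketch in the Java Lucene 4.x idiom these docs quote (the field name and terms are hypothetical):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

BooleanQuery bq = new BooleanQuery();
// MUST: every matching document contains "apache".
bq.add(new TermQuery(new Term("body", "apache")), Occur.MUST);
// SHOULD: "lucene" is optional, but documents containing it score higher.
bq.add(new TermQuery(new Term("body", "lucene")), Occur.SHOULD);
// MUST_NOT: documents containing "deprecated" are excluded.
bq.add(new TermQuery(new Term("body", "deprecated")), Occur.MUST_NOT);
```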
Boolean queries are constructed by adding two or more [BooleanClause](xref:Lucene.Net.Search.BooleanClause) instances. If too many clauses are added, a [TooManyClauses](xref:Lucene.Net.Search.BooleanQuery.TooManyClauses) exception will be thrown during searching. This most often occurs when a [Query](xref:Lucene.Net.Search.Query) is rewritten into a [BooleanQuery](xref:Lucene.Net.Search.BooleanQuery) with many [TermQuery](xref:Lucene.Net.Search.TermQuery) clauses, for example by [WildcardQuery](xref:Lucene.Net.Search.WildcardQuery). The default setting for the maximum number of clauses is 1024, but this can be changed via the static method [#setMaxClauseCount(int)](xref:Lucene.Net.Search.BooleanQuery). #### Phrases @@ -51,33 +56,33 @@ Another common search is to find documents containing certain phrases. This 1. -[](xref:Lucene.Net.Search.PhraseQuery PhraseQuery) — Matches a sequence of [](xref:Lucene.Net.Index.Term Term)s. [](xref:Lucene.Net.Search.PhraseQuery PhraseQuery) uses a slop factor to determine how many positions may occur between any two terms in the phrase and still be considered a match. The slop is 0 by default, meaning the phrase must match exactly. +[PhraseQuery](xref:Lucene.Net.Search.PhraseQuery) — Matches a sequence of [Term](xref:Lucene.Net.Index.Term)s. [PhraseQuery](xref:Lucene.Net.Search.PhraseQuery) uses a slop factor to determine how many positions may occur between any two terms in the phrase and still be considered a match. The slop is 0 by default, meaning the phrase must match exactly (a sketch follows this list). 2. -[](xref:Lucene.Net.Search.MultiPhraseQuery MultiPhraseQuery) — A more general form of PhraseQuery that accepts multiple Terms for a position in the phrase. For example, this can be used to perform phrase queries that also incorporate synonyms. 3.

[](xref:Lucene.Net.Search.Spans.SpanNearQuery SpanNearQuery) — Matches a sequence of other [](xref:Lucene.Net.Search.Spans.SpanQuery SpanQuery) instances. [](xref:Lucene.Net.Search.Spans.SpanNearQuery SpanNearQuery) allows for much more complicated phrase queries since it is constructed from other [](xref:Lucene.Net.Search.Spans.SpanQuery SpanQuery) instances, instead of only [](xref:Lucene.Net.Search.TermQuery TermQuery) instances.

+[MultiPhraseQuery](xref:Lucene.Net.Search.MultiPhraseQuery) — A more general form of PhraseQuery that accepts multiple Terms for a position in the phrase. For example, this can be used to perform phrase queries that also incorporate synonyms. 3.

[SpanNearQuery](xref:Lucene.Net.Search.Spans.SpanNearQuery) — Matches a sequence of other [SpanQuery](xref:Lucene.Net.Search.Spans.SpanQuery) instances. [SpanNearQuery](xref:Lucene.Net.Search.Spans.SpanNearQuery) allows for much more complicated phrase queries since it is constructed from other [SpanQuery](xref:Lucene.Net.Search.Spans.SpanQuery) instances, instead of only [TermQuery](xref:Lucene.Net.Search.TermQuery) instances.

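The PhraseQuery item above can be sketched as follows (Java Lucene 4.x idiom; field name and terms are hypothetical):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;

PhraseQuery pq = new PhraseQuery();
pq.add(new Term("body", "apache"));
pq.add(new Term("body", "lucene"));
// 0 is already the default: the terms must be adjacent and in order.
// A slop of 1 would also tolerate one position between the two terms.
pq.setSlop(0);
```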
#### - [](xref:Lucene.Net.Search.TermRangeQuery TermRangeQuery) + [TermRangeQuery](xref:Lucene.Net.Search.TermRangeQuery) -The [](xref:Lucene.Net.Search.TermRangeQuery TermRangeQuery) matches all documents that occur in the exclusive range of a lower [](xref:Lucene.Net.Index.Term Term) and an upper [](xref:Lucene.Net.Index.Term Term) according to [](xref:Lucene.Net.Index.TermsEnum.GetComparator TermsEnum.GetComparator()). It is not intended for numerical ranges; use [](xref:Lucene.Net.Search.NumericRangeQuery NumericRangeQuery) instead. For example, one could find all documents that have terms beginning with the letters a through c. +The [TermRangeQuery](xref:Lucene.Net.Search.TermRangeQuery) matches all documents that occur in the exclusive range of a lower [Term](xref:Lucene.Net.Index.Term) and an upper [Term](xref:Lucene.Net.Index.Term) according to [TermsEnum.getComparator](xref:Lucene.Net.Index.TermsEnum#methods). It is not intended for numerical ranges; use [NumericRangeQuery](xref:Lucene.Net.Search.NumericRangeQuery) instead. For example, one could find all documents that have terms beginning with the letters a through c. #### - [](xref:Lucene.Net.Search.NumericRangeQuery NumericRangeQuery) + [NumericRangeQuery](xref:Lucene.Net.Search.NumericRangeQuery) -The [](xref:Lucene.Net.Search.NumericRangeQuery NumericRangeQuery) matches all documents that occur in a numeric range. For NumericRangeQuery to work, you must index the values using a one of the numeric fields ([](xref:Lucene.Net.Documents.IntField IntField), [](xref:Lucene.Net.Documents.LongField LongField), [](xref:Lucene.Net.Documents.FloatField FloatField), or [](xref:Lucene.Net.Documents.DoubleField DoubleField)). +The [NumericRangeQuery](xref:Lucene.Net.Search.NumericRangeQuery) matches all documents that occur in a numeric range. For NumericRangeQuery to work, you must index the values using one of the numeric fields ([IntField](xref:Lucene.Net.Documents.IntField), [LongField](xref:Lucene.Net.Documents.LongField), [FloatField](xref:Lucene.Net.Documents.FloatField), or [DoubleField](xref:Lucene.Net.Documents.DoubleField)). #### - [](xref:Lucene.Net.Search.PrefixQuery PrefixQuery), - [](xref:Lucene.Net.Search.WildcardQuery WildcardQuery), - [](xref:Lucene.Net.Search.RegexpQuery RegexpQuery) + [PrefixQuery](xref:Lucene.Net.Search.PrefixQuery), + [WildcardQuery](xref:Lucene.Net.Search.WildcardQuery), + [RegexpQuery](xref:Lucene.Net.Search.RegexpQuery) -While the [](xref:Lucene.Net.Search.PrefixQuery PrefixQuery) has a different implementation, it is essentially a special case of the [](xref:Lucene.Net.Search.WildcardQuery WildcardQuery). The [](xref:Lucene.Net.Search.PrefixQuery PrefixQuery) allows an application to identify all documents with terms that begin with a certain string. The [](xref:Lucene.Net.Search.WildcardQuery WildcardQuery) generalizes this by allowing for the use of * (matches 0 or more characters) and ? (matches exactly one character) wildcards. Note that the [](xref:Lucene.Net.Search.WildcardQuery WildcardQuery) can be quite slow. Also note that [](xref:Lucene.Net.Search.WildcardQuery WildcardQuery) should not start with * and ?, as these are extremely slow. Some QueryParsers may not allow this by default, but provide a `setAllowLeadingWildcard` method to remove that protection. The [](xref:Lucene.Net.Search.RegexpQuery RegexpQuery) is even more general than WildcardQuery, allowing an application to identify all documents with terms that match a regular expression pattern.
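To make the numeric-field pairing described a few paragraphs above concrete, a minimal sketch (Java Lucene 4.x idiom; the field name "year" is hypothetical) of indexing an IntField and querying it with NumericRangeQuery:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.search.NumericRangeQuery;

// At index time, store the value with a numeric field type...
Document doc = new Document();
doc.add(new IntField("year", 2005, Field.Store.YES));

// ...and at search time, query the same field with a numeric range
// (both bounds inclusive here).
NumericRangeQuery<Integer> range =
    NumericRangeQuery.newIntRange("year", 2000, 2010, true, true);
```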
+While the [PrefixQuery](xref:Lucene.Net.Search.PrefixQuery) has a different implementation, it is essentially a special case of the [WildcardQuery](xref:Lucene.Net.Search.WildcardQuery). The [PrefixQuery](xref:Lucene.Net.Search.PrefixQuery) allows an application to identify all documents with terms that begin with a certain string. The [WildcardQuery](xref:Lucene.Net.Search.WildcardQuery) generalizes this by allowing for the use of * (matches 0 or more characters) and ? (matches exactly one character) wildcards. Note that the [WildcardQuery](xref:Lucene.Net.Search.WildcardQuery) can be quite slow. Also note that [WildcardQuery](xref:Lucene.Net.Search.WildcardQuery) should not start with * and ?, as these are extremely slow. Some QueryParsers may not allow this by default, but provide a `setAllowLeadingWildcard` method to remove that protection. The [RegexpQuery](xref:Lucene.Net.Search.RegexpQuery) is even more general than WildcardQuery, allowing an application to identify all documents with terms that match a regular expression pattern. #### - [](xref:Lucene.Net.Search.FuzzyQuery FuzzyQuery) + [FuzzyQuery](xref:Lucene.Net.Search.FuzzyQuery) -A [](xref:Lucene.Net.Search.FuzzyQuery FuzzyQuery) matches documents that contain terms similar to the specified term. Similarity is determined using [Levenshtein (edit) distance](http://en.wikipedia.org/wiki/Levenshtein). This type of query can be useful when accounting for spelling variations in the collection. +A [FuzzyQuery](xref:Lucene.Net.Search.FuzzyQuery) matches documents that contain terms similar to the specified term. Similarity is determined using [Levenshtein (edit) distance](http://en.wikipedia.org/wiki/Levenshtein). This type of query can be useful when accounting for spelling variations in the collection. ## Scoring — Introduction @@ -85,16 +90,16 @@ Lucene scoring is the heart of why we all love Lucene. It is blazingly fast and While this document won't answer your specific scoring issues, it will, hopefully, point you to the places that can help you figure out the *what* and *why* of Lucene scoring. -Lucene scoring supports a number of pluggable information retrieval [models](http://en.wikipedia.org/wiki/Information_retrieval#Model_types), including: * [Vector Space Model (VSM)](http://en.wikipedia.org/wiki/Vector_Space_Model) * [Probablistic Models](http://en.wikipedia.org/wiki/Probabilistic_relevance_model) such as [Okapi BM25](http://en.wikipedia.org/wiki/Probabilistic_relevance_model_(BM25)) and [DFR](http://en.wikipedia.org/wiki/Divergence-from-randomness_model) * [Language models](http://en.wikipedia.org/wiki/Language_model) These models can be plugged in via the [](xref:Lucene.Net.Search.Similarities Similarity API), and offer extension hooks and parameters for tuning. In general, Lucene first finds the documents that need to be scored based on boolean logic in the Query specification, and then ranks this subset of matching documents via the retrieval model. For some valuable references on VSM and IR in general refer to [Lucene Wiki IR references](http://wiki.apache.org/lucene-java/InformationRetrieval). 
+Lucene scoring supports a number of pluggable information retrieval [models](http://en.wikipedia.org/wiki/Information_retrieval#Model_types), including: * [Vector Space Model (VSM)](http://en.wikipedia.org/wiki/Vector_Space_Model) * [Probabilistic Models](http://en.wikipedia.org/wiki/Probabilistic_relevance_model) such as [Okapi BM25](http://en.wikipedia.org/wiki/Probabilistic_relevance_model_(BM25)) and [DFR](http://en.wikipedia.org/wiki/Divergence-from-randomness_model) * [Language models](http://en.wikipedia.org/wiki/Language_model) These models can be plugged in via the [Similarity API](xref:Lucene.Net.Search.Similarities), and offer extension hooks and parameters for tuning. In general, Lucene first finds the documents that need to be scored based on boolean logic in the Query specification, and then ranks this subset of matching documents via the retrieval model. For some valuable references on VSM and IR in general refer to [Lucene Wiki IR references](http://wiki.apache.org/lucene-java/InformationRetrieval). -The rest of this document will cover [Scoring basics](#scoringBasics) and explain how to change your [](xref:Lucene.Net.Search.Similarities.Similarity Similarity). Next, it will cover ways you can customize the lucene internals in [Custom Queries -- Expert Level](#customQueriesExpert), which gives details on implementing your own [](xref:Lucene.Net.Search.Query Query) class and related functionality. Finally, we will finish up with some reference material in the [Appendix](#algorithm). +The rest of this document will cover [Scoring basics](#scoringBasics) and explain how to change your [Similarity](xref:Lucene.Net.Search.Similarities.Similarity). Next, it will cover ways you can customize the Lucene internals in [Custom Queries -- Expert Level](#customQueriesExpert), which gives details on implementing your own [Query](xref:Lucene.Net.Search.Query) class and related functionality. Finally, we will finish up with some reference material in the [Appendix](#algorithm). ## Scoring — Basics Scoring is very much dependent on the way documents are indexed, so it is important to understand indexing. (see [Lucene overview]({@docRoot}/overview-summary.html#overview_description) before continuing on with this section) Be sure to use the useful - [](xref:Lucene.Net.Search.IndexSearcher.Explain(Lucene.Net.Search.Query, int) IndexSearcher.Explain(Query, doc)) + [IndexSearcher.Explain(Query, doc)](xref:Lucene.Net.Search.IndexSearcher#methods) to understand how the score for a certain matching document was computed. @@ -102,45 +107,45 @@ Generally, the Query determines which documents match (a binary decision), while #### Fields and Documents -In Lucene, the objects we are scoring are [](xref:Lucene.Net.Documents.Document Document)s. A Document is a collection of [](xref:Lucene.Net.Documents.Field Field)s. Each Field has [](xref:Lucene.Net.Documents.FieldType semantics) about how it is created and stored ([](xref:Lucene.Net.Documents.FieldType.Tokenized() tokenized), [](xref:Lucene.Net.Documents.FieldType.Stored() stored), etc). It is important to note that Lucene scoring works on Fields and then combines the results to return Documents. This is important because two Documents with the exact same content, but one having the content in two Fields and the other in one Field may return different scores for the same query due to length normalization. +In Lucene, the objects we are scoring are [Document](xref:Lucene.Net.Documents.Document)s. A Document is a collection of [Field](xref:Lucene.Net.Documents.Field)s.
Each Field has [semantics](xref:Lucene.Net.Documents.FieldType) about how it is created and stored ([Tokenized](xref:Lucene.Net.Documents.FieldType#methods), [Stored](xref:Lucene.Net.Documents.FieldType#methods), etc). It is important to note that Lucene scoring works on Fields and then combines the results to return Documents. This is important because two Documents with the exact same content, but one having the content in two Fields and the other in one Field may return different scores for the same query due to length normalization. #### Score Boosting -Lucene allows influencing search results by "boosting" at different times: * **Index-time boost** by calling [](xref:Lucene.Net.Documents.Field.SetBoost(float) Field.SetBoost()) before a document is added to the index. * **Query-time boost** by setting a boost on a query clause, calling [](xref:Lucene.Net.Search.Query.SetBoost(float) Query.SetBoost()). +Lucene allows influencing search results by "boosting" at different times: * **Index-time boost** by calling [Field.setBoost](xref:Lucene.Net.Documents.Field#methods) before a document is added to the index. * **Query-time boost** by setting a boost on a query clause, calling [Query.setBoost](xref:Lucene.Net.Search.Query#methods). -Indexing time boosts are pre-processed for storage efficiency and written to storage for a field as follows: * All boosts of that field (i.e. all boosts under the same field name in that doc) are multiplied. * The boost is then encoded into a normalization value by the Similarity object at index-time: [](xref:Lucene.Net.Search.Similarities.Similarity.ComputeNorm computeNorm()). The actual encoding depends upon the Similarity implementation, but note that most use a lossy encoding (such as multiplying the boost with document length or similar, packed into a single byte!). * Decoding of any index-time normalization values and integration into the document's score is also performed at search time by the Similarity. +Indexing time boosts are pre-processed for storage efficiency and written to storage for a field as follows: * All boosts of that field (i.e. all boosts under the same field name in that doc) are multiplied. * The boost is then encoded into a normalization value by the Similarity object at index-time: [ComputeNorm](xref:Lucene.Net.Search.Similarities.Similarity#methods). The actual encoding depends upon the Similarity implementation, but note that most use a lossy encoding (such as multiplying the boost with document length or similar, packed into a single byte!). * Decoding of any index-time normalization values and integration into the document's score is also performed at search time by the Similarity. ## Changing Scoring — Similarity - Changing [](xref:Lucene.Net.Search.Similarities.Similarity Similarity) is an easy way to influence scoring, this is done at index-time with [](xref:Lucene.Net.Index.IndexWriterConfig.SetSimilarity(Lucene.Net.Search.Similarities.Similarity) IndexWriterConfig.SetSimilarity(Similarity)) and at query-time with [](xref:Lucene.Net.Search.IndexSearcher.SetSimilarity(Lucene.Net.Search.Similarities.Similarity) IndexSearcher.SetSimilarity(Similarity)). Be sure to use the same Similarity at query-time as at index-time (so that norms are encoded/decoded correctly); Lucene makes no effort to verify this. 
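To illustrate the index-time/query-time pairing just described, a minimal sketch (Java Lucene 4.x idiom; `reader` is an IndexReader opened elsewhere) that registers the same Similarity on both sides:

```java
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;

Similarity sim = new BM25Similarity();

// Index time: the Similarity is used to encode norms.
IndexWriterConfig config =
    new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
config.setSimilarity(sim);

// Query time: the same Similarity must decode those norms.
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(sim);
```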
+ Changing [Similarity](xref:Lucene.Net.Search.Similarities.Similarity) is an easy way to influence scoring; this is done at index-time with [IndexWriterConfig.setSimilarity](xref:Lucene.Net.Index.IndexWriterConfig#methods) and at query-time with [IndexSearcher.setSimilarity](xref:Lucene.Net.Search.IndexSearcher#methods). Be sure to use the same Similarity at query-time as at index-time (so that norms are encoded/decoded correctly); Lucene makes no effort to verify this. You can influence scoring by configuring a different built-in Similarity implementation, by tweaking its parameters, or by subclassing it to override behavior. Some implementations also offer a modular API which you can extend by plugging in a different component (e.g. term frequency normalizer). - Finally, you can extend the low level [](xref:Lucene.Net.Search.Similarities.Similarity Similarity) directly to implement a new retrieval model, or to use external scoring factors particular to your application. For example, a custom Similarity can access per-document values via [](xref:Lucene.Net.Search.FieldCache FieldCache) or [](xref:Lucene.Net.Index.NumericDocValues) and integrate them into the score. + Finally, you can extend the low level [Similarity](xref:Lucene.Net.Search.Similarities.Similarity) directly to implement a new retrieval model, or to use external scoring factors particular to your application. For example, a custom Similarity can access per-document values via [FieldCache](xref:Lucene.Net.Search.FieldCache) or <xref:Lucene.Net.Index.NumericDocValues> and integrate them into the score. - See the [](xref:Lucene.Net.Search.Similarities) package documentation for information on the built-in available scoring models and extending or changing Similarity. + See the <xref:Lucene.Net.Search.Similarities> package documentation for information on the built-in available scoring models and extending or changing Similarity. ## Custom Queries — Expert Level Custom queries are an expert level task, so tread carefully and be prepared to share your code if you want help. -With the warning out of the way, it is possible to change a lot more than just the Similarity when it comes to matching and scoring in Lucene. Lucene's search is a complex mechanism that is grounded by three main classes: 1. [](xref:Lucene.Net.Search.Query Query) — The abstract object representation of the user's information need. 2. [](xref:Lucene.Net.Search.Weight Weight) — The internal interface representation of the user's Query, so that Query objects may be reused. This is global (across all segments of the index) and generally will require global statistics (such as docFreq for a given term across all segments). 3. [](xref:Lucene.Net.Search.Scorer Scorer) — An abstract class containing common functionality for scoring. Provides both scoring and explanation capabilities. This is created per-segment. 4. [](xref:Lucene.Net.Search.BulkScorer BulkScorer) — An abstract class that scores a range of documents. A default implementation simply iterates through the hits from [](xref:Lucene.Net.Search.Scorer Scorer), but some queries such as [](xref:Lucene.Net.Search.BooleanQuery BooleanQuery) have more efficient implementations. Details on each of these classes, and their children, can be found in the subsections below. +With the warning out of the way, it is possible to change a lot more than just the Similarity when it comes to matching and scoring in Lucene. Lucene's search is a complex mechanism that is grounded by four main classes: 1.
[Query](xref:Lucene.Net.Search.Query) — The abstract object representation of the user's information need. 2. [Weight](xref:Lucene.Net.Search.Weight) — The internal interface representation of the user's Query, so that Query objects may be reused. This is global (across all segments of the index) and generally will require global statistics (such as docFreq for a given term across all segments). 3. [Scorer](xref:Lucene.Net.Search.Scorer) — An abstract class containing common functionality for scoring. Provides both scoring and explanation capabilities. This is created per-segment. 4. [BulkScorer](xref:Lucene.Net.Search.BulkScorer) — An abstract class that scores a range of documents. A default implementation simply iterates through the hits from [Scorer](xref:Lucene.Net.Search.Scorer), but some queries such as [BooleanQuery](xref:Lucene.Net.Search.BooleanQuery) have more efficient implementations. Details on each of these classes, and their children, can be found in the subsections below. #### The Query Class -In some sense, the [](xref:Lucene.Net.Search.Query Query) class is where it all begins. Without a Query, there would be nothing to score. Furthermore, the Query class is the catalyst for the other scoring classes as it is often responsible for creating them or coordinating the functionality between them. The [](xref:Lucene.Net.Search.Query Query) class has several methods that are important for derived classes: 1. [](xref:Lucene.Net.Search.Query.CreateWeight(IndexSearcher) createWeight(IndexSearcher searcher)) — A [](xref:Lucene.Net.Search.Weight Weight) is the internal representation of the Query, so each Query implementation must provide an implementation of Weight. See the subsection on [The Weight Interface](#weightClass) below for details on implementing the Weight interface. 2. [](xref:Lucene.Net.Search.Query.Rewrite(IndexReader) rewrite(IndexReader reader)) — Rewrites queries into primitive queries. Primitive queries are: [](xref:Lucene.Net.Search.TermQuery TermQuery), [](xref:Lucene.Net.Search.BooleanQuery BooleanQuery), and other queries that implement [](xref:Lucene.Net.Search.Query.CreateWeight(IndexSearcher) createWeight(IndexSearcher searcher)) +In some sense, the [Query](xref:Lucene.Net.Search.Query) class is where it all begins. Without a Query, there would be nothing to score. Furthermore, the Query class is the catalyst for the other scoring classes as it is often responsible for creating them or coordinating the functionality between them. The [Query](xref:Lucene.Net.Search.Query) class has several methods that are important for derived classes: 1. [CreateWeight(IndexSearcher)](xref:Lucene.Net.Search.Query#methods) — A [Weight](xref:Lucene.Net.Search.Weight) is the internal representation of the Query, so each Query implementation must provide an implementation of Weight. See the subsection on [The Weight Interface](#weightClass) below for details on implementing the Weight interface. 2. [Rewrite(IndexReader)](xref:Lucene.Net.Search.Query#methods) — Rewrites queries into primitive queries. Primitive queries are: [TermQuery](xref:Lucene.Net.Search.TermQuery), [BooleanQuery](xref:Lucene.Net.Search.BooleanQuery), and other queries that implement [CreateWeight(IndexSearcher)](xref:Lucene.Net.Search.Query#methods) #### The Weight Interface -The [](xref:Lucene.Net.Search.Weight Weight) interface provides an internal representation of the Query so that it can be reused. Any [](xref:Lucene.Net.Search.IndexSearcher IndexSearcher) dependent state should be stored in the Weight implementation, not in the Query class.
The interface defines five methods that must be implemented: 1. [](xref:Lucene.Net.Search.Weight.GetQuery getQuery()) — Pointer to the Query that this Weight represents. 2. [](xref:Lucene.Net.Search.Weight.GetValueForNormalization() getValueForNormalization()) — A weight can return a floating point value to indicate its magnitude for query normalization. Typically a weight such as TermWeight that scores via a [](xref:Lucene.Net.Search.Similarities.Similarity Similarity) will just defer to the Similarity's implementation: [](xref:Lucene.Net.Search.Similarities.Similarity.SimWeight.GetValueForNormalization SimWeight.getValueForNormalization()). For example, with [](xref:Lucene.Net.Search.Similarities.TFIDFSimilarity Lucene's classic vector-space formula), this is implemented as the sum of squared weights: `` 3. [](xref:Lucene.Net.Search.Weight.Normalize(float,float) normalize(float norm, float topLevelBoost)) — Performs query normalization: * `topLevelBoost`: A query-boost factor from any wrapping queries that should be multiplied into every document's score. For example, a TermQuery that is wrapped within a BooleanQuery with a boost of `5` would receive this value at this time. This allows the TermQuery (the leaf node in this case) to compute this up-front a single time (e.g. by multiplying into the IDF), rather than for every document. * `norm`: Passes in a a normalization factor which may allow for comparing scores between queries. Typically a weight such as TermWeight that scores via a [](xref:Lucene.Net.Search.Similarities.Similarity Similarity) will just defer to the Similarity's implementation: [](xref:Lucene.Net.Search.Similarities.Similarity.SimWeight.Normalize SimWeight.normalize(float,float)). 4. [](xref:Lucene.Net.Search.Weight.Scorer(Lucene.Net.Index.AtomicReaderContext, Lucene.Net.Util.Bits) scorer(AtomicReaderContext context, Bits acceptDocs)) — Construct a new [](xref:Lucene.Net.Search.Scorer Scorer) for this Weight. See [The Scorer Class](#scorerClass) below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents given the Query. 5. [](xref:Lucene.Net.Search.Weight.BulkScorer(Lucene.Net.Index.AtomicReaderContext, boolean, Lucene.Net.Util.Bits) scorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs)) — Construct a new [](xref:Lucene.Net.Search.BulkScorer BulkScorer) for this Weight. See [The BulkScorer Class](#bulkScorerClass) below for help defining a BulkScorer. This is an optional method, and most queries do not implement it. 6. [](xref:Lucene.Net.Search.Weight.Explain(Lucene.Net.Index.AtomicReaderContext, int) explain(AtomicReaderContext context, int doc)) — Provide a means for explaining why a given document was scored the way it was. Typically a weight such as TermWeight that scores via a [](xref:Lucene.Net.Search.Similarities.Similarity Similarity) will make use of the Similarity's implementation: [](xref:Lucene.Net.Search.Similarities.Similarity.SimScorer.Explain(int, Explanation) SimScorer.explain(int doc, Explanation freq)). +The [Weight](xref:Lucene.Net.Search.Weight) interface provides an internal representation of the Query so that it can be reused. Any [IndexSearcher](xref:Lucene.Net.Search.IndexSearcher) dependent state should be stored in the Weight implementation, not in the Query class. The interface defines five methods that must be implemented: 1. [GetQuery](xref:Lucene.Net.Search.Weight#methods) — Pointer to the Query that this Weight represents. 2. 
[GetValueForNormalization](xref:Lucene.Net.Search.Weight#methods) — A weight can return a floating point value to indicate its magnitude for query normalization. Typically a weight such as TermWeight that scores via a [Similarity](xref:Lucene.Net.Search.Similarities.Similarity) will just defer to the Similarity's implementation: [SimWeight#getValueForNormalization](xref:Lucene.Net.Search.Similarities.Similarity.SimWeight#methods). For example, with [Lucene's classic vector-space formula](xref:Lucene.Net.Search.Similarities.TFIDFSimilarity), this is implemented as the sum of squared weights: `` 3. [Normalize(float, float)](xref:Lucene.Net.Search.Weight#methods) — Performs query normalization: * `topLevelBoost`: A query-boost factor from any wrapping queries that should be multiplied into every document's score. For example, a TermQuery that is wrapped within a BooleanQuery with a boost of `5` would receive this value at this time. This allows the TermQuery (the leaf node in this case) to compute this up-front a single time (e.g. by multiplying into the IDF), rather than for every document. * `norm`: Passes in a normalization factor which may allow for comparing scores between queries. Typically a weight such as TermWeight that scores via a [Similarity](xref:Lucene.Net.Search.Similarities.Similarity) will just defer to the Similarity's implementation: [SimWeight#normalize](xref:Lucene.Net.Search.Similarities.Similarity.SimWeight#methods). 4. [Scorer(AtomicReaderContext, Bits)](xref:Lucene.Net.Search.Weight#methods) — Construct a new [Scorer](xref:Lucene.Net.Search.Scorer) for this Weight. See [The Scorer Class](#scorerClass) below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents given the Query. 5. [BulkScorer(AtomicReaderContext, boolean, Bits)](xref:Lucene.Net.Search.Weight#methods) — Construct a new [BulkScorer](xref:Lucene.Net.Search.BulkScorer) for this Weight. See [The BulkScorer Class](#bulkScorerClass) below for help defining a BulkScorer. This is an optional method, and most queries do not implement it. 6. [Explain(AtomicReaderContext, int)](xref:Lucene.Net.Search.Weight#methods) — Provide a means for explaining why a given document was scored the way it was. Typically a weight such as TermWeight that scores via a [Similarity](xref:Lucene.Net.Search.Similarities.Similarity) will make use of the Similarity's implementation: [SimScorer.Explain(int, Explanation)](xref:Lucene.Net.Search.Similarities.Similarity.SimScorer#methods). #### The Scorer Class -The [](xref:Lucene.Net.Search.Scorer Scorer) abstract class provides common scoring functionality for all Scorer implementations and is the heart of the Lucene scoring process. The Scorer defines the following abstract (some of them are not yet abstract, but will be in future versions and should be considered as such now) methods which must be implemented (some of them inherited from [](xref:Lucene.Net.Search.DocIdSetIterator DocIdSetIterator)): 1. [](xref:Lucene.Net.Search.Scorer.NextDoc nextDoc()) — Advances to the next document that matches this Query, returning true if and only if there is another document that matches. 2. [](xref:Lucene.Net.Search.Scorer.DocID docID()) — Returns the id of the [](xref:Lucene.Net.Documents.Document Document) that contains the match. 3. [](xref:Lucene.Net.Search.Scorer.Score score()) — Return the score of the current document. This value can be determined in any appropriate way for an application.
For instance, the [](xref:Lucene.Net.Search.TermScorer TermScorer) simply defers to the configured Similarity: [](xref:Lucene.Net.Search.Similarities.Similarity.SimScorer.Score(int, float) SimScorer.Score(int doc, float freq)). 4. [](xref:Lucene.Net.Search.Scorer.Freq freq()) — Returns the number of matches for the current document. This value can be determined in any appropriate way for an application. For instance, the [](xref:Lucene.Net.Search.TermScorer TermScorer) simply defers to the term frequency from the inverted index: [](xref:Lucene.Net.Index.DocsEnum.Freq DocsEnum.Freq()). 5. [](xref:Lucene.Net.Search.Scorer.Advance advance()) — Skip ahead in the document matches to the document whose id is greater than or equal to the passed in value. In many instances, advance can be implemented more efficiently than simply looping through all the matching documents until the target document is identified. 6. [](xref:Lucene.Net.Search.Scorer.GetChildren getChildren()) — Returns any child subscorers underneath this scorer. This allows for users to navigate the scorer hierarchy and receive more fine-grained details on the scoring process. +The [Scorer](xref:Lucene.Net.Search.Scorer) abstract class provides common scoring functionality for all Scorer implementations and is the heart of the Lucene scoring process. The Scorer defines the following abstract (some of them are not yet abstract, but will be in future versions and should be considered as such now) methods which must be implemented (some of them inherited from [DocIdSetIterator](xref:Lucene.Net.Search.DocIdSetIterator)): 1. [NextDoc](xref:Lucene.Net.Search.Scorer#methods) — Advances to the next document that matches this Query, returning true if and only if there is another document that matches. 2. [DocID](xref:Lucene.Net.Search.Scorer#methods) — Returns the id of the [Document](xref:Lucene.Net.Documents.Document) that contains the match. 3. [Score](xref:Lucene.Net.Search.Scorer#methods) — Return the score of the current document. This value can be determined in any appropriate way for an application. For instance, the [TermScorer](xref:Lucene.Net.Search.TermScorer) simply defers to the configured Similarity: [SimScorer.Score(int, float)](xref:Lucene.Net.Search.Similarities.Similarity.SimScorer#methods). 4. [Freq](xref:Lucene.Net.Search.Scorer#methods) — Returns the number of matches for the current document. This value can be determined in any appropriate way for an application. For instance, the [TermScorer](xref:Lucene.Net.Search.TermScorer) simply defers to the term frequency from the inverted index: [DocsEnum.freq](xref:Lucene.Net.Index.DocsEnum#methods). 5. [Advance](xref:Lucene.Net.Search.Scorer#methods) — Skip ahead in the document matches to the document whose id is greater than or equal to the passed in value. In many instances, advance can be implemented more efficiently than simply looping through all the matching documents until the target document is identified. 6. [GetChildren](xref:Lucene.Net.Search.Scorer#methods) — Returns any child subscorers underneath this scorer. This allows for users to navigate the scorer hierarchy and receive more fine-grained details on the scoring process. #### The BulkScorer Class -The [](xref:Lucene.Net.Search.BulkScorer BulkScorer) scores a range of documents. There is only one abstract method: 1. [](xref:Lucene.Net.Search.BulkScorer.Score(Lucene.Net.Search.Collector,int) score(Collector,int)) — Score all documents up to but not including the specified max document.
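A sketch of the iteration contract these Scorer methods define (Java Lucene 4.x idiom; the `consume` helper is hypothetical):

```java
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;

static void consume(Scorer scorer) throws IOException {
    int doc;
    // nextDoc() advances until NO_MORE_DOCS signals exhaustion; docID(),
    // score() and freq() are only meaningful while positioned on a match.
    while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println("doc=" + doc
                + " score=" + scorer.score()
                + " freq=" + scorer.freq());
    }
}
```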
+The [BulkScorer](xref:Lucene.Net.Search.BulkScorer) scores a range of documents. There is only one abstract method: 1. [Score](xref:Lucene.Net.Search.BulkScorer#methods) — Score all documents up to but not including the specified max document. #### Why would I want to add my own Query? @@ -150,14 +155,14 @@ In a nutshell, you want to add your own custom Query implementation when you thi This section is mostly notes on stepping through the Scoring process and serves as fertilizer for the earlier sections. -In the typical search application, a [](xref:Lucene.Net.Search.Query Query) is passed to the [](xref:Lucene.Net.Search.IndexSearcher IndexSearcher), beginning the scoring process. +In the typical search application, a [Query](xref:Lucene.Net.Search.Query) is passed to the [IndexSearcher](xref:Lucene.Net.Search.IndexSearcher), beginning the scoring process. -Once inside the IndexSearcher, a [](xref:Lucene.Net.Search.Collector Collector) is used for the scoring and sorting of the search results. These important objects are involved in a search: 1. The [](xref:Lucene.Net.Search.Weight Weight) object of the Query. The Weight object is an internal representation of the Query that allows the Query to be reused by the IndexSearcher. 2. The IndexSearcher that initiated the call. 3. A [](xref:Lucene.Net.Search.Filter Filter) for limiting the result set. Note, the Filter may be null. 4. A [](xref:Lucene.Net.Search.Sort Sort) object for specifying how to sort the results if the standard score-based sort method is not desired. +Once inside the IndexSearcher, a [Collector](xref:Lucene.Net.Search.Collector) is used for the scoring and sorting of the search results. These important objects are involved in a search: 1. The [Weight](xref:Lucene.Net.Search.Weight) object of the Query. The Weight object is an internal representation of the Query that allows the Query to be reused by the IndexSearcher. 2. The IndexSearcher that initiated the call. 3. A [Filter](xref:Lucene.Net.Search.Filter) for limiting the result set. Note, the Filter may be null. 4. A [Sort](xref:Lucene.Net.Search.Sort) object for specifying how to sort the results if the standard score-based sort method is not desired. -Assuming we are not sorting (since sorting doesn't affect the raw Lucene score), we call one of the search methods of the IndexSearcher, passing in the [](xref:Lucene.Net.Search.Weight Weight) object created by [](xref:Lucene.Net.Search.IndexSearcher.CreateNormalizedWeight(Lucene.Net.Search.Query) IndexSearcher.CreateNormalizedWeight(Query)), [](xref:Lucene.Net.Search.Filter Filter) and the number of results we want. This method returns a [](xref:Lucene.Net.Search.TopDocs TopDocs) object, which is an internal collection of search results. The IndexSearcher creates a [](xref:Lucene.Net.Search.TopScoreDocCollector TopScoreDocCollector) and passes it along with the Weight, Filter to another expert search method (for more on the [](xref:Lucene.Net.Search.Collector Collector) mechanism, see [](xref:Lucene.Net.Search.IndexSearcher IndexSearcher)). The TopScoreDocCollector uses a [](xref:Lucene.Net.Util.PriorityQueue PriorityQueue) to collect the top results for the search. 
+Assuming we are not sorting (since sorting doesn't affect the raw Lucene score), we call one of the search methods of the IndexSearcher, passing in the [Weight](xref:Lucene.Net.Search.Weight) object created by [IndexSearcher.CreateNormalizedWeight](xref:Lucene.Net.Search.IndexSearcher#methods), the [Filter](xref:Lucene.Net.Search.Filter), and the number of results we want. This method returns a [TopDocs](xref:Lucene.Net.Search.TopDocs) object, which is an internal collection of search results. The IndexSearcher creates a [TopScoreDocCollector](xref:Lucene.Net.Search.TopScoreDocCollector) and passes it, along with the Weight and Filter, to another expert search method (for more on the [Collector](xref:Lucene.Net.Search.Collector) mechanism, see [IndexSearcher](xref:Lucene.Net.Search.IndexSearcher)). The TopScoreDocCollector uses a [PriorityQueue](xref:Lucene.Net.Util.PriorityQueue) to collect the top results for the search. -If a Filter is being used, some initial setup is done to determine which docs to include. Otherwise, we ask the Weight for a [](xref:Lucene.Net.Search.Scorer Scorer) for each [](xref:Lucene.Net.Index.IndexReader IndexReader) segment and proceed by calling [](xref:Lucene.Net.Search.BulkScorer.Score(Lucene.Net.Search.Collector) BulkScorer.Score(Collector)). +If a Filter is being used, some initial setup is done to determine which docs to include. Otherwise, we ask the Weight for a [Scorer](xref:Lucene.Net.Search.Scorer) for each [IndexReader](xref:Lucene.Net.Index.IndexReader) segment and proceed by calling [BulkScorer.Score](xref:Lucene.Net.Search.BulkScorer#methods). -At last, we are actually going to score some documents. The score method takes in the Collector (most likely the TopScoreDocCollector or TopFieldCollector) and does its business.Of course, here is where things get involved. The [](xref:Lucene.Net.Search.Scorer Scorer) that is returned by the [](xref:Lucene.Net.Search.Weight Weight) object depends on what type of Query was submitted. In most real world applications with multiple query terms, the [](xref:Lucene.Net.Search.Scorer Scorer) is going to be a `BooleanScorer2` created from [](xref:Lucene.Net.Search.BooleanQuery.BooleanWeight BooleanWeight) (see the section on [custom queries](#customQueriesExpert) for info on changing this). +At last, we are actually going to score some documents. The score method takes in the Collector (most likely the TopScoreDocCollector or TopFieldCollector) and does its business. Of course, here is where things get involved. The [Scorer](xref:Lucene.Net.Search.Scorer) that is returned by the [Weight](xref:Lucene.Net.Search.Weight) object depends on what type of Query was submitted. In most real world applications with multiple query terms, the [Scorer](xref:Lucene.Net.Search.Scorer) is going to be a `BooleanScorer2` created from [BooleanWeight](xref:Lucene.Net.Search.BooleanQuery.BooleanWeight) (see the section on [custom queries](#customQueriesExpert) for info on changing this). -Assuming a BooleanScorer2, we first initialize the Coordinator, which is used to apply the coord() factor. We then get a internal Scorer based on the required, optional and prohibited parts of the query. Using this internal Scorer, the BooleanScorer2 then proceeds into a while loop based on the [](xref:Lucene.Net.Search.Scorer.NextDoc Scorer.NextDoc()) method. The nextDoc() method advances to the next document matching the query. This is an abstract method in the Scorer class and is thus overridden by all derived implementations. 
If you have a simple OR query your internal Scorer is most likely a DisjunctionSumScorer, which essentially combines the scorers from the sub scorers of the OR'd terms. \ No newline at end of file +Assuming a BooleanScorer2, we first initialize the Coordinator, which is used to apply the coord() factor. We then get an internal Scorer based on the required, optional, and prohibited parts of the query. Using this internal Scorer, the BooleanScorer2 then proceeds into a while loop based on the [Scorer.NextDoc](xref:Lucene.Net.Search.Scorer#methods) method. The NextDoc() method advances to the next document matching the query. This is an abstract method in the Scorer class and is thus overridden by all derived implementations. If you have a simple OR query, your internal Scorer is most likely a DisjunctionSumScorer, which essentially combines the scorers from the sub scorers of the OR'd terms. \ No newline at end of file diff --git a/src/Lucene.Net/Store/package.md index e8d2ba6900..665ba5d5cc 100644 --- a/src/Lucene.Net/Store/package.md +++ b/src/Lucene.Net/Store/package.md @@ -1,4 +1,9 @@ - +--- +uid: Lucene.Net.Store +summary: *content +--- + + + +- [Lucene.Net](https://www.nuget.org/packages/Lucene.Net/) - Core library +- [Lucene.Net.Analysis.Common](https://www.nuget.org/packages/Lucene.Net.Analysis.Common/) - Analyzers for indexing content in different languages and domains +- [Lucene.Net.Analysis.Kuromoji](https://www.nuget.org/packages/Lucene.Net.Analysis.Kuromoji/) - Japanese Morphological Analyzer +- [Lucene.Net.Analysis.Phonetic](https://www.nuget.org/packages/Lucene.Net.Analysis.Phonetic/) - Analyzer for indexing phonetic signatures (for sounds-alike search) +- [Lucene.Net.Analysis.SmartCn](https://www.nuget.org/packages/Lucene.Net.Analysis.SmartCn/) - Analyzer for indexing Chinese +- [Lucene.Net.Analysis.Stempel](https://www.nuget.org/packages/Lucene.Net.Analysis.Stempel/) - Analyzer for indexing Polish +- [Lucene.Net.Benchmark](https://www.nuget.org/packages/Lucene.Net.Benchmark/) - System for benchmarking Lucene +- [Lucene.Net.Classification](https://www.nuget.org/packages/Lucene.Net.Classification/) - Classification module for Lucene +- [Lucene.Net.Codecs](https://www.nuget.org/packages/Lucene.Net.Codecs/) - Lucene codecs and postings formats +- [Lucene.Net.Expressions](https://www.nuget.org/packages/Lucene.Net.Expressions/) - Dynamically computed values to sort/facet/search on based on a pluggable grammar +- [Lucene.Net.Facet](https://www.nuget.org/packages/Lucene.Net.Facet/) - Faceted indexing and search capabilities +- [Lucene.Net.Grouping](https://www.nuget.org/packages/Lucene.Net.Grouping/) - Collectors for grouping search results +- [Lucene.Net.Highlighter](https://www.nuget.org/packages/Lucene.Net.Highlighter/) - Highlights search keywords in results +- [Lucene.Net.ICU](https://www.nuget.org/packages/Lucene.Net.ICU/) - Specialized ICU (International Components for Unicode) Analyzers and Highlighters +- [Lucene.Net.Join](https://www.nuget.org/packages/Lucene.Net.Join/) - Index-time and Query-time joins for normalized content +- [Lucene.Net.Memory](https://www.nuget.org/packages/Lucene.Net.Memory/) - Single-document in-memory index implementation +- [Lucene.Net.Misc](https://www.nuget.org/packages/Lucene.Net.Misc/) - Index tools and other miscellaneous code +- [Lucene.Net.Queries](https://www.nuget.org/packages/Lucene.Net.Queries/) - Filters and Queries that add to core Lucene +- 
[Lucene.Net.QueryParser](https://www.nuget.org/packages/Lucene.Net.QueryParser/) - Text to Query parsers and parsing framework +- [Lucene.Net.Replicator](https://www.nuget.org/packages/Lucene.Net.Replicator/) - Files replication utility +- [Lucene.Net.Sandbox](https://www.nuget.org/packages/Lucene.Net.Sandbox/) - Various third party contributions and new ideas +- [Lucene.Net.Spatial](https://www.nuget.org/packages/Lucene.Net.Spatial/) - Geospatial search +- [Lucene.Net.Suggest](https://www.nuget.org/packages/Lucene.Net.Suggest/) - Auto-suggest and Spellchecking support + +### Remaining work + +See __[Current Status](xref:contributing/current-status)__ for more details on the remaining work. + +This version is a direct port of the Java Lucene project at [this release](https://github.com/apache/lucene-solr/releases/tag/releases%2Flucene-solr%2F4.8.0). \ No newline at end of file diff --git a/websites/site/index.md new file mode 100644 index 0000000000..843d871f3d --- /dev/null +++ b/websites/site/index.md @@ -0,0 +1,18 @@ +--- +title: Welcome to the Lucene.Net website! +description: Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users. +documentType: index +--- + +Lucene.Net +=============== +

About the project

+ +Lucene.Net is a port of the Lucene search engine library, written in C# and targeted at .NET runtime users. + +### Our Goals + +* Maintain the existing line-by-line port from Java to C#, fully automating and commoditizing the process such that the project can easily synchronize with the Java Lucene release schedule +* Maintain the high-performance requirements expected of a first-class C# search engine library +* Maximize usability and power when used within the .NET runtime. To that end, it will present a highly idiomatic, carefully tailored API that takes advantage of many of the special features of the .NET runtime. \ No newline at end of file diff --git a/websites/site/lucenetemplate/index.html.tmpl new file mode 100644 index 0000000000..de41d617b1 --- /dev/null +++ b/websites/site/lucenetemplate/index.html.tmpl @@ -0,0 +1,58 @@ +{{!Copyright (c) Microsoft. All rights reserved. Licensed under the MIT license. See LICENSE file in the project root for full license information.}} +{{!include(/^styles/.*/)}} +{{!include(/^fonts/.*/)}} +{{!include(favicon.ico)}} +{{!include(logo.svg)}} + + + + {{>partials/head-content}} +
+
+ {{>partials/navbar}} +
+
+
+

Lucene.Net is a high-performance search engine library for .NET

+
+
+ Install-Package Lucene.Net -Pre +
+
+ +
+
+{{>partials/home-quick-start}} +
+
+
+ {{{conceptual}}} +
+
+
+
+
+
+
+
+ {{>partials/footer}} +
+ {{>partials/scripts}} + + \ No newline at end of file diff --git a/websites/site/lucenetemplate/partials/head-content.tmpl.partial b/websites/site/lucenetemplate/partials/head-content.tmpl.partial new file mode 100644 index 0000000000..50cc7c5172 --- /dev/null +++ b/websites/site/lucenetemplate/partials/head-content.tmpl.partial @@ -0,0 +1,27 @@ +{{!Copyright (c) Microsoft. All rights reserved. Licensed under the MIT license. See LICENSE file in the project root for full license information.}} + + + + + {{#title}}{{title}}{{/title}}{{^title}}{{>partials/title}}{{/title}} {{#_appTitle}}| {{_appTitle}} {{/_appTitle}} + + + + {{#_description}}{{/_description}} + + + + + + + {{#_noindex}}{{/_noindex}} + {{#_enableSearch}}{{/_enableSearch}} + {{#_enableNewTab}}{{/_enableNewTab}} + + + + + + + + diff --git a/websites/site/lucenetemplate/partials/head.tmpl.partial b/websites/site/lucenetemplate/partials/head.tmpl.partial new file mode 100644 index 0000000000..2a02a275dc --- /dev/null +++ b/websites/site/lucenetemplate/partials/head.tmpl.partial @@ -0,0 +1,24 @@ +{{!Copyright (c) Microsoft. All rights reserved. Licensed under the MIT license. See LICENSE file in the project root for full license information.}} + + + + + {{#title}}{{title}}{{/title}}{{^title}}{{>partials/title}}{{/title}} {{#_appTitle}}| {{_appTitle}} {{/_appTitle}} + + + + {{#_description}}{{/_description}} + + + + + + + {{#_noindex}}{{/_noindex}} + {{#_enableSearch}}{{/_enableSearch}} + {{#_enableNewTab}}{{/_enableNewTab}} + + + + + diff --git a/websites/site/lucenetemplate/partials/home-quick-start.tmpl.partial b/websites/site/lucenetemplate/partials/home-quick-start.tmpl.partial new file mode 100644 index 0000000000..1d441dd04d --- /dev/null +++ b/websites/site/lucenetemplate/partials/home-quick-start.tmpl.partial @@ -0,0 +1,70 @@ +
+
+
+
+

Create an index and define a text analyzer

+
+// Ensures index backwards compatibility
+var AppLuceneVersion = LuceneVersion.LUCENE_48;
+
+var indexLocation = @"C:\Index";
+var dir = FSDirectory.Open(indexLocation);
+
+// Create an analyzer to process the text
+var analyzer = new StandardAnalyzer(AppLuceneVersion);
+
+// Create an index writer
+var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer);
+var writer = new IndexWriter(dir, indexConfig);
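+// Note: IndexWriter is expensive to create and safe to share across threads,
+// so it is typically created once, reused, and disposed on shutdown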
+
+
+
+
+

Add to the index

+
+var source = new
+{
+    Name = "Kermit the Frog",
+    FavouritePhrase = "The quick brown fox jumps over the lazy dog"
+};
+var doc = new Document();
+// StringField indexes but doesn't tokenise
+doc.Add(new StringField("name", source.Name, Field.Store.YES));
+
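+// TextField is tokenised by the analyzer, so individual words are searchable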
+doc.Add(new TextField("favouritePhrase", source.FavouritePhrase, Field.Store.YES));
+
+writer.AddDocument(doc);
+writer.Flush(triggerMerge: false, applyAllDeletes: false);
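+// Flush writes in-memory changes to the directory without a full commit;
+// call writer.Commit() to also make them durable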
+
+
+
+
+
+

Construct a query

+
+// search with a phrase
+var phrase = new MultiPhraseQuery();
+phrase.Add(new Term("favouritePhrase", "brown"));
+phrase.Add(new Term("favouritePhrase", "fox"));
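+// Each term added this way occupies the next position, so this matches
+// documents where "brown" is immediately followed by "fox"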
+
+
+
+
+

Fetch the results

+
+// re-use the writer to get real-time updates
+var searcher = new IndexSearcher(writer.GetReader(applyAllDeletes: true));
+var hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;
+foreach (var hit in hits)
+{
+    var foundDoc = searcher.Doc(hit.Doc);
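+    // Dump() is a LINQPad extension method; in a console app,
+    // use Console.WriteLine instead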
+    hit.Score.Dump("Score");
+    foundDoc.Get("name").Dump("Name");
+    foundDoc.Get("favouritePhrase").Dump("Favourite Phrase");
+}
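+// With the document indexed above, this prints a single hit for "Kermit the Frog"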
+
+
+
+
+
+
\ No newline at end of file diff --git a/websites/site/lucenetemplate/partials/navbar.tmpl.partial b/websites/site/lucenetemplate/partials/navbar.tmpl.partial new file mode 100644 index 0000000000..ab8f5195d8 --- /dev/null +++ b/websites/site/lucenetemplate/partials/navbar.tmpl.partial @@ -0,0 +1,22 @@ +{{!Copyright (c) Microsoft. All rights reserved. Licensed under the MIT license. See LICENSE file in the project root for full license information.}} + + diff --git a/websites/site/lucenetemplate/styles/main.css b/websites/site/lucenetemplate/styles/main.css new file mode 100644 index 0000000000..812bf28b8c --- /dev/null +++ b/websites/site/lucenetemplate/styles/main.css @@ -0,0 +1,73 @@ +/* .navbar-inverse { + background: #4a95da; + background: rgb(44, 95, 163); + background: -moz-linear-gradient(top, rgba(44, 95, 163, 1) 0%, rgba(64, 150, 238, 1) 100%); + background: -webkit-linear-gradient(top, rgba(44, 95, 163, 1) 0%, rgba(64, 150, 238, 1) 100%); + background: linear-gradient(to bottom, rgba(44, 95, 163, 1) 0%, rgba(64, 150, 238, 1) 100%); + filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#2c5fa3', endColorstr='#4096ee', GradientType=0); + border-color:white; + } + .navbar-inverse .navbar-nav>li>a, .navbar-inverse .navbar-text { + color: #fff; + } + .navbar-inverse .navbar-nav>.active>a { + background-color: #1764AA; + } + .navbar-inverse .navbar-nav>.active>a:focus, .navbar-inverse .navbar-nav>.active>a:hover { + background-color: #1764AA; + } */ + + .btn-primary:hover { + background-color: #1764AA; +} +button, a { + color: #1764AA; + /* #0095eb */ +} +button:hover, +button:focus, +a:hover, +a:focus { + color: #143653; + text-decoration: none; +} +nav.navbar { + background-color:white; +} +.navbar-brand { + height: 80px; +} +.navbar-header .navbar-brand img { + width:300px; + height:55px; + margin:10px 10px 10px 0px; +} +.navbar-toggle .icon-bar{ + margin-top: 2px; + background-color:#0095eb; +} +.navbar-toggle { + border-color:#0095eb; +} +header ul.navbar-nav { + /* font-size:1.2em; */ + float:right; + font-weight: 600; +} + +.sidefilter { + top:120px; +} + +.sidetoc { + top: 180px; + background-color:rgb(247, 247, 247); +} + +body .toc { + background-color:rgb(247, 247, 247); +} + +.sidefilter { + background-color: rgb(247, 247, 247); +} \ No newline at end of file diff --git a/websites/site/lucenetemplate/styles/site.css b/websites/site/lucenetemplate/styles/site.css new file mode 100644 index 0000000000..b4fe7f7772 --- /dev/null +++ b/websites/site/lucenetemplate/styles/site.css @@ -0,0 +1,131 @@ +/* START From hugo academic css */ +#homepage section { + font-family: 'Merriweather', serif; + font-size: 16px; + line-height: 1.65; +} +#homepage pre, #homepage code { + font-family: 'Roboto Mono', 'Courier New', 'Courier', monospace; +} +#homepage h2, #homepage h3, #homepage h4 { + font-family: 'Lato', sans-serif; + font-weight: 400; + margin-bottom: 1em; + line-height: 1.25; + color: #313131; + text-rendering: optimizeLegibility; +} +#homepage h3 { + font-weight: 700; +} +nav.navbar { + font-family: 'Lato', sans-serif; + font-weight: 400; + line-height: 1.25; + text-rendering: optimizeLegibility; + font-size: 16px; +} +.home-section:first-of-type { + padding-top: 50px; +} +.home-section:nth-of-type(even) { + background-color: rgb(247, 247, 247); +} +@media screen and (min-width: 58em) { + #homepage section { + font-size: 20px; + } +} +/* END From hugo academic css */ + +pre.clean { + border: none !important; + border-radius: 0 !important; + background-color: 
#f8f8f8; + overflow: auto; + display: block; + padding: 9.5px; + margin: 0 0 10px; + font-size: 13px; + line-height: 1.42857143; + color: #333; + word-break: break-all; + word-wrap: break-word; +} + +#intro { + margin-top:80px; + /* Permalink - use to edit and share this gradient: http://colorzilla.com/gradient-editor/#2c5fa3+0,4096ee+100 */ + background: rgb(44, 95, 163); + /* Old browsers */ + background: -moz-linear-gradient(top, rgba(44, 95, 163, 1) 0%, rgba(64, 150, 238, 1) 100%); + /* FF3.6-15 */ + background: -webkit-linear-gradient(top, rgba(44, 95, 163, 1) 0%, rgba(64, 150, 238, 1) 100%); + /* Chrome10-25,Safari5.1-6 */ + background: linear-gradient(to bottom, rgba(44, 95, 163, 1) 0%, rgba(64, 150, 238, 1) 100%); + /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */ + filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#2c5fa3', endColorstr='#4096ee', GradientType=0); + /* IE6-9 */ + color: white; +} +#intro p { + margin: 0 0 10px; + margin-bottom: 2rem; +} + +.project-links { + margin-top: 20px; + font-size:30px; + + vertical-align: bottom; +} + +.project-links a { + color: white; +} + +.project-links a:hover { + color: #0095eb; + text-decoration: none; + transition: color 0.6s ease; +} + +.project-links i { + font-size: 1.7em; + margin-left: 2rem; +} + +#intro h1 h2 h3 h4 h5 { + color: white; +} + +.no-padding { + padding: 0 !important; + margin: 0 !important; +} + +.nuget-well { + -moz-border-radius: 5px; + -webkit-border-radius: 5px; + background-color: #202020; + border: 4px solid silver; + border-radius: 5px; + box-shadow: 2px 2px 3px #6e6e6e; + color: #e2e2e2; + display: block; + font: 1em 'andale mono', 'lucida console', monospace; + line-height: 1em; + overflow: auto; + padding: 15px; + text-align: center; +} + +.home-section { + padding: 4rem 0 4rem 0; +} + +@media screen and (min-width: 700px) { + .project-links { + margin-top: 4rem; + } +} \ No newline at end of file diff --git a/websites/site/lucenetemplate/web.config b/websites/site/lucenetemplate/web.config new file mode 100644 index 0000000000..f6469093ee --- /dev/null +++ b/websites/site/lucenetemplate/web.config @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/websites/site/site.ps1 b/websites/site/site.ps1 new file mode 100644 index 0000000000..c0f15b3ab9 --- /dev/null +++ b/websites/site/site.ps1 @@ -0,0 +1,86 @@ +# ----------------------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the ""License""); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ----------------------------------------------------------------------------------- + +param ( + [Parameter(Mandatory=$false)] + [int] + $ServeDocs = 1, + [Parameter(Mandatory=$false)] + [int] + $Clean = 0, + # LogLevel can be: Diagnostic, Verbose, Info, Warning, Error + [Parameter(Mandatory=$false)] + [string] + $LogLevel = 'Info' +) + +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 + +$PSScriptFilePath = (Get-Item $MyInvocation.MyCommand.Path).FullName +$RepoRoot = (get-item $PSScriptFilePath).Directory.Parent.Parent.FullName; +$SiteFolder = Join-Path -Path $RepoRoot -ChildPath "websites\site"; +$ToolsFolder = Join-Path -Path $SiteFolder -ChildPath "tools"; +#ensure the /build/tools folder +New-Item $ToolsFolder -type directory -force + +if ($Clean -eq 1) { + Write-Host "Cleaning tools..." + Remove-Item (Join-Path -Path $ToolsFolder "\*") -recurse -force -ErrorAction SilentlyContinue +} + +New-Item "$ToolsFolder\tmp" -type directory -force + +# Go get docfx.exe if we don't have it +New-Item "$ToolsFolder\docfx" -type directory -force +$DocFxExe = "$ToolsFolder\docfx\docfx.exe" +if (-not (test-path $DocFxExe)) +{ + Write-Host "Retrieving docfx..." + $DocFxZip = "$ToolsFolder\tmp\docfx.zip" + Invoke-WebRequest "https://github.com/dotnet/docfx/releases/download/v2.38.1/docfx.zip" -OutFile $DocFxZip -TimeoutSec 60 + #unzip + Expand-Archive $DocFxZip -DestinationPath (Join-Path -Path $ToolsFolder -ChildPath "docfx") +} + + Remove-Item -Recurse -Force "$ToolsFolder\tmp" + +# delete anything that already exists +if ($Clean -eq 1) { + Write-Host "Cleaning..." + Remove-Item (Join-Path -Path $SiteFolder "_site\*") -recurse -force -ErrorAction SilentlyContinue + Remove-Item (Join-Path -Path $SiteFolder "_site") -force -ErrorAction SilentlyContinue + Remove-Item (Join-Path -Path $SiteFolder "obj\*") -recurse -force -ErrorAction SilentlyContinue + Remove-Item (Join-Path -Path $SiteFolder "obj") -force -ErrorAction SilentlyContinue +} + +$DocFxJson = Join-Path -Path $SiteFolder "docfx.json" +$DocFxLog = Join-Path -Path $SiteFolder "obj\docfx.log" + +if($?) { + if ($ServeDocs -eq 0){ + # build the output + Write-Host "Building docs..." + & $DocFxExe build $DocFxJson -l "$DocFxLog" --loglevel $LogLevel + } + else { + # build + serve (for testing) + Write-Host "starting website..." + & $DocFxExe $DocFxJson --serve + } +} \ No newline at end of file diff --git a/websites/site/toc.yml b/websites/site/toc.yml new file mode 100644 index 0000000000..b8b211a5a1 --- /dev/null +++ b/websites/site/toc.yml @@ -0,0 +1,12 @@ +- name: About + href: /#about +- name: Quick start + href: /#quick-start +- name: Download + href: download/ + topicHref: download/download.md +- name: Documentation + topicHref: docs.md +- name: Contributing + href: contributing/ + topicHref: contributing/index.md \ No newline at end of file