Skip to content

Commit

Permalink
PhraseSuggester: Collate option should allow returning phrases with n…
Browse files Browse the repository at this point in the history
…o matching docs

A new option `prune` has been added to allow users to control phrase suggestion pruning when `collate`
is set. If the new option is set, the phrase suggestion option will contain a boolean `collate_match`
indicating whether the respective result had hits in collation.

Closes elastic#6927
  • Loading branch information
areek committed Jul 22, 2014
1 parent 0faffcf commit f39d4e1
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 6 deletions.
13 changes: 11 additions & 2 deletions docs/reference/search/suggesters/phrase-suggest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,14 @@ can contain misspellings (See parameter descriptions below).
automatically made available as the `{{suggestion}}` variable, which
should be used in your query/filter. You can still specify your own
template `params` -- the `suggestion` value will be added to the
variables you specify. You can also specify a `preference` to control
variables you specify. You can specify a `preference` to control
on which shards the query is executed (see <<search-request-preference>>).
The default value is `_only_local`.
The default value is `_only_local`. Additionally, you can specify
a `prune` option to control whether all phrase suggestions are
returned. When set to `true`, the suggestions will have an additional
option `collate_match`, which will be `true` if matching documents
for the phrase were found, and `false` otherwise. The default value for
`prune` is `false`.

[source,js]
--------------------------------------------------
Expand All @@ -195,6 +200,7 @@ curl -XPOST 'localhost:9200/_search' -d {
},
"params": {"field_name" : "title"}, <3>
"preference": "_primary", <4>
"prune": true <5>
}
}
}
Expand All @@ -207,6 +213,9 @@ curl -XPOST 'localhost:9200/_search' -d {
<3> An additional `field_name` variable has been specified in
`params` and is used by the `match` query.
<4> The default `preference` has been changed to `_primary`.
<5> All suggestions will be returned with an extra `collate_match`
option indicating whether the generated phrase matched any
document.

==== Smoothing Models

Expand Down
29 changes: 28 additions & 1 deletion src/main/java/org/elasticsearch/search/suggest/Suggest.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
Expand Down Expand Up @@ -521,17 +522,24 @@ static class Fields {
static final XContentBuilderString TEXT = new XContentBuilderString("text");
static final XContentBuilderString HIGHLIGHTED = new XContentBuilderString("highlighted");
static final XContentBuilderString SCORE = new XContentBuilderString("score");
static final XContentBuilderString COLLATE_MATCH = new XContentBuilderString("collate_match");

}

private Text text;
private Text highlighted;
private float score;
private Boolean collateMatch;

public Option(Text text, Text highlighted, float score) {
/**
 * Creates an option that also carries the outcome of collation.
 *
 * @param text the suggested text
 * @param highlighted the highlighted form of the suggestion
 * @param score the score of the suggestion
 * @param collateMatch whether collation found a match for this option; may be
 *        <code>null</code>, in which case {@link #collateMatch()} reports
 *        <code>true</code> and no <code>collate_match</code> field is rendered
 */
public Option(Text text, Text highlighted, float score, Boolean collateMatch) {
this.text = text;
this.highlighted = highlighted;
this.score = score;
this.collateMatch = collateMatch;
}

/**
 * Creates an option without a collate result; delegates to the four-argument
 * constructor with a <code>null</code> <code>collateMatch</code>.
 *
 * @param text the suggested text
 * @param highlighted the highlighted form of the suggestion
 * @param score the score of the suggestion
 */
public Option(Text text, Text highlighted, float score) {
this(text, highlighted, score, null);
}

public Option(Text text, float score) {
Expand Down Expand Up @@ -562,6 +570,14 @@ public Text getHighlighted() {
public float getScore() {
return score;
}

/**
 * Reports whether collation found a match for this option.
 *
 * @return the recorded collate result, or <code>true</code> when no
 *         collate result was recorded for this option
 */
public boolean collateMatch() {
    if (collateMatch == null) {
        // No collate result recorded: treat the option as matching.
        return true;
    }
    return collateMatch;
}

protected void setScore(float score) {
this.score = score;
Expand All @@ -572,13 +588,21 @@ public void readFrom(StreamInput in) throws IOException {
text = in.readText();
score = in.readFloat();
highlighted = in.readOptionalText();

if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
collateMatch = in.readOptionalBoolean();
}
}

/**
 * Writes this option to the stream: text, score, the optional highlighted
 * text and, for sufficiently recent stream versions, the optional collate
 * match flag.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeText(text);
out.writeFloat(score);
out.writeOptionalText(highlighted);

// The collate match flag is only written when the stream version is 1.4.0 or later;
// readFrom applies the same version check before reading it.
// NOTE(review): presumably this keeps the wire format compatible with older nodes - confirm.
if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
out.writeOptionalBoolean(collateMatch);
}
}

@Override
Expand All @@ -595,6 +619,9 @@ protected XContentBuilder innerToXContent(XContentBuilder builder, Params params
builder.field(Fields.HIGHLIGHTED, highlighted);
}
builder.field(Fields.SCORE, score);
if (collateMatch != null) {
builder.field(Fields.COLLATE_MATCH, collateMatch.booleanValue());
}
return builder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma
suggestion.setPreference(parser.text());
} else if ("params".equals(fieldName)) {
suggestion.setCollateScriptParams(parser.map());
} else if ("prune".equals(fieldName)) {
if (parser.isBooleanValue()) {
suggestion.setCollatePrune(parser.booleanValue());
} else {
throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] prune must be either 'true' or 'false'");
}
} else {
throw new ElasticsearchIllegalArgumentException(
"suggester[phrase][collate] doesn't support field [" + fieldName + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,12 @@ public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, P
BytesRef byteSpare = new BytesRef();

MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare);
final boolean collateEnabled = multiSearchResponse != null;
final boolean collatePrune = suggestion.collatePrune();

for (int i = 0; i < checkerResult.corrections.length; i++) {
if (!hasMatchingDocs(multiSearchResponse, i)) {
boolean collateMatch = hasMatchingDocs(multiSearchResponse, i);
if (!collateMatch && !collatePrune) {
continue;
}
Correction correction = checkerResult.corrections[i];
Expand All @@ -119,7 +122,11 @@ public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, P
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
if (collateEnabled && collatePrune) {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
} else {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
}
} else {
response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
private String collateFilter;
private String collatePreference;
private Map<String, Object> collateParams;
private Boolean collatePrune;

public PhraseSuggestionBuilder(String name) {
super(name, "phrase");
Expand Down Expand Up @@ -202,6 +203,14 @@ public PhraseSuggestionBuilder collateParams(Map<String, Object> collateParams)
return this;
}

/**
 * Sets whether to prune suggestions after collation.
 * <p>
 * The value is emitted as the <code>prune</code> field of the collate object
 * only when it has been set. According to the phrase suggester documentation,
 * setting it to <code>true</code> returns all suggestions, each with an extra
 * <code>collate_match</code> option telling whether matching documents were
 * found for the phrase.
 *
 * @param collatePrune the value to send as <code>prune</code>
 * @return this builder, to allow call chaining
 */
public PhraseSuggestionBuilder collatePrune(boolean collatePrune) {
this.collatePrune = collatePrune;
return this;
}

@Override
public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
if (realWordErrorLikelihood != null) {
Expand Down Expand Up @@ -260,6 +269,9 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) t
if (collateParams != null) {
builder.field("params", collateParams);
}
if (collatePrune != null) {
builder.field("prune", collatePrune.booleanValue());
}
builder.endObject();
}
return builder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class PhraseSuggestionContext extends SuggestionContext {
private WordScorer.WordScorerFactory scorer;

private boolean requireUnigram = true;
private boolean prune = false;

public PhraseSuggestionContext(Suggester<? extends PhraseSuggestionContext> suggester) {
super(suggester);
Expand Down Expand Up @@ -221,4 +222,12 @@ void setCollateScriptParams(Map<String, Object> collateScriptParams) {
this.collateScriptParams = collateScriptParams;
}

/**
 * Stores the collate <code>prune</code> flag for this suggestion context.
 *
 * @param prune the value later returned by {@link #collatePrune()}
 */
void setCollatePrune(boolean prune) {
this.prune = prune;
}

/**
 * Returns the collate <code>prune</code> flag.
 *
 * @return the stored flag; <code>false</code> unless changed through
 *         {@link #setCollatePrune(boolean)}
 */
boolean collatePrune() {
return prune;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,7 @@ public void suggestWithManyCandidates() throws InterruptedException, ExecutionEx
}

@Test
public void suggestPhrasesInIndex() throws InterruptedException, ExecutionException, IOException {
public void testPhraseSuggesterCollate() throws InterruptedException, ExecutionException, IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
.put(indexSettings())
.put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
Expand Down Expand Up @@ -1253,6 +1253,13 @@ public void suggestPhrasesInIndex() throws InterruptedException, ExecutionExcept
} catch (ElasticsearchException e) {
// expected
}

// collate request with prune set to true
PhraseSuggestionBuilder phraseSuggestWithParamsAndReturn = suggest.collateFilter(null).collateQuery(collateWithParams).collateParams(params).collatePrune(true);
searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParamsAndReturn);
assertSuggestionSize(searchSuggest, 0, 10, "title");
assertSuggestionPhraseCollateMatchExists(searchSuggest, "title", 2);

}

protected Suggest searchSuggest(SuggestionBuilder<?>... suggestion) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,22 @@ public static void assertSuggestionSize(Suggest searchSuggest, int entry, int si
assertVersionSerializable(searchSuggest);
}

/**
 * Asserts that exactly <code>numberOfPhraseExists</code> options of the first
 * entry of the suggestion named <code>key</code> report a collate match.
 * <p>
 * Note that {@code Option#collateMatch()} also returns <code>true</code> for
 * options that carry no collate result, so such options are counted as matches.
 *
 * @param searchSuggest the suggest response to inspect; must not be null
 * @param key the name of the suggestion to inspect
 * @param numberOfPhraseExists the expected number of options reporting a collate match
 */
public static void assertSuggestionPhraseCollateMatchExists(Suggest searchSuggest, String key, int numberOfPhraseExists) {
    assertThat(searchSuggest, notNullValue());
    String msg = "Suggest result: " + searchSuggest.toString();
    assertThat(msg, searchSuggest.size(), greaterThanOrEqualTo(1));
    assertThat(msg, searchSuggest.getSuggestion(key).getName(), equalTo(key));

    int counter = 0;
    for (Suggest.Suggestion.Entry.Option option : searchSuggest.getSuggestion(key).getEntries().get(0).getOptions()) {
        if (option.collateMatch()) {
            counter++;
        }
    }

    // Pass the suggest dump as the reason, like the assertions above, so a count
    // mismatch shows the full response instead of only the two numbers.
    assertThat(msg, counter, equalTo(numberOfPhraseExists));
}

public static void assertSuggestion(Suggest searchSuggest, int entry, int ord, String key, String text) {
assertThat(searchSuggest, notNullValue());
String msg = "Suggest result: " + searchSuggest.toString();
Expand Down

0 comments on commit f39d4e1

Please sign in to comment.