-
Notifications
You must be signed in to change notification settings - Fork 24.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a simple JSON field mapper. (#33923)
* Add a simple JSON field type. * Add support for ignore_above. * Add support for null_value. * Add support for split_queries_on_whitespace. * Prevent norms from being enabled. * Clarify the message around copy_to not being supported. * Disallow wildcard queries. * For now, disallow the field from being stored.
- Loading branch information
1 parent
ec2ce6e
commit eebfc65
Showing
7 changed files
with
1,060 additions
and
0 deletions.
There are no files selected for viewing
327 changes: 327 additions & 0 deletions
327
server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,327 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.index.mapper; | ||
|
||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; | ||
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.index.IndexableField; | ||
import org.apache.lucene.index.Term; | ||
import org.apache.lucene.search.MultiTermQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.TermQuery; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.elasticsearch.common.lucene.Lucene; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.unit.Fuzziness; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.common.xcontent.XContentParser; | ||
import org.elasticsearch.common.xcontent.support.XContentMapValues; | ||
import org.elasticsearch.index.analysis.AnalyzerScope; | ||
import org.elasticsearch.index.analysis.NamedAnalyzer; | ||
import org.elasticsearch.index.query.QueryShardContext; | ||
|
||
import java.io.IOException; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
import static org.elasticsearch.index.mapper.TypeParsers.parseField; | ||
|
||
/** | ||
* A field mapper that accepts a JSON object and flattens it into a single field. This data type | ||
* can be a useful alternative to an 'object' mapping when the object has a large, unknown set | ||
* of keys. | ||
* | ||
* Currently the mapper extracts all leaf values of the JSON object, converts them to their text | ||
* representations, and indexes each one as a keyword. As an example, given a json field called | ||
* 'json_field' and the following input | ||
* | ||
* { | ||
* "json_field: { | ||
* "key1": "some value", | ||
* "key2": { | ||
* "key3": true | ||
* } | ||
* } | ||
* } | ||
* | ||
* the mapper will produce untokenized string fields with the values "some value" and "true". | ||
*/ | ||
public final class JsonFieldMapper extends FieldMapper { | ||
|
||
public static final String CONTENT_TYPE = "json"; | ||
public static final NamedAnalyzer WHITESPACE_ANALYZER = new NamedAnalyzer( | ||
"whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()); | ||
|
||
private static class Defaults { | ||
public static final MappedFieldType FIELD_TYPE = new JsonFieldType(); | ||
|
||
static { | ||
FIELD_TYPE.setTokenized(false); | ||
FIELD_TYPE.setOmitNorms(true); | ||
FIELD_TYPE.setStored(false); | ||
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); | ||
FIELD_TYPE.freeze(); | ||
} | ||
|
||
public static final int IGNORE_ABOVE = Integer.MAX_VALUE; | ||
} | ||
|
||
public static class Builder extends FieldMapper.Builder<Builder, JsonFieldMapper> { | ||
private int ignoreAbove = Defaults.IGNORE_ABOVE; | ||
|
||
public Builder(String name) { | ||
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); | ||
builder = this; | ||
} | ||
|
||
@Override | ||
public JsonFieldType fieldType() { | ||
return (JsonFieldType) super.fieldType(); | ||
} | ||
|
||
@Override | ||
public Builder indexOptions(IndexOptions indexOptions) { | ||
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) { | ||
throw new IllegalArgumentException("The [" + CONTENT_TYPE | ||
+ "] field does not support positions, got [index_options]=" | ||
+ indexOptionToString(indexOptions)); | ||
} | ||
return super.indexOptions(indexOptions); | ||
} | ||
|
||
public Builder ignoreAbove(int ignoreAbove) { | ||
if (ignoreAbove < 0) { | ||
throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove); | ||
} | ||
this.ignoreAbove = ignoreAbove; | ||
return this; | ||
} | ||
|
||
public Builder splitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { | ||
fieldType().setSplitQueriesOnWhitespace(splitQueriesOnWhitespace); | ||
return builder; | ||
} | ||
|
||
@Override | ||
public Builder addMultiField(Mapper.Builder mapperBuilder) { | ||
throw new UnsupportedOperationException("[fields] is not supported for [" + CONTENT_TYPE + "] fields."); | ||
} | ||
|
||
@Override | ||
public Builder copyTo(CopyTo copyTo) { | ||
throw new UnsupportedOperationException("[copy_to] is not supported for [" + CONTENT_TYPE + "] fields."); | ||
} | ||
|
||
@Override | ||
public Builder store(boolean store) { | ||
throw new UnsupportedOperationException("[store] is not currently supported for [" + | ||
CONTENT_TYPE + "] fields."); | ||
} | ||
|
||
@Override | ||
public JsonFieldMapper build(BuilderContext context) { | ||
setupFieldType(context); | ||
if (fieldType().splitQueriesOnWhitespace()) { | ||
fieldType().setSearchAnalyzer(WHITESPACE_ANALYZER); | ||
} | ||
return new JsonFieldMapper(name, fieldType, defaultFieldType, | ||
ignoreAbove, context.indexSettings()); | ||
} | ||
} | ||
|
||
public static class TypeParser implements Mapper.TypeParser { | ||
@Override | ||
public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException { | ||
JsonFieldMapper.Builder builder = new JsonFieldMapper.Builder(name); | ||
parseField(builder, name, node, parserContext); | ||
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) { | ||
Map.Entry<String, Object> entry = iterator.next(); | ||
String propName = entry.getKey(); | ||
Object propNode = entry.getValue(); | ||
if (propName.equals("ignore_above")) { | ||
builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1)); | ||
iterator.remove(); | ||
} else if (propName.equals("null_value")) { | ||
if (propNode == null) { | ||
throw new MapperParsingException("Property [null_value] cannot be null."); | ||
} | ||
builder.nullValue(propNode.toString()); | ||
iterator.remove(); | ||
} else if (propName.equals("split_queries_on_whitespace")) { | ||
builder.splitQueriesOnWhitespace | ||
(XContentMapValues.nodeBooleanValue(propNode, "split_queries_on_whitespace")); | ||
iterator.remove(); | ||
} | ||
} | ||
return builder; | ||
} | ||
} | ||
|
||
public static final class JsonFieldType extends StringFieldType { | ||
private boolean splitQueriesOnWhitespace; | ||
|
||
public JsonFieldType() { | ||
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); | ||
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); | ||
} | ||
|
||
private JsonFieldType(JsonFieldType ref) { | ||
super(ref); | ||
this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (o == null || getClass() != o.getClass()) return false; | ||
if (!super.equals(o)) return false; | ||
JsonFieldType that = (JsonFieldType) o; | ||
return splitQueriesOnWhitespace == that.splitQueriesOnWhitespace; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(super.hashCode(), splitQueriesOnWhitespace); | ||
} | ||
|
||
public JsonFieldType clone() { | ||
return new JsonFieldType(this); | ||
} | ||
|
||
@Override | ||
public String typeName() { | ||
return CONTENT_TYPE; | ||
} | ||
|
||
public boolean splitQueriesOnWhitespace() { | ||
return splitQueriesOnWhitespace; | ||
} | ||
|
||
public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { | ||
checkIfFrozen(); | ||
this.splitQueriesOnWhitespace = splitQueriesOnWhitespace; | ||
} | ||
|
||
@Override | ||
public Query existsQuery(QueryShardContext context) { | ||
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); | ||
} | ||
|
||
@Override | ||
public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions, | ||
boolean transpositions) { | ||
throw new UnsupportedOperationException("[fuzzy] queries are not currently supported on [" + | ||
CONTENT_TYPE + "] fields."); | ||
} | ||
|
||
@Override | ||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, | ||
MultiTermQuery.RewriteMethod method, QueryShardContext context) { | ||
throw new UnsupportedOperationException("[regexp] queries are not currently supported on [" + | ||
CONTENT_TYPE + "] fields."); | ||
} | ||
|
||
@Override | ||
public Query wildcardQuery(String value, | ||
MultiTermQuery.RewriteMethod method, | ||
QueryShardContext context) { | ||
throw new UnsupportedOperationException("[wildcard] queries are not currently supported on [" + | ||
CONTENT_TYPE + "] fields."); | ||
} | ||
|
||
@Override | ||
public Object valueForDisplay(Object value) { | ||
if (value == null) { | ||
return null; | ||
} | ||
BytesRef binaryValue = (BytesRef) value; | ||
return binaryValue.utf8ToString(); | ||
} | ||
} | ||
|
||
private final JsonFieldParser fieldParser; | ||
private int ignoreAbove; | ||
|
||
private JsonFieldMapper(String simpleName, | ||
MappedFieldType fieldType, | ||
MappedFieldType defaultFieldType, | ||
int ignoreAbove, | ||
Settings indexSettings) { | ||
super(simpleName, fieldType, defaultFieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); | ||
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; | ||
|
||
this.ignoreAbove = ignoreAbove; | ||
this.fieldParser = new JsonFieldParser(fieldType, ignoreAbove); | ||
} | ||
|
||
@Override | ||
protected String contentType() { | ||
return CONTENT_TYPE; | ||
} | ||
|
||
@Override | ||
protected void doMerge(Mapper mergeWith) { | ||
super.doMerge(mergeWith); | ||
this.ignoreAbove = ((JsonFieldMapper) mergeWith).ignoreAbove; | ||
} | ||
|
||
@Override | ||
protected JsonFieldMapper clone() { | ||
return (JsonFieldMapper) super.clone(); | ||
} | ||
|
||
@Override | ||
public JsonFieldType fieldType() { | ||
return (JsonFieldType) super.fieldType(); | ||
} | ||
|
||
@Override | ||
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException { | ||
if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) { | ||
return; | ||
} | ||
|
||
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) { | ||
fields.addAll(fieldParser.parse(context.parser())); | ||
createFieldNamesField(context, fields); | ||
} else { | ||
context.parser().skipChildren(); | ||
} | ||
} | ||
|
||
@Override | ||
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { | ||
super.doXContentBody(builder, includeDefaults, params); | ||
|
||
if (includeDefaults || fieldType().nullValue() != null) { | ||
builder.field("null_value", fieldType().nullValue()); | ||
} | ||
|
||
if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) { | ||
builder.field("ignore_above", ignoreAbove); | ||
} | ||
|
||
if (includeDefaults || fieldType().splitQueriesOnWhitespace()) { | ||
builder.field("split_queries_on_whitespace", fieldType().splitQueriesOnWhitespace()); | ||
} | ||
} | ||
} |
Oops, something went wrong.