Skip to content

Commit

Permalink
When parsing JSON fields, also create tokens prefixed with the field …
Browse files Browse the repository at this point in the history
…key. (#34207)
  • Loading branch information
jtibshirani committed Mar 8, 2019
1 parent 275235b commit 4a58906
Show file tree
Hide file tree
Showing 5 changed files with 327 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@
* of keys.
*
* Currently the mapper extracts all leaf values of the JSON object, converts them to their text
* representations, and indexes each one as a keyword. As an example, given a json field called
* 'json_field' and the following input
* representations, and indexes each one as a keyword. It creates both a 'keyed' version of the token,
* to allow searches on particular key-value pairs, and a 'root' token without the key.
*
* As an example, given a json field called 'json_field' and the following input
*
* {
* "json_field": {
Expand All @@ -63,13 +65,18 @@
* }
* }
*
* the mapper will produce untokenized string fields with the values "some value" and "true".
* the mapper will produce untokenized string fields called "json_field" with values "some value" and "true",
* as well as string fields called "json_field._keyed" with values "key\0some value" and "key2.key3\0true".
*
* Note that \0 is a reserved separator character, and cannot be used in the keys of the JSON object
* (see {@link JsonFieldParser#SEPARATOR}).
*/
public final class JsonFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "json";
public static final NamedAnalyzer WHITESPACE_ANALYZER = new NamedAnalyzer(
"whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());
public static final String KEYED_FIELD_SUFFIX = "._keyed";

private static class Defaults {
public static final MappedFieldType FIELD_TYPE = new JsonFieldType();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,53 +31,116 @@

/**
* A helper class for {@link JsonFieldMapper} parses a JSON object
* and produces an indexable field for each leaf value.
* and produces a pair of indexable fields for each leaf value.
*/
public class JsonFieldParser {
private static final String SEPARATOR = "\0";

private final MappedFieldType fieldType;
private final int ignoreAbove;

private final String rootFieldName;
private final String keyedFieldName;

JsonFieldParser(MappedFieldType fieldType,
int ignoreAbove) {
this.fieldType = fieldType;
this.ignoreAbove = ignoreAbove;

this.rootFieldName = fieldType.name();
this.keyedFieldName = fieldType.name() + JsonFieldMapper.KEYED_FIELD_SUFFIX;
}

public List<IndexableField> parse(XContentParser parser) throws IOException {
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT,
parser.currentToken(),
parser::getTokenLocation);

ContentPath path = new ContentPath();
List<IndexableField> fields = new ArrayList<>();
int openObjects = 1;

parseObject(parser, path, fields);
return fields;
}

private void parseObject(XContentParser parser,
ContentPath path,
List<IndexableField> fields) throws IOException {
String currentName = null;
while (true) {
if (openObjects == 0) {
return fields;
XContentParser.Token token = parser.nextToken();
if (token == XContentParser.Token.END_OBJECT) {
return;
}

if (token == XContentParser.Token.FIELD_NAME) {
currentName = parser.currentName();
} else {
assert currentName != null;
parseFieldValue(token, parser, path, currentName, fields);
}
}
}

private void parseArray(XContentParser parser,
ContentPath path,
String currentName,
List<IndexableField> fields) throws IOException {
while (true) {
XContentParser.Token token = parser.nextToken();
assert token != null;

if (token == XContentParser.Token.START_OBJECT) {
openObjects++;
} else if (token == XContentParser.Token.END_OBJECT) {
openObjects--;
} else if (token.isValue()) {
String value = parser.text();
addField(value, fields);
} else if (token == XContentParser.Token.VALUE_NULL) {
String value = fieldType.nullValueAsString();
if (value != null) {
addField(value, fields);
}
if (token == XContentParser.Token.END_ARRAY) {
return;
}
parseFieldValue(token, parser, path, currentName, fields);
}
}

private void parseFieldValue(XContentParser.Token token,
XContentParser parser,
ContentPath path,
String currentName,
List<IndexableField> fields) throws IOException {
if (token == XContentParser.Token.START_OBJECT) {
path.add(currentName);
parseObject(parser, path, fields);
path.remove();
} else if (token == XContentParser.Token.START_ARRAY) {
parseArray(parser, path, currentName, fields);
} else if (token.isValue()) {
String value = parser.text();
addField(path, currentName, value, fields);
} else if (token == XContentParser.Token.VALUE_NULL) {
String value = fieldType.nullValueAsString();
if (value != null) {
addField(path, currentName, value, fields);
}
} else {
// Note that we throw an exception here just to be safe. We don't actually expect to reach
// this case, since XContentParser verifies that the input is well-formed as it parses.
throw new IllegalArgumentException("Encountered unexpected token [" + token.toString() + "].");
}
}

private void addField(String value, List<IndexableField> fields) {
if (value.length() <= ignoreAbove) {
fields.add(new Field(fieldType.name(), new BytesRef(value), fieldType));
private void addField(ContentPath path,
String currentName,
String value,
List<IndexableField> fields) {
if (value.length() > ignoreAbove) {
return;
}

String key = path.pathAsText(currentName);
if (key.contains(SEPARATOR)) {
throw new IllegalArgumentException("Keys in [json] fields cannot contain the reserved character \\0."
+ " Offending key: [" + key + "].");
}
String keyedValue = createKeyedValue(key, value);

fields.add(new Field(rootFieldName, new BytesRef(value), fieldType));
fields.add(new Field(keyedFieldName, new BytesRef(keyedValue), fieldType));
}

private static String createKeyedValue(String key, String value) {
return key + SEPARATOR + value;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,30 +72,31 @@ public void testDefaults() throws Exception {

BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
.startObject("field")
.field("key1", "value")
.field("key2", true)
.field("key", "value")
.endObject()
.endObject());

ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));

IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
assertEquals(2, fields.length);
assertEquals(1, fields.length);

assertEquals("field", fields[0].name());
assertEquals(new BytesRef("value"), fields[0].binaryValue());
assertFalse(fields[0].fieldType().stored());
assertTrue(fields[0].fieldType().omitNorms());

IndexableField field1 = fields[0];
assertEquals("field", field1.name());
assertEquals(new BytesRef("value"), field1.binaryValue());
assertTrue(field1.fieldType().omitNorms());
IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
assertEquals(1, keyedFields.length);

IndexableField field2 = fields[1];
assertEquals("field", field2.name());
assertEquals(new BytesRef("true"), field2.binaryValue());
assertTrue(field2.fieldType().omitNorms());
assertEquals("field._keyed", keyedFields[0].name());
assertEquals(new BytesRef("key\0value"), keyedFields[0].binaryValue());
assertFalse(keyedFields[0].fieldType().stored());
assertTrue(keyedFields[0].fieldType().omitNorms());

IndexableField[] fieldNamesFields = parsedDoc.rootDoc().getFields(FieldNamesFieldMapper.NAME);
assertEquals(1, fieldNamesFields.length);

IndexableField fieldNamesField = fieldNamesFields[0];
assertEquals("field", fieldNamesField.stringValue());
assertEquals("field", fieldNamesFields[0].stringValue());
}

public void testDisableIndex() throws Exception {
Expand Down Expand Up @@ -248,20 +249,18 @@ public void testFieldMultiplicity() throws Exception {
.endObject());

ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));

IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
assertEquals(3, fields.length);

IndexableField field1 = fields[0];
assertEquals("field", field1.name());
assertEquals(new BytesRef("value"), field1.binaryValue());

IndexableField field2 = fields[1];
assertEquals("field", field2.name());
assertEquals(new BytesRef("true"), field2.binaryValue());

IndexableField field3 = fields[2];
assertEquals("field", field3.name());
assertEquals(new BytesRef("false"), field3.binaryValue());
assertEquals(new BytesRef("value"), fields[0].binaryValue());
assertEquals(new BytesRef("true"), fields[1].binaryValue());
assertEquals(new BytesRef("false"), fields[2].binaryValue());

IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
assertEquals(3, keyedFields.length);
assertEquals(new BytesRef("key1\0value"), keyedFields[0].binaryValue());
assertEquals(new BytesRef("key2\0true"), keyedFields[1].binaryValue());
assertEquals(new BytesRef("key3\0false"), keyedFields[2].binaryValue());
}

public void testIgnoreAbove() throws IOException {
Expand Down Expand Up @@ -292,7 +291,6 @@ public void testIgnoreAbove() throws IOException {
assertEquals(0, fields.length);
}


public void testNullValues() throws Exception {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
.startObject("type")
Expand Down Expand Up @@ -326,8 +324,11 @@ public void testNullValues() throws Exception {

IndexableField[] otherFields = parsedDoc.rootDoc().getFields("other_field");
assertEquals(1, otherFields.length);
IndexableField field = otherFields[0];
assertEquals(new BytesRef("placeholder"), field.binaryValue());
assertEquals(new BytesRef("placeholder"), otherFields[0].binaryValue());

IndexableField[] prefixedOtherFields = parsedDoc.rootDoc().getFields("other_field._keyed");
assertEquals(1, prefixedOtherFields.length);
assertEquals(new BytesRef("key\0placeholder"), prefixedOtherFields[0].binaryValue());
}

public void testSplitQueriesOnWhitespace() throws IOException {
Expand Down
Loading

0 comments on commit 4a58906

Please sign in to comment.