From 4a589066f17ead6a9dc2b7cbe12c14909cb778a3 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 3 Oct 2018 23:29:20 -0700 Subject: [PATCH] When parsing JSON fields, also create tokens prefixed with the field key. (#34207) --- .../index/mapper/JsonFieldMapper.java | 13 +- .../index/mapper/JsonFieldParser.java | 105 ++++++++--- .../index/mapper/JsonFieldMapperTests.java | 59 +++--- .../index/mapper/JsonFieldParserTests.java | 172 +++++++++++++++++- .../search/query/SearchQueryIT.java | 41 +++++ 5 files changed, 327 insertions(+), 63 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java index 7efcdf5ab9039..bc6bcec26008d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java @@ -51,8 +51,10 @@ * of keys. * * Currently the mapper extracts all leaf values of the JSON object, converts them to their text - * representations, and indexes each one as a keyword. As an example, given a json field called - * 'json_field' and the following input + * representations, and indexes each one as a keyword. It creates both a 'keyed' version of the token, + * to allow searches on particular key-value pairs, and a 'root' token without the key. + * + * As an example, given a json field called 'json_field' and the following input * * { * "json_field: { @@ -63,13 +65,18 @@ * } * } * - * the mapper will produce untokenized string fields with the values "some value" and "true". + * the mapper will produce untokenized string fields called "json_field" with values "some value" and "true", + * as well as string fields called "json_field._keyed" with values "key\0some value" and "key2.key3\0true". + * + * Note that \0 is a reserved separator character, and cannot be used in the keys of the JSON object + * (see {@link JsonFieldParser#SEPARATOR}). 
*/ public final class JsonFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "json"; public static final NamedAnalyzer WHITESPACE_ANALYZER = new NamedAnalyzer( "whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()); + public static final String KEYED_FIELD_SUFFIX = "._keyed"; private static class Defaults { public static final MappedFieldType FIELD_TYPE = new JsonFieldType(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldParser.java b/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldParser.java index 25a40235844e9..9a1ed8d4d9bf1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/JsonFieldParser.java @@ -31,16 +31,24 @@ /** * A helper class for {@link JsonFieldMapper} parses a JSON object - * and produces an indexable field for each leaf value. + * and produces a pair of indexable fields for each leaf value. */ public class JsonFieldParser { + private static final String SEPARATOR = "\0"; + private final MappedFieldType fieldType; private final int ignoreAbove; + private final String rootFieldName; + private final String keyedFieldName; + JsonFieldParser(MappedFieldType fieldType, int ignoreAbove) { this.fieldType = fieldType; this.ignoreAbove = ignoreAbove; + + this.rootFieldName = fieldType.name(); + this.keyedFieldName = fieldType.name() + JsonFieldMapper.KEYED_FIELD_SUFFIX; } public List parse(XContentParser parser) throws IOException { @@ -48,36 +56,91 @@ public List parse(XContentParser parser) throws IOException { parser.currentToken(), parser::getTokenLocation); + ContentPath path = new ContentPath(); List fields = new ArrayList<>(); - int openObjects = 1; + parseObject(parser, path, fields); + return fields; + } + + private void parseObject(XContentParser parser, + ContentPath path, + List fields) throws IOException { + String currentName = null; while (true) { - if (openObjects == 0) { - return 
fields; + XContentParser.Token token = parser.nextToken(); + if (token == XContentParser.Token.END_OBJECT) { + return; + } + + if (token == XContentParser.Token.FIELD_NAME) { + currentName = parser.currentName(); + } else { + assert currentName != null; + parseFieldValue(token, parser, path, currentName, fields); } + } + } + private void parseArray(XContentParser parser, + ContentPath path, + String currentName, + List fields) throws IOException { + while (true) { XContentParser.Token token = parser.nextToken(); - assert token != null; - - if (token == XContentParser.Token.START_OBJECT) { - openObjects++; - } else if (token == XContentParser.Token.END_OBJECT) { - openObjects--; - } else if (token.isValue()) { - String value = parser.text(); - addField(value, fields); - } else if (token == XContentParser.Token.VALUE_NULL) { - String value = fieldType.nullValueAsString(); - if (value != null) { - addField(value, fields); - } + if (token == XContentParser.Token.END_ARRAY) { + return; + } + parseFieldValue(token, parser, path, currentName, fields); + } + } + + private void parseFieldValue(XContentParser.Token token, + XContentParser parser, + ContentPath path, + String currentName, + List fields) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + path.add(currentName); + parseObject(parser, path, fields); + path.remove(); + } else if (token == XContentParser.Token.START_ARRAY) { + parseArray(parser, path, currentName, fields); + } else if (token.isValue()) { + String value = parser.text(); + addField(path, currentName, value, fields); + } else if (token == XContentParser.Token.VALUE_NULL) { + String value = fieldType.nullValueAsString(); + if (value != null) { + addField(path, currentName, value, fields); } + } else { + // Note that we throw an exception here just to be safe. We don't actually expect to reach + // this case, since XContentParser verifies that the input is well-formed as it parses. 
+ throw new IllegalArgumentException("Encountered unexpected token [" + token.toString() + "]."); } } - private void addField(String value, List fields) { - if (value.length() <= ignoreAbove) { - fields.add(new Field(fieldType.name(), new BytesRef(value), fieldType)); + private void addField(ContentPath path, + String currentName, + String value, + List fields) { + if (value.length() > ignoreAbove) { + return; } + + String key = path.pathAsText(currentName); + if (key.contains(SEPARATOR)) { + throw new IllegalArgumentException("Keys in [json] fields cannot contain the reserved character \\0." + + " Offending key: [" + key + "]."); + } + String keyedValue = createKeyedValue(key, value); + + fields.add(new Field(rootFieldName, new BytesRef(value), fieldType)); + fields.add(new Field(keyedFieldName, new BytesRef(keyedValue), fieldType)); + } + + private static String createKeyedValue(String key, String value) { + return key + SEPARATOR + value; } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldMapperTests.java index d15bc83068995..01bfe7ddcbe9e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldMapperTests.java @@ -72,30 +72,31 @@ public void testDefaults() throws Exception { BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject() .startObject("field") - .field("key1", "value") - .field("key2", true) + .field("key", "value") .endObject() .endObject()); ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON)); + IndexableField[] fields = parsedDoc.rootDoc().getFields("field"); - assertEquals(2, fields.length); + assertEquals(1, fields.length); + + assertEquals("field", fields[0].name()); + assertEquals(new BytesRef("value"), fields[0].binaryValue()); + 
assertFalse(fields[0].fieldType().stored()); + assertTrue(fields[0].fieldType().omitNorms()); - IndexableField field1 = fields[0]; - assertEquals("field", field1.name()); - assertEquals(new BytesRef("value"), field1.binaryValue()); - assertTrue(field1.fieldType().omitNorms()); + IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed"); + assertEquals(1, keyedFields.length); - IndexableField field2 = fields[1]; - assertEquals("field", field2.name()); - assertEquals(new BytesRef("true"), field2.binaryValue()); - assertTrue(field2.fieldType().omitNorms()); + assertEquals("field._keyed", keyedFields[0].name()); + assertEquals(new BytesRef("key\0value"), keyedFields[0].binaryValue()); + assertFalse(keyedFields[0].fieldType().stored()); + assertTrue(keyedFields[0].fieldType().omitNorms()); IndexableField[] fieldNamesFields = parsedDoc.rootDoc().getFields(FieldNamesFieldMapper.NAME); assertEquals(1, fieldNamesFields.length); - - IndexableField fieldNamesField = fieldNamesFields[0]; - assertEquals("field", fieldNamesField.stringValue()); + assertEquals("field", fieldNamesFields[0].stringValue()); } public void testDisableIndex() throws Exception { @@ -248,20 +249,18 @@ public void testFieldMultiplicity() throws Exception { .endObject()); ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON)); + IndexableField[] fields = parsedDoc.rootDoc().getFields("field"); assertEquals(3, fields.length); - - IndexableField field1 = fields[0]; - assertEquals("field", field1.name()); - assertEquals(new BytesRef("value"), field1.binaryValue()); - - IndexableField field2 = fields[1]; - assertEquals("field", field2.name()); - assertEquals(new BytesRef("true"), field2.binaryValue()); - - IndexableField field3 = fields[2]; - assertEquals("field", field3.name()); - assertEquals(new BytesRef("false"), field3.binaryValue()); + assertEquals(new BytesRef("value"), fields[0].binaryValue()); + assertEquals(new BytesRef("true"), 
fields[1].binaryValue()); + assertEquals(new BytesRef("false"), fields[2].binaryValue()); + + IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed"); + assertEquals(3, keyedFields.length); + assertEquals(new BytesRef("key1\0value"), keyedFields[0].binaryValue()); + assertEquals(new BytesRef("key2\0true"), keyedFields[1].binaryValue()); + assertEquals(new BytesRef("key3\0false"), keyedFields[2].binaryValue()); } public void testIgnoreAbove() throws IOException { @@ -292,7 +291,6 @@ public void testIgnoreAbove() throws IOException { assertEquals(0, fields.length); } - public void testNullValues() throws Exception { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject() .startObject("type") @@ -326,8 +324,11 @@ public void testNullValues() throws Exception { IndexableField[] otherFields = parsedDoc.rootDoc().getFields("other_field"); assertEquals(1, otherFields.length); - IndexableField field = otherFields[0]; - assertEquals(new BytesRef("placeholder"), field.binaryValue()); + assertEquals(new BytesRef("placeholder"), otherFields[0].binaryValue()); + + IndexableField[] prefixedOtherFields = parsedDoc.rootDoc().getFields("other_field._keyed"); + assertEquals(1, prefixedOtherFields.length); + assertEquals(new BytesRef("key\0placeholder"), prefixedOtherFields[0].binaryValue()); } public void testSplitQueriesOnWhitespace() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldParserTests.java index a60637a87e14b..a39513a0ac319 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/JsonFieldParserTests.java @@ -19,12 +19,17 @@ package org.elasticsearch.index.mapper; +import com.fasterxml.jackson.core.JsonParseException; import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; +import 
org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.index.mapper.JsonFieldMapper.JsonFieldType; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.XContentTestUtils; import org.junit.Before; import java.io.IOException; @@ -47,15 +52,23 @@ public void testTextValues() throws Exception { XContentParser xContentParser = createXContentParser(input); List fields = parser.parse(xContentParser); - assertEquals(2, fields.size()); + assertEquals(4, fields.size()); IndexableField field1 = fields.get(0); assertEquals("field", field1.name()); assertEquals(new BytesRef("value1"), field1.binaryValue()); - IndexableField field2 = fields.get(1); + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key1\0value1"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); assertEquals("field", field2.name()); assertEquals(new BytesRef("value2"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key2\0value2"), keyedField2.binaryValue()); } public void testNumericValues() throws Exception { @@ -63,11 +76,15 @@ public void testNumericValues() throws Exception { XContentParser xContentParser = createXContentParser(input); List fields = parser.parse(xContentParser); - assertEquals(1, fields.size()); + assertEquals(2, fields.size()); IndexableField field = fields.get(0); assertEquals("field", field.name()); assertEquals(new BytesRef("2.718"), field.binaryValue()); + + IndexableField keyedField = fields.get(1); + assertEquals("field._keyed", keyedField.name()); + assertEquals(new BytesRef("key" + '\0' + "2.718"), 
keyedField.binaryValue()); } public void testBooleanValues() throws Exception { @@ -75,27 +92,103 @@ public void testBooleanValues() throws Exception { XContentParser xContentParser = createXContentParser(input); List fields = parser.parse(xContentParser); - assertEquals(1, fields.size()); + assertEquals(2, fields.size()); IndexableField field = fields.get(0); assertEquals("field", field.name()); assertEquals(new BytesRef("false"), field.binaryValue()); + + IndexableField keyedField = fields.get(1); + assertEquals("field._keyed", keyedField.name()); + assertEquals(new BytesRef("key\0false"), keyedField.binaryValue()); } - public void testArrays() throws Exception { + public void testBasicArrays() throws Exception { String input = "{ \"key\": [true, false] }"; XContentParser xContentParser = createXContentParser(input); List fields = parser.parse(xContentParser); - assertEquals(2, fields.size()); + assertEquals(4, fields.size()); IndexableField field1 = fields.get(0); assertEquals("field", field1.name()); assertEquals(new BytesRef("true"), field1.binaryValue()); - IndexableField field2 = fields.get(1); + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key\0true"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); assertEquals("field", field2.name()); assertEquals(new BytesRef("false"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key\0false"), keyedField2.binaryValue()); + } + + public void testArrayOfArrays() throws Exception { + String input = "{ \"key\": [[true, \"value\"], 3] }"; + XContentParser xContentParser = createXContentParser(input); + + List fields = parser.parse(xContentParser); + assertEquals(6, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("true"), 
field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key\0true"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + assertEquals("field", field2.name()); + assertEquals(new BytesRef("value"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key\0value"), keyedField2.binaryValue()); + + IndexableField field3 = fields.get(4); + assertEquals("field", field3.name()); + assertEquals(new BytesRef("3"), field3.binaryValue()); + + IndexableField keyedField3 = fields.get(5); + assertEquals("field._keyed", keyedField3.name()); + assertEquals(new BytesRef("key" + "\0" + "3"), keyedField3.binaryValue()); + } + + public void testArraysOfObjects() throws Exception { + String input = "{ \"key1\": [{ \"key2\": true }, false], \"key4\": \"other\" }"; + XContentParser xContentParser = createXContentParser(input); + + List fields = parser.parse(xContentParser); + assertEquals(6, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("true"), field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key1.key2\0true"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + assertEquals("field", field2.name()); + assertEquals(new BytesRef("false"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key1\0false"), keyedField2.binaryValue()); + + IndexableField field3 = fields.get(4); + assertEquals("field", field3.name()); + assertEquals(new BytesRef("other"), field3.binaryValue()); + + IndexableField keyedField3 = fields.get(5); + assertEquals("field._keyed", keyedField3.name()); + 
assertEquals(new BytesRef("key4\0other"), keyedField3.binaryValue()); } public void testNestedObjects() throws Exception { @@ -104,15 +197,23 @@ public void testNestedObjects() throws Exception { XContentParser xContentParser = createXContentParser(input); List fields = parser.parse(xContentParser); - assertEquals(2, fields.size()); + assertEquals(4, fields.size()); IndexableField field1 = fields.get(0); assertEquals("field", field1.name()); assertEquals(new BytesRef("value"), field1.binaryValue()); - IndexableField field2 = fields.get(1); + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("parent1.key\0value"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); assertEquals("field", field2.name()); assertEquals(new BytesRef("value"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("parent2.key\0value"), keyedField2.binaryValue()); } public void testIgnoreAbove() throws Exception { @@ -142,11 +243,62 @@ public void testNullValues() throws Exception { JsonFieldParser nullValueParser = new JsonFieldParser(fieldType, Integer.MAX_VALUE); fields = nullValueParser.parse(xContentParser); - assertEquals(1, fields.size()); + assertEquals(2, fields.size()); IndexableField field = fields.get(0); assertEquals("field", field.name()); assertEquals(new BytesRef("placeholder"), field.binaryValue()); + + IndexableField keyedField = fields.get(1); + assertEquals("field._keyed", keyedField.name()); + assertEquals(new BytesRef("key\0placeholder"), keyedField.binaryValue()); + } + + public void testMalformedJson() throws Exception { + String input = "{ \"key\": [true, false }"; + XContentParser xContentParser = createXContentParser(input); + + expectThrows(JsonParseException.class, () -> parser.parse(xContentParser)); + } + + public void testEmptyObject() throws Exception { + String input = 
"{}"; + XContentParser xContentParser = createXContentParser(input); + + List fields = parser.parse(xContentParser); + assertEquals(0, fields.size()); + } + + public void testRandomFields() throws Exception { + BytesReference input = BytesReference.bytes( + XContentBuilder.builder(JsonXContent.jsonXContent) + .startObject() + .startObject("object") + .field("key", "value") + .endObject() + .startArray("array") + .value(2.718) + .endArray() + .endObject()); + + input = XContentTestUtils.insertRandomFields(XContentType.JSON, input, null, random()); + XContentParser xContentParser = createXContentParser(input.utf8ToString()); + + List fields = parser.parse(xContentParser); + assertTrue(fields.size() > 4); + } + + public void testReservedCharacters() throws Exception { + BytesReference input = BytesReference.bytes( + XContentBuilder.builder(JsonXContent.jsonXContent) + .startObject() + .field("k\0y", "value") + .endObject()); + XContentParser xContentParser = createXContentParser(input.utf8ToString()); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> parser.parse(xContentParser)); + assertEquals("Keys in [json] fields cannot contain the reserved character \\0. 
Offending key: [k\0y].", + e.getMessage()); } private XContentParser createXContentParser(String input) throws IOException { diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 7007c7650f41b..bfa96c57ff3ac 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -1875,4 +1875,45 @@ public void testFieldAliasesForMetaFields() throws Exception { DocumentField field = hit.getFields().get("id-alias"); assertThat(field.getValue().toString(), equalTo("1")); } + + public void testJsonField() throws Exception { + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("properties") + .startObject("headers") + .field("type", "json") + .endObject() + .endObject() + .endObject() + .endObject(); + assertAcked(prepareCreate("test").addMapping("type", mapping)); + + XContentBuilder source = XContentFactory.jsonBuilder() + .startObject() + .startObject("headers") + .field("content-type", "application/json") + .endObject() + .endObject(); + IndexRequestBuilder indexRequest = client().prepareIndex("test", "type") + .setId("1") + .setRouting("custom") + .setSource(source); + indexRandom(true, false, indexRequest); + + SearchResponse searchResponse = client().prepareSearch() + .setQuery(prefixQuery("headers", "application/")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch() + .setQuery(existsQuery("headers")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch() + .setQuery(prefixQuery("headers", "content")) + .get(); + assertHitCount(searchResponse, 0L); + } }