diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java index 2a6199486a2ac..7d7b194de8ae6 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportRunAnalyticsAction.java @@ -185,6 +185,9 @@ private void runPipelineAnalytics(String index, ActionListener IGNORE_FIELDS = Arrays.asList("_id", "_field_names", "_index", "_parent", "_routing", "_seq_no", "_source", "_type", "_uid", "_version", "_feature", "_ignored"); + /** + * The types supported by data frames + */ + private static final Set COMPATIBLE_FIELD_TYPES; + + static { + Set compatibleTypes = Stream.of(NumberFieldMapper.NumberType.values()) + .map(NumberFieldMapper.NumberType::typeName) + .collect(Collectors.toSet()); + compatibleTypes.add("scaled_float"); // have to add manually since scaled_float is in a module + + COMPATIBLE_FIELD_TYPES = Collections.unmodifiableSet(compatibleTypes); + } + private final Client client; private final String index; private final ExtractedFields extractedFields; @@ -82,10 +102,27 @@ public static void create(Client client, Map headers, String ind }); } - private static ExtractedFields detectExtractedFields(FieldCapabilitiesResponse fieldCapabilitiesResponse) { + // Visible for testing + static ExtractedFields detectExtractedFields(FieldCapabilitiesResponse fieldCapabilitiesResponse) { Set fields = fieldCapabilitiesResponse.get().keySet(); fields.removeAll(IGNORE_FIELDS); - return ExtractedFields.build(new ArrayList<>(fields), Collections.emptySet(), fieldCapabilitiesResponse) + removeFieldsWithIncompatibleTypes(fields, fieldCapabilitiesResponse); + ExtractedFields extractedFields = ExtractedFields.build(new ArrayList<>(fields), Collections.emptySet(), fieldCapabilitiesResponse) .filterFields(ExtractedField.ExtractionMethod.DOC_VALUE); + if (extractedFields.getAllFields().isEmpty()) { + throw ExceptionsHelper.badRequestException("No compatible fields could be detected"); + } + return extractedFields; + } + + private static void removeFieldsWithIncompatibleTypes(Set fields, FieldCapabilitiesResponse fieldCapabilitiesResponse) { + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + Map fieldCaps = fieldCapabilitiesResponse.getField(field); + if (fieldCaps == null || COMPATIBLE_FIELD_TYPES.containsAll(fieldCaps.keySet()) == false) { + fieldsIterator.remove(); + } + } } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java new file mode 100644 index 0000000000000..1a43b2893baef --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/analytics/DataFrameDataExtractorFactoryTests.java @@ -0,0 +1,115 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.analytics; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.fieldcaps.FieldCapabilities; +import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedField; +import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedFields; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class DataFrameDataExtractorFactoryTests extends ESTestCase { + + public void testDetectExtractedFields_GivenFloatField() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_float", "float").build(); + + ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities); + + List allFields = extractedFields.getAllFields(); + assertThat(allFields.size(), equalTo(1)); + assertThat(allFields.get(0).getName(), equalTo("some_float")); + } + + public void testDetectExtractedFields_GivenNumericFieldWithMultipleTypes() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_number", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float") + .build(); + + ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities); + + List allFields = extractedFields.getAllFields(); + assertThat(allFields.size(), equalTo(1)); + assertThat(allFields.get(0).getName(), equalTo("some_number")); + } + + public void testDetectExtractedFields_GivenNonNumericField() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_keyword", "keyword").build(); + + ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, + () -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities)); + assertThat(e.getMessage(), equalTo("No compatible fields could be detected")); + } + + public void testDetectExtractedFields_GivenFieldWithNumericAndNonNumericTypes() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("indecisive_field", "float", "keyword").build(); + + ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, + () -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities)); + assertThat(e.getMessage(), equalTo("No compatible fields could be detected")); + } + + public void testDetectExtractedFields_GivenMultipleFields() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("some_float", "float") + .addAggregatableField("some_long", "long") + .addAggregatableField("some_keyword", "keyword") + .build(); + + ExtractedFields extractedFields = DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities); + + List allFields = extractedFields.getAllFields(); + assertThat(allFields.size(), equalTo(2)); + assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), + containsInAnyOrder("some_float", "some_long")); + } + + public void testDetectExtractedFields_GivenIgnoredField() { + FieldCapabilitiesResponse fieldCapabilities= new MockFieldCapsResponseBuilder() + .addAggregatableField("_id", "float").build(); + + ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, + () -> DataFrameDataExtractorFactory.detectExtractedFields(fieldCapabilities)); + assertThat(e.getMessage(), equalTo("No compatible fields could be detected")); + } + + private static class MockFieldCapsResponseBuilder { + + private final Map> fieldCaps = new HashMap<>(); + + private MockFieldCapsResponseBuilder addAggregatableField(String field, String... types) { + Map caps = new HashMap<>(); + for (String type : types) { + caps.put(type, new FieldCapabilities(field, type, true, true)); + } + fieldCaps.put(field, caps); + return this; + } + + private FieldCapabilitiesResponse build() { + FieldCapabilitiesResponse response = mock(FieldCapabilitiesResponse.class); + when(response.get()).thenReturn(fieldCaps); + + for (String field : fieldCaps.keySet()) { + when(response.getField(field)).thenReturn(fieldCaps.get(field)); + } + return response; + } + } +}