diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfig.java
index afe38dc29bf7f..61961f8e9cf83 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfig.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfig.java
@@ -63,6 +63,7 @@ public class DatafeedConfig extends AbstractDiffable implements
     // Used for QueryPage
     public static final ParseField RESULTS_FIELD = new ParseField("datafeeds");
+    public static final String TYPE = "datafeed";
 
     /**
      * The field name used to specify document counts in Elasticsearch
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/AnalysisConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/AnalysisConfig.java
index 9068ffda4de55..e4e41697bec62 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/AnalysisConfig.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/AnalysisConfig.java
@@ -53,17 +53,17 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
     /**
      * Serialisation names
      */
     public static final ParseField ANALYSIS_CONFIG = new ParseField("analysis_config");
-    private static final ParseField BUCKET_SPAN = new ParseField("bucket_span");
-    private static final ParseField CATEGORIZATION_FIELD_NAME = new ParseField("categorization_field_name");
-    static final ParseField CATEGORIZATION_FILTERS = new ParseField("categorization_filters");
-    private static final ParseField CATEGORIZATION_ANALYZER = CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER;
-    private static final ParseField LATENCY = new ParseField("latency");
-    private static final ParseField SUMMARY_COUNT_FIELD_NAME = new ParseField("summary_count_field_name");
-    private static final ParseField
DETECTORS = new ParseField("detectors"); - private static final ParseField INFLUENCERS = new ParseField("influencers"); - private static final ParseField OVERLAPPING_BUCKETS = new ParseField("overlapping_buckets"); - private static final ParseField RESULT_FINALIZATION_WINDOW = new ParseField("result_finalization_window"); - private static final ParseField MULTIVARIATE_BY_FIELDS = new ParseField("multivariate_by_fields"); + public static final ParseField BUCKET_SPAN = new ParseField("bucket_span"); + public static final ParseField CATEGORIZATION_FIELD_NAME = new ParseField("categorization_field_name"); + public static final ParseField CATEGORIZATION_FILTERS = new ParseField("categorization_filters"); + public static final ParseField CATEGORIZATION_ANALYZER = CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER; + public static final ParseField LATENCY = new ParseField("latency"); + public static final ParseField SUMMARY_COUNT_FIELD_NAME = new ParseField("summary_count_field_name"); + public static final ParseField DETECTORS = new ParseField("detectors"); + public static final ParseField INFLUENCERS = new ParseField("influencers"); + public static final ParseField OVERLAPPING_BUCKETS = new ParseField("overlapping_buckets"); + public static final ParseField RESULT_FINALIZATION_WINDOW = new ParseField("result_finalization_window"); + public static final ParseField MULTIVARIATE_BY_FIELDS = new ParseField("multivariate_by_fields"); public static final String ML_CATEGORY_FIELD = "mlcategory"; public static final Set AUTO_CREATED_FIELDS = new HashSet<>(Collections.singletonList(ML_CATEGORY_FIELD)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java index 36c25e0a7a7aa..e0b66e30f2496 100644 --- 
a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java @@ -53,9 +53,9 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeable { public static final ParseField CATEGORIZATION_ANALYZER = new ParseField("categorization_analyzer"); - private static final ParseField TOKENIZER = RestAnalyzeAction.Fields.TOKENIZER; - private static final ParseField TOKEN_FILTERS = RestAnalyzeAction.Fields.TOKEN_FILTERS; - private static final ParseField CHAR_FILTERS = RestAnalyzeAction.Fields.CHAR_FILTERS; + public static final ParseField TOKENIZER = RestAnalyzeAction.Fields.TOKENIZER; + public static final ParseField TOKEN_FILTERS = RestAnalyzeAction.Fields.TOKEN_FILTERS; + public static final ParseField CHAR_FILTERS = RestAnalyzeAction.Fields.CHAR_FILTERS; /** * This method is only used in the unit tests - in production code this config is always parsed as a fragment. 
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/DataDescription.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/DataDescription.java index 87c084baeac95..022181bd8f026 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/DataDescription.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/DataDescription.java @@ -77,12 +77,12 @@ public String toString() { } } - private static final ParseField DATA_DESCRIPTION_FIELD = new ParseField("data_description"); - private static final ParseField FORMAT_FIELD = new ParseField("format"); - private static final ParseField TIME_FIELD_NAME_FIELD = new ParseField("time_field"); - private static final ParseField TIME_FORMAT_FIELD = new ParseField("time_format"); - private static final ParseField FIELD_DELIMITER_FIELD = new ParseField("field_delimiter"); - private static final ParseField QUOTE_CHARACTER_FIELD = new ParseField("quote_character"); + public static final ParseField DATA_DESCRIPTION_FIELD = new ParseField("data_description"); + public static final ParseField FORMAT_FIELD = new ParseField("format"); + public static final ParseField TIME_FIELD_NAME_FIELD = new ParseField("time_field"); + public static final ParseField TIME_FORMAT_FIELD = new ParseField("time_format"); + public static final ParseField FIELD_DELIMITER_FIELD = new ParseField("field_delimiter"); + public static final ParseField QUOTE_CHARACTER_FIELD = new ParseField("quote_character"); /** * Special time format string for epoch times (seconds) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/ModelPlotConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/ModelPlotConfig.java index 98aa618dd1ee9..824df9f88f5ef 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/ModelPlotConfig.java +++ 
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/ModelPlotConfig.java @@ -18,8 +18,8 @@ public class ModelPlotConfig implements ToXContentObject, Writeable { - private static final ParseField TYPE_FIELD = new ParseField("model_plot_config"); - private static final ParseField ENABLED_FIELD = new ParseField("enabled"); + public static final ParseField TYPE_FIELD = new ParseField("model_plot_config"); + public static final ParseField ENABLED_FIELD = new ParseField("enabled"); public static final ParseField TERMS_FIELD = new ParseField("terms"); // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java index 4e51d7b6c1e30..6cf4aee2a9672 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java @@ -56,4 +56,14 @@ public static String getPhysicalIndexFromState(ClusterState state, String jobId) public static String jobStateIndexName() { return AnomalyDetectorsIndexFields.STATE_INDEX_NAME; } + + /** + * The name of the index where job and datafeed configuration + * is stored + * @return The index name + */ + public static String configIndexName() { + return AnomalyDetectorsIndexFields.CONFIG_INDEX; + } + } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java index 9cdaf10326dfb..527ba5dc1458b 100644 --- 
a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndexFields.java @@ -7,6 +7,7 @@ public final class AnomalyDetectorsIndexFields { + public static final String CONFIG_INDEX = ".ml-config"; public static final String RESULTS_INDEX_PREFIX = ".ml-anomalies-"; public static final String STATE_INDEX_NAME = ".ml-state"; public static final String RESULTS_INDEX_DEFAULT = "shared"; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappings.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappings.java index 316417f4b23aa..085a685d610d8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappings.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappings.java @@ -7,8 +7,18 @@ import org.elasticsearch.Version; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.datafeed.ChunkingConfig; +import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; +import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig; +import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits; +import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; +import org.elasticsearch.xpack.core.ml.job.config.DataDescription; +import org.elasticsearch.xpack.core.ml.job.config.DetectionRule; import org.elasticsearch.xpack.core.ml.job.config.Detector; import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.core.ml.job.config.ModelPlotConfig; +import org.elasticsearch.xpack.core.ml.job.config.Operator; +import org.elasticsearch.xpack.core.ml.job.config.RuleCondition; import 
org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot; @@ -34,8 +44,8 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; /** - * Static methods to create Elasticsearch mappings for the autodetect - * persisted objects/documents + * Static methods to create Elasticsearch index mappings for the autodetect + * persisted objects/documents and configurations *

* ElasticSearch automatically recognises array types so they are * not explicitly mapped as such. For arrays of objects the type @@ -79,6 +89,11 @@ public class ElasticsearchMappings { */ public static final String ES_DOC = "_doc"; + /** + * The configuration document type + */ + public static final String CONFIG_TYPE = "config_type"; + /** * Elasticsearch data types */ @@ -95,6 +110,277 @@ public class ElasticsearchMappings { private ElasticsearchMappings() { } + public static XContentBuilder configMapping() throws IOException { + XContentBuilder builder = jsonBuilder(); + builder.startObject(); + builder.startObject(DOC_TYPE); + addMetaInformation(builder); + addDefaultMapping(builder); + builder.startObject(PROPERTIES); + + addJobConfigFields(builder); + addDatafeedConfigFields(builder); + + builder.endObject() + .endObject() + .endObject(); + return builder; + } + + public static void addJobConfigFields(XContentBuilder builder) throws IOException { + + builder.startObject(CONFIG_TYPE) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.JOB_TYPE.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.JOB_VERSION.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.GROUPS.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.ANALYSIS_CONFIG.getPreferredName()) + .startObject(PROPERTIES) + .startObject(AnalysisConfig.BUCKET_SPAN.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(AnalysisConfig.CATEGORIZATION_FIELD_NAME.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(AnalysisConfig.CATEGORIZATION_FILTERS.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(AnalysisConfig.CATEGORIZATION_ANALYZER.getPreferredName()) + .startObject(PROPERTIES) + .startObject(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName()) + .field(TYPE, 
KEYWORD) + .endObject() + // TOKENIZER, TOKEN_FILTERS and CHAR_FILTERS are complex types, don't parse or index + .startObject(CategorizationAnalyzerConfig.TOKENIZER.getPreferredName()) + .field(ENABLED, false) + .endObject() + .startObject(CategorizationAnalyzerConfig.TOKEN_FILTERS.getPreferredName()) + .field(ENABLED, false) + .endObject() + .startObject(CategorizationAnalyzerConfig.CHAR_FILTERS.getPreferredName()) + .field(ENABLED, false) + .endObject() + .endObject() + .endObject() + .startObject(AnalysisConfig.LATENCY.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(AnalysisConfig.SUMMARY_COUNT_FIELD_NAME.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(AnalysisConfig.DETECTORS.getPreferredName()) + .startObject(PROPERTIES) + .startObject(Detector.DETECTOR_DESCRIPTION_FIELD.getPreferredName()) + .field(TYPE, TEXT) + .endObject() + .startObject(Detector.FUNCTION_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Detector.FIELD_NAME_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Detector.BY_FIELD_NAME_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Detector.OVER_FIELD_NAME_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Detector.USE_NULL_FIELD.getPreferredName()) + .field(TYPE, BOOLEAN) + .endObject() + .startObject(Detector.EXCLUDE_FREQUENT_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Detector.CUSTOM_RULES_FIELD.getPreferredName()) + .field(TYPE, NESTED) + .startObject(PROPERTIES) + .startObject(DetectionRule.ACTIONS_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + // RuleScope is a map + .startObject(DetectionRule.SCOPE_FIELD.getPreferredName()) + .field(ENABLED, false) + .endObject() + 
.startObject(DetectionRule.CONDITIONS_FIELD.getPreferredName()) + .field(TYPE, NESTED) + .startObject(PROPERTIES) + .startObject(RuleCondition.APPLIES_TO_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Operator.OPERATOR_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(RuleCondition.VALUE_FIELD.getPreferredName()) + .field(TYPE, DOUBLE) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .startObject(Detector.DETECTOR_INDEX.getPreferredName()) + .field(TYPE, INTEGER) + .endObject() + .endObject() + .endObject() + + .startObject(AnalysisConfig.INFLUENCERS.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(AnalysisConfig.OVERLAPPING_BUCKETS.getPreferredName()) + .field(TYPE, BOOLEAN) + .endObject() + .startObject(AnalysisConfig.RESULT_FINALIZATION_WINDOW.getPreferredName()) + .field(TYPE, LONG) // TODO This should be made a time value + .endObject() + .startObject(AnalysisConfig.MULTIVARIATE_BY_FIELDS.getPreferredName()) + .field(TYPE, BOOLEAN) + .endObject() + .startObject(AnalysisConfig.USE_PER_PARTITION_NORMALIZATION.getPreferredName()) + .field(TYPE, BOOLEAN) + .endObject() + .endObject() + .endObject() + + .startObject(Job.ANALYSIS_LIMITS.getPreferredName()) + .startObject(PROPERTIES) + .startObject(AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName()) + .field(TYPE, KEYWORD) // TODO Should be a ByteSizeValue + .endObject() + .startObject(AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .endObject() + .endObject() + + .startObject(Job.CREATE_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + + .startObject(Job.CUSTOM_SETTINGS.getPreferredName()) + // Custom settings are an untyped map + .field(ENABLED, false) + .endObject() + + .startObject(Job.DATA_DESCRIPTION.getPreferredName()) + .startObject(PROPERTIES) + .startObject(DataDescription.FORMAT_FIELD.getPreferredName()) + .field(TYPE, 
KEYWORD) + .endObject() + .startObject(DataDescription.TIME_FIELD_NAME_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DataDescription.TIME_FORMAT_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DataDescription.FIELD_DELIMITER_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DataDescription.QUOTE_CHARACTER_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .endObject() + .endObject() + + .startObject(Job.DESCRIPTION.getPreferredName()) + .field(TYPE, TEXT) + .endObject() + .startObject(Job.FINISHED_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .startObject(Job.LAST_DATA_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .startObject(Job.ESTABLISHED_MODEL_MEMORY.getPreferredName()) + .field(TYPE, LONG) // TODO should be ByteSizeValue + .endObject() + + .startObject(Job.MODEL_PLOT_CONFIG.getPreferredName()) + .startObject(PROPERTIES) + .startObject(ModelPlotConfig.ENABLED_FIELD.getPreferredName()) + .field(TYPE, BOOLEAN) + .endObject() + .startObject(ModelPlotConfig.TERMS_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .endObject() + .endObject() + + .startObject(Job.RENORMALIZATION_WINDOW_DAYS.getPreferredName()) + .field(TYPE, LONG) // TODO should be TimeValue + .endObject() + .startObject(Job.BACKGROUND_PERSIST_INTERVAL.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName()) + .field(TYPE, LONG) // TODO should be TimeValue + .endObject() + .startObject(Job.RESULTS_RETENTION_DAYS.getPreferredName()) + .field(TYPE, LONG) // TODO should be TimeValue + .endObject() + .startObject(Job.MODEL_SNAPSHOT_ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Job.RESULTS_INDEX_NAME.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject(); + } + + public static void addDatafeedConfigFields(XContentBuilder builder) throws 
IOException { + builder.startObject(DatafeedConfig.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DatafeedConfig.QUERY_DELAY.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DatafeedConfig.FREQUENCY.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DatafeedConfig.INDICES.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DatafeedConfig.TYPES.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DatafeedConfig.QUERY.getPreferredName()) + .field(ENABLED, false) + .endObject() + .startObject(DatafeedConfig.SCROLL_SIZE.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DatafeedConfig.AGGREGATIONS.getPreferredName()) + .field(ENABLED, false) + .endObject() + .startObject(DatafeedConfig.SCRIPT_FIELDS.getPreferredName()) + .field(ENABLED, false) + .endObject() + .startObject(DatafeedConfig.CHUNKING_CONFIG.getPreferredName()) + .startObject(PROPERTIES) + .startObject(ChunkingConfig.MODE_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(ChunkingConfig.TIME_SPAN_FIELD.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .endObject() + .endObject() + .startObject(DatafeedConfig.HEADERS.getPreferredName()) + .field(ENABLED, false) + .endObject(); + } + /** * Creates a default mapping which has a dynamic template that * treats all dynamically added fields as keywords. 
This is needed @@ -129,11 +415,11 @@ public static void addMetaInformation(XContentBuilder builder) throws IOExceptio .endObject(); } - public static XContentBuilder docMapping() throws IOException { - return docMapping(Collections.emptyList()); + public static XContentBuilder resultsMapping() throws IOException { + return resultsMapping(Collections.emptyList()); } - public static XContentBuilder docMapping(Collection extraTermFields) throws IOException { + public static XContentBuilder resultsMapping(Collection extraTermFields) throws IOException { XContentBuilder builder = jsonBuilder(); builder.startObject(); builder.startObject(DOC_TYPE); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ReservedFieldNames.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ReservedFieldNames.java index 63c4278e541d4..2e37cf1e1ae05 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ReservedFieldNames.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ReservedFieldNames.java @@ -5,8 +5,18 @@ */ package org.elasticsearch.xpack.core.ml.job.results; +import org.elasticsearch.xpack.core.ml.datafeed.ChunkingConfig; +import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; +import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig; +import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits; +import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; +import org.elasticsearch.xpack.core.ml.job.config.DataDescription; +import org.elasticsearch.xpack.core.ml.job.config.DetectionRule; import org.elasticsearch.xpack.core.ml.job.config.Detector; import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.core.ml.job.config.ModelPlotConfig; +import org.elasticsearch.xpack.core.ml.job.config.Operator; +import org.elasticsearch.xpack.core.ml.job.config.RuleCondition; import 
org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats; @@ -36,7 +46,7 @@ public final class ReservedFieldNames { * 2.x requires mappings for given fields be consistent across all types * in a given index.) */ - private static final String[] RESERVED_FIELD_NAME_ARRAY = { + private static final String[] RESERVED_RESULT_FIELD_NAME_ARRAY = { ElasticsearchMappings.ALL_FIELD_VALUES, Job.ID.getPreferredName(), @@ -163,25 +173,121 @@ public final class ReservedFieldNames { }; /** - * Test if fieldName is one of the reserved names or if it contains dots then - * that the segment before the first dot is not a reserved name. A fieldName - * containing dots represents nested fields in which case we only care about - * the top level. + * This array should be updated to contain all the field names that appear + * in any documents we store in our config index. 
+ */ + private static final String[] RESERVED_CONFIG_FIELD_NAME_ARRAY = { + Job.ID.getPreferredName(), + Job.JOB_TYPE.getPreferredName(), + Job.JOB_VERSION.getPreferredName(), + Job.GROUPS.getPreferredName(), + Job.ANALYSIS_CONFIG.getPreferredName(), + Job.ANALYSIS_LIMITS.getPreferredName(), + Job.CREATE_TIME.getPreferredName(), + Job.CUSTOM_SETTINGS.getPreferredName(), + Job.DATA_DESCRIPTION.getPreferredName(), + Job.DESCRIPTION.getPreferredName(), + Job.FINISHED_TIME.getPreferredName(), + Job.LAST_DATA_TIME.getPreferredName(), + Job.ESTABLISHED_MODEL_MEMORY.getPreferredName(), + Job.MODEL_PLOT_CONFIG.getPreferredName(), + Job.RENORMALIZATION_WINDOW_DAYS.getPreferredName(), + Job.BACKGROUND_PERSIST_INTERVAL.getPreferredName(), + Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), + Job.RESULTS_RETENTION_DAYS.getPreferredName(), + Job.MODEL_SNAPSHOT_ID.getPreferredName(), + Job.RESULTS_INDEX_NAME.getPreferredName(), + + AnalysisConfig.BUCKET_SPAN.getPreferredName(), + AnalysisConfig.CATEGORIZATION_FIELD_NAME.getPreferredName(), + AnalysisConfig.CATEGORIZATION_FILTERS.getPreferredName(), + AnalysisConfig.CATEGORIZATION_ANALYZER.getPreferredName(), + AnalysisConfig.LATENCY.getPreferredName(), + AnalysisConfig.SUMMARY_COUNT_FIELD_NAME.getPreferredName(), + AnalysisConfig.DETECTORS.getPreferredName(), + AnalysisConfig.INFLUENCERS.getPreferredName(), + AnalysisConfig.OVERLAPPING_BUCKETS.getPreferredName(), + AnalysisConfig.RESULT_FINALIZATION_WINDOW.getPreferredName(), + AnalysisConfig.MULTIVARIATE_BY_FIELDS.getPreferredName(), + AnalysisConfig.USE_PER_PARTITION_NORMALIZATION.getPreferredName(), + + AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(), + AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), + + CategorizationAnalyzerConfig.CHAR_FILTERS.getPreferredName(), + CategorizationAnalyzerConfig.TOKENIZER.getPreferredName(), + CategorizationAnalyzerConfig.TOKEN_FILTERS.getPreferredName(), + + Detector.DETECTOR_DESCRIPTION_FIELD.getPreferredName(), + 
Detector.FUNCTION_FIELD.getPreferredName(), + Detector.FIELD_NAME_FIELD.getPreferredName(), + Detector.BY_FIELD_NAME_FIELD.getPreferredName(), + Detector.OVER_FIELD_NAME_FIELD.getPreferredName(), + Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), + Detector.USE_NULL_FIELD.getPreferredName(), + Detector.EXCLUDE_FREQUENT_FIELD.getPreferredName(), + Detector.CUSTOM_RULES_FIELD.getPreferredName(), + Detector.DETECTOR_INDEX.getPreferredName(), + + DetectionRule.ACTIONS_FIELD.getPreferredName(), + DetectionRule.CONDITIONS_FIELD.getPreferredName(), + DetectionRule.SCOPE_FIELD.getPreferredName(), + RuleCondition.APPLIES_TO_FIELD.getPreferredName(), + RuleCondition.VALUE_FIELD.getPreferredName(), + Operator.OPERATOR_FIELD.getPreferredName(), + + DataDescription.FORMAT_FIELD.getPreferredName(), + DataDescription.TIME_FIELD_NAME_FIELD.getPreferredName(), + DataDescription.TIME_FORMAT_FIELD.getPreferredName(), + DataDescription.FIELD_DELIMITER_FIELD.getPreferredName(), + DataDescription.QUOTE_CHARACTER_FIELD.getPreferredName(), + + ModelPlotConfig.ENABLED_FIELD.getPreferredName(), + ModelPlotConfig.TERMS_FIELD.getPreferredName(), + + DatafeedConfig.ID.getPreferredName(), + DatafeedConfig.QUERY_DELAY.getPreferredName(), + DatafeedConfig.FREQUENCY.getPreferredName(), + DatafeedConfig.INDICES.getPreferredName(), + DatafeedConfig.TYPES.getPreferredName(), + DatafeedConfig.QUERY.getPreferredName(), + DatafeedConfig.SCROLL_SIZE.getPreferredName(), + DatafeedConfig.AGGREGATIONS.getPreferredName(), + DatafeedConfig.SCRIPT_FIELDS.getPreferredName(), + DatafeedConfig.CHUNKING_CONFIG.getPreferredName(), + DatafeedConfig.HEADERS.getPreferredName(), + + ChunkingConfig.MODE_FIELD.getPreferredName(), + ChunkingConfig.TIME_SPAN_FIELD.getPreferredName(), + + ElasticsearchMappings.CONFIG_TYPE + }; + + /** + * Test if fieldName is one of the reserved result fieldnames or if it contains + * dots then that the segment before the first dot is not a reserved results + * fieldname. 
A fieldName containing dots represents nested fields in which + * case we only care about the top level. * * @param fieldName Document field name. This may contain dots '.' - * @return True if fieldName is not a reserved name or the top level segment + * @return True if fieldName is not a reserved results fieldname or the top level segment * is not a reserved name. */ public static boolean isValidFieldName(String fieldName) { String[] segments = DOT_PATTERN.split(fieldName); - return !RESERVED_FIELD_NAMES.contains(segments[0]); + return RESERVED_RESULT_FIELD_NAMES.contains(segments[0]) == false; } /** * A set of all reserved field names in our results. Fields from the raw * data with these names are not added to any result. */ - public static final Set RESERVED_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_FIELD_NAME_ARRAY)); + public static final Set RESERVED_RESULT_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_RESULT_FIELD_NAME_ARRAY)); + + /** + * A set of all reserved field names in our config. 
+ */ + public static final Set RESERVED_CONFIG_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_CONFIG_FIELD_NAME_ARRAY)); private ReservedFieldNames() { } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappingsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappingsTests.java index 2b644c4aa5be0..e4ce536a3ccf6 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappingsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/persistence/ElasticsearchMappingsTests.java @@ -13,6 +13,9 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; +import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.core.ml.job.config.ModelPlotConfig; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot; @@ -28,25 +31,28 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; public class ElasticsearchMappingsTests extends ESTestCase { - public void testReservedFields() throws Exception { - Set overridden = new HashSet<>(); - - // These are not reserved because they're Elasticsearch keywords, not - // field names - overridden.add(ElasticsearchMappings.ANALYZER); - overridden.add(ElasticsearchMappings.COPY_TO); - overridden.add(ElasticsearchMappings.DYNAMIC); - overridden.add(ElasticsearchMappings.ENABLED); - overridden.add(ElasticsearchMappings.NESTED); - 
overridden.add(ElasticsearchMappings.PROPERTIES);
-        overridden.add(ElasticsearchMappings.TYPE);
-        overridden.add(ElasticsearchMappings.WHITESPACE);
+    // These are not reserved because they're Elasticsearch keywords, not
+    // field names
+    private static final List<String> KEYWORDS = Arrays.asList(
+        ElasticsearchMappings.ANALYZER,
+        ElasticsearchMappings.COPY_TO,
+        ElasticsearchMappings.DYNAMIC,
+        ElasticsearchMappings.ENABLED,
+        ElasticsearchMappings.NESTED,
+        ElasticsearchMappings.PROPERTIES,
+        ElasticsearchMappings.TYPE,
+        ElasticsearchMappings.WHITESPACE
+    );
+
+    public void testResultsMappingReservedFields() throws Exception {
+        Set<String> overridden = new HashSet<>(KEYWORDS);
 
         // These are not reserved because they're data types, not field names
         overridden.add(Result.TYPE.getPreferredName());
@@ -57,25 +63,44 @@ public void testReservedFields() throws Exception {
         overridden.add(Quantiles.TYPE.getPreferredName());
 
         Set<String> expected = collectResultsDocFieldNames();
+        expected.removeAll(overridden);
+
+        compareFields(expected, ReservedFieldNames.RESERVED_RESULT_FIELD_NAMES);
+    }
+
+    public void testConfigMappingReservedFields() throws Exception {
+        Set<String> overridden = new HashSet<>(KEYWORDS);
+
+        // These are not reserved because they're data types, not field names
+        overridden.add(Job.TYPE);
+        overridden.add(DatafeedConfig.TYPE);
+        // ModelPlotConfig has an 'enabled' the same as one of the keywords
+        overridden.remove(ModelPlotConfig.ENABLED_FIELD.getPreferredName());
+
+        Set<String> expected = collectConfigDocFieldNames();
         expected.removeAll(overridden);
 
-        if (ReservedFieldNames.RESERVED_FIELD_NAMES.size() != expected.size()) {
-            Set<String> diff = new HashSet<>(ReservedFieldNames.RESERVED_FIELD_NAMES);
+        compareFields(expected, ReservedFieldNames.RESERVED_CONFIG_FIELD_NAMES);
+    }
+
+
+    private void compareFields(Set<String> expected, Set<String> reserved) {
+        if (reserved.size() != expected.size()) {
+            Set<String> diff = new HashSet<>(reserved);
             diff.removeAll(expected);
             StringBuilder errorMessage = new StringBuilder("Fields in
ReservedFieldNames but not in expected: ").append(diff); diff = new HashSet<>(expected); - diff.removeAll(ReservedFieldNames.RESERVED_FIELD_NAMES); + diff.removeAll(reserved); errorMessage.append("\nFields in expected but not in ReservedFieldNames: ").append(diff); fail(errorMessage.toString()); } - assertEquals(ReservedFieldNames.RESERVED_FIELD_NAMES.size(), expected.size()); + assertEquals(reserved.size(), expected.size()); for (String s : expected) { // By comparing like this the failure messages say which string is missing - String reserved = ReservedFieldNames.RESERVED_FIELD_NAMES.contains(s) ? s : null; - assertEquals(s, reserved); + String reservedField = reserved.contains(s) ? s : null; + assertEquals(s, reservedField); } } @@ -105,10 +130,17 @@ public void testTermFieldMapping() throws IOException { private Set<String> collectResultsDocFieldNames() throws IOException { // Only the mappings for the results index should be added below. Do NOT add mappings for other indexes here. + return collectFieldNames(ElasticsearchMappings.resultsMapping()); + } + + private Set<String> collectConfigDocFieldNames() throws IOException { + // Only the mappings for the config index should be added below. Do NOT add mappings for other indexes here. 
+ return collectFieldNames(ElasticsearchMappings.configMapping()); + } - XContentBuilder builder = ElasticsearchMappings.docMapping(); + private Set<String> collectFieldNames(XContentBuilder mapping) throws IOException { BufferedInputStream inputStream = - new BufferedInputStream(new ByteArrayInputStream(Strings.toString(builder).getBytes(StandardCharsets.UTF_8))); + new BufferedInputStream(new ByteArrayInputStream(Strings.toString(mapping).getBytes(StandardCharsets.UTF_8))); JsonParser parser = new JsonFactory().createParser(inputStream); Set<String> fieldNames = new HashSet<>(); boolean isAfterPropertiesStart = false; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 92da6043017f5..2ef7e8546c576 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -647,6 +647,23 @@ public UnaryOperator> getIndexTemplateMetaDat logger.warn("Error loading the template for the " + MlMetaIndex.INDEX_NAME + " index", e); } + try (XContentBuilder configMapping = ElasticsearchMappings.configMapping()) { + IndexTemplateMetaData configTemplate = IndexTemplateMetaData.builder(AnomalyDetectorsIndex.configIndexName()) + .patterns(Collections.singletonList(AnomalyDetectorsIndex.configIndexName())) + .settings(Settings.builder() + // Our indexes are small and one shard puts the + // least possible burden on Elasticsearch + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1") + .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting)) + .version(Version.CURRENT.id) + .putMapping(ElasticsearchMappings.DOC_TYPE, Strings.toString(configMapping)) + .build(); + templates.put(AnomalyDetectorsIndex.configIndexName(), configTemplate); + } catch (IOException e) { + 
logger.warn("Error loading the template for the " + AnomalyDetectorsIndex.configIndexName() + " index", e); + } + try (XContentBuilder stateMapping = ElasticsearchMappings.stateMapping()) { IndexTemplateMetaData stateTemplate = IndexTemplateMetaData.builder(AnomalyDetectorsIndex.jobStateIndexName()) .patterns(Collections.singletonList(AnomalyDetectorsIndex.jobStateIndexName())) @@ -662,7 +679,7 @@ public UnaryOperator> getIndexTemplateMetaDat logger.error("Error loading the template for the " + AnomalyDetectorsIndex.jobStateIndexName() + " index", e); } - try (XContentBuilder docMapping = ElasticsearchMappings.docMapping()) { + try (XContentBuilder docMapping = ElasticsearchMappings.resultsMapping()) { IndexTemplateMetaData jobResultsTemplate = IndexTemplateMetaData.builder(AnomalyDetectorsIndex.jobResultsIndexPrefix()) .patterns(Collections.singletonList(AnomalyDetectorsIndex.jobResultsIndexPrefix() + "*")) .settings(Settings.builder() diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java index 6341200d12b4d..814892ec3c36d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java @@ -556,7 +556,7 @@ public void onFailure(Exception e) { ); // Step 1. 
Try adding results doc mapping - addDocMappingIfMissing(AnomalyDetectorsIndex.jobResultsAliasedName(jobParams.getJobId()), ElasticsearchMappings::docMapping, + addDocMappingIfMissing(AnomalyDetectorsIndex.jobResultsAliasedName(jobParams.getJobId()), ElasticsearchMappings::resultsMapping, state, resultsPutMappingHandler); } else { listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING)); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobResultsProvider.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobResultsProvider.java index facde323a3df6..a2a75881e4f0c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobResultsProvider.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobResultsProvider.java @@ -336,7 +336,7 @@ public static int countFields(Map mapping) { private void updateIndexMappingWithTermFields(String indexName, Collection termFields, ActionListener listener) { // Put the whole "doc" mapping, not just the term fields, otherwise we'll wipe the _meta section of the mapping - try (XContentBuilder termFieldsMapping = ElasticsearchMappings.docMapping(termFields)) { + try (XContentBuilder termFieldsMapping = ElasticsearchMappings.resultsMapping(termFields)) { final PutMappingRequest request = client.admin().indices().preparePutMapping(indexName).setType(ElasticsearchMappings.DOC_TYPE) .setSource(termFieldsMapping).request(); executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, request, new ActionListener() { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/license/MachineLearningLicensingTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/license/MachineLearningLicensingTests.java index e8ac4285b6b33..75e79ede014d4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/license/MachineLearningLicensingTests.java +++ 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/license/MachineLearningLicensingTests.java @@ -54,7 +54,7 @@ public void resetLicensing() { ensureYellow(); } - public void testMachineLearningPutJobActionRestricted() throws Exception { + public void testMachineLearningPutJobActionRestricted() { String jobId = "testmachinelearningputjobactionrestricted"; // Pick a license that does not allow machine learning License.OperationMode mode = randomInvalidLicenseType();