From 8bebbdbdba5e2667aaf238a3e97f563ec3022c8a Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 7 Jun 2018 15:45:48 -0700 Subject: [PATCH 1/8] [DOCS] Adds X-Pack identifier to ML pages --- x-pack/docs/build.gradle | 26 +++++++++++++++++++++ x-pack/docs/en/ml/aggregations.asciidoc | 3 ++- x-pack/docs/en/ml/api-quickref.asciidoc | 4 +++- x-pack/docs/en/ml/categories.asciidoc | 5 ++-- x-pack/docs/en/ml/configuring.asciidoc | 3 ++- x-pack/docs/en/ml/customurl.asciidoc | 2 +- x-pack/docs/en/ml/functions.asciidoc | 3 ++- x-pack/docs/en/ml/functions/count.asciidoc | 9 +++---- x-pack/docs/en/ml/functions/geo.asciidoc | 3 ++- x-pack/docs/en/ml/functions/metric.asciidoc | 9 +++---- x-pack/docs/en/ml/functions/rare.asciidoc | 3 ++- x-pack/docs/en/ml/functions/sum.asciidoc | 8 +++---- x-pack/docs/en/ml/functions/time.asciidoc | 3 ++- x-pack/docs/en/ml/populations.asciidoc | 3 ++- x-pack/docs/en/ml/stopping-ml.asciidoc | 11 +++++---- x-pack/docs/en/ml/transforms.asciidoc | 10 ++------ 16 files changed, 69 insertions(+), 36 deletions(-) diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle index 0d1def2b4f5e5..6e56f5ec0da20 100644 --- a/x-pack/docs/build.gradle +++ b/x-pack/docs/build.gradle @@ -478,6 +478,32 @@ setups['calendar_outages_addevent'] = setups['calendar_outages_addjob'] + ''' { "description": "event 3", "start_time": "1514160000000", "end_time": "1514246400000"} ]} ''' +setups['it_ops_logs_job'] = ''' + - do: + xpack.ml.put_job: + job_id: "it_ops_logs" + body: > + { + "analysis_config" : { + "categorization_field_name": "message", + "bucket_span":"30m", + "detectors" :[ + { + "detector_description": "Unusual message counts", + "function": "count", + "by_field_name": "mlcategory" + }], + "categorization_filters":[ "\\[statement:.*\\]"] + }, + "analysis_limits":{ + "categorization_examples_limit": 5 + }, + "data_description" : { + "time_field":"time", + "time_format": "epoch_ms" + } + } +''' setups['role_mapping'] = ''' - do: xpack.security.put_role_mapping: 
diff --git a/x-pack/docs/en/ml/aggregations.asciidoc b/x-pack/docs/en/ml/aggregations.asciidoc index f3b8e6b3e34d6..5ff54b76f01b3 100644 --- a/x-pack/docs/en/ml/aggregations.asciidoc +++ b/x-pack/docs/en/ml/aggregations.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-configuring-aggregation]] -=== Aggregating Data For Faster Performance +=== Aggregating data for faster performance By default, {dfeeds} fetch data from {es} using search and scroll requests. It can be significantly more efficient, however, to aggregate data in {es} diff --git a/x-pack/docs/en/ml/api-quickref.asciidoc b/x-pack/docs/en/ml/api-quickref.asciidoc index 9602379c37416..dc87a6ba209c2 100644 --- a/x-pack/docs/en/ml/api-quickref.asciidoc +++ b/x-pack/docs/en/ml/api-quickref.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-api-quickref]] -== API Quick Reference +== API quick reference All {ml} endpoints have the following base: @@ -7,6 +8,7 @@ All {ml} endpoints have the following base: ---- /_xpack/ml/ ---- +// NOTCONSOLE The main {ml} resources can be accessed with a variety of endpoints: diff --git a/x-pack/docs/en/ml/categories.asciidoc b/x-pack/docs/en/ml/categories.asciidoc index bb217e2e18654..21f71b871cbb9 100644 --- a/x-pack/docs/en/ml/categories.asciidoc +++ b/x-pack/docs/en/ml/categories.asciidoc @@ -1,3 +1,4 @@ +[role="xpack"] [[ml-configuring-categories]] === Categorizing log messages @@ -77,7 +78,7 @@ NOTE: To add the `categorization_examples_limit` property, you must use the [float] [[ml-configuring-analyzer]] -==== Customizing the Categorization Analyzer +==== Customizing the categorization analyzer Categorization uses English dictionary words to identify log message categories. By default, it also uses English tokenization rules. For this reason, if you use @@ -213,7 +214,7 @@ API examples above. 
[float] [[ml-viewing-categories]] -==== Viewing Categorization Results +==== Viewing categorization results After you open the job and start the {dfeed} or supply data to the job, you can view the categorization results in {kib}. For example: diff --git a/x-pack/docs/en/ml/configuring.asciidoc b/x-pack/docs/en/ml/configuring.asciidoc index ba965a08b0462..c2c6e69a71128 100644 --- a/x-pack/docs/en/ml/configuring.asciidoc +++ b/x-pack/docs/en/ml/configuring.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-configuring]] -== Configuring Machine Learning +== Configuring machine learning If you want to use {xpackml} features, there must be at least one {ml} node in your cluster and all master-eligible nodes must have {ml} enabled. By default, diff --git a/x-pack/docs/en/ml/customurl.asciidoc b/x-pack/docs/en/ml/customurl.asciidoc index 7c773c4b9bf49..7c197084c0e5f 100644 --- a/x-pack/docs/en/ml/customurl.asciidoc +++ b/x-pack/docs/en/ml/customurl.asciidoc @@ -48,7 +48,7 @@ using the {ml} APIs. [float] [[ml-configuring-url-strings]] -==== String Substitution in Custom URLs +==== String substitution in custom URLs You can use dollar sign ($) delimited tokens in a custom URL. These tokens are substituted for the values of the corresponding fields in the anomaly records. diff --git a/x-pack/docs/en/ml/functions.asciidoc b/x-pack/docs/en/ml/functions.asciidoc index ae5f768e05697..e32470c6827b6 100644 --- a/x-pack/docs/en/ml/functions.asciidoc +++ b/x-pack/docs/en/ml/functions.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-functions]] -== Function Reference +== Function reference The {xpackml} features include analysis functions that provide a wide variety of flexible ways to analyze data for anomalies. 
diff --git a/x-pack/docs/en/ml/functions/count.asciidoc b/x-pack/docs/en/ml/functions/count.asciidoc index 4b70f80933dca..c268ef6a395d7 100644 --- a/x-pack/docs/en/ml/functions/count.asciidoc +++ b/x-pack/docs/en/ml/functions/count.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-count-functions]] -=== Count Functions +=== Count functions Count functions detect anomalies when the number of events in a bucket is anomalous. @@ -21,7 +22,7 @@ The {xpackml} features include the following count functions: [float] [[ml-count]] -===== Count, High_count, Low_count +===== Count, high_count, low_count The `count` function detects anomalies when the number of events in a bucket is anomalous. @@ -103,7 +104,7 @@ and the `summary_count_field_name` property. [float] [[ml-nonzero-count]] -===== Non_zero_count, High_non_zero_count, Low_non_zero_count +===== Non_zero_count, high_non_zero_count, low_non_zero_count The `non_zero_count` function detects anomalies when the number of events in a bucket is anomalous, but it ignores cases where the bucket count is zero. Use @@ -163,7 +164,7 @@ data is sparse, use the `count` functions, which are optimized for that scenario [float] [[ml-distinct-count]] -===== Distinct_count, High_distinct_count, Low_distinct_count +===== Distinct_count, high_distinct_count, low_distinct_count The `distinct_count` function detects anomalies where the number of distinct values in one field is unusual. diff --git a/x-pack/docs/en/ml/functions/geo.asciidoc b/x-pack/docs/en/ml/functions/geo.asciidoc index cc98e95bf2069..321277b235e01 100644 --- a/x-pack/docs/en/ml/functions/geo.asciidoc +++ b/x-pack/docs/en/ml/functions/geo.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-geo-functions]] -=== Geographic Functions +=== Geographic functions The geographic functions detect anomalies in the geographic location of the input data. 
diff --git a/x-pack/docs/en/ml/functions/metric.asciidoc b/x-pack/docs/en/ml/functions/metric.asciidoc index 495fc6f333575..4397da9cb5623 100644 --- a/x-pack/docs/en/ml/functions/metric.asciidoc +++ b/x-pack/docs/en/ml/functions/metric.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-metric-functions]] -=== Metric Functions +=== Metric functions The metric functions include functions such as mean, min and max. These values are calculated for each bucket. Field values that cannot be converted to @@ -106,7 +107,7 @@ response times for each bucket. [float] [[ml-metric-median]] -==== Median, High_median, Low_median +==== Median, high_median, low_median The `median` function detects anomalies in the statistical median of a value. The median value is calculated for each bucket. @@ -143,7 +144,7 @@ median `responsetime` for each application over time. It detects when the median [float] [[ml-metric-mean]] -==== Mean, High_mean, Low_mean +==== Mean, high_mean, low_mean The `mean` function detects anomalies in the arithmetic mean of a value. The mean value is calculated for each bucket. @@ -245,7 +246,7 @@ when the mean, min, or max `responsetime` is unusual compared to previous [float] [[ml-metric-varp]] -==== Varp, High_varp, Low_varp +==== Varp, high_varp, low_varp The `varp` function detects anomalies in the variance of a value which is a measure of the variability and spread in the data. diff --git a/x-pack/docs/en/ml/functions/rare.asciidoc b/x-pack/docs/en/ml/functions/rare.asciidoc index 2485605557cfa..ad9983a6254f1 100644 --- a/x-pack/docs/en/ml/functions/rare.asciidoc +++ b/x-pack/docs/en/ml/functions/rare.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-rare-functions]] -=== Rare Functions +=== Rare functions The rare functions detect values that occur rarely in time or rarely for a population. 
diff --git a/x-pack/docs/en/ml/functions/sum.asciidoc b/x-pack/docs/en/ml/functions/sum.asciidoc index 3a0f0b264e9ef..00a19f1f3a45a 100644 --- a/x-pack/docs/en/ml/functions/sum.asciidoc +++ b/x-pack/docs/en/ml/functions/sum.asciidoc @@ -1,6 +1,6 @@ - +[role="xpack"] [[ml-sum-functions]] -=== Sum Functions +=== Sum functions The sum functions detect anomalies when the sum of a field in a bucket is anomalous. @@ -25,7 +25,7 @@ a more appropriate method to using the sum function. [float] [[ml-sum]] -==== Sum, High_sum, Low_sum +==== Sum, high_sum, low_sum The `sum` function detects anomalies where the sum of a field in a bucket is anomalous. @@ -79,7 +79,7 @@ to find users that are abusing internet privileges. [float] [[ml-nonnull-sum]] -==== Non_null_sum, High_non_null_sum, Low_non_null_sum +==== Non_null_sum, high_non_null_sum, low_non_null_sum The `non_null_sum` function is useful if your data is sparse. Buckets without values are ignored and buckets with a zero value are analyzed. diff --git a/x-pack/docs/en/ml/functions/time.asciidoc b/x-pack/docs/en/ml/functions/time.asciidoc index a8067e2ca1342..a7349bede34c1 100644 --- a/x-pack/docs/en/ml/functions/time.asciidoc +++ b/x-pack/docs/en/ml/functions/time.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-time-functions]] -=== Time Functions +=== Time functions The time functions detect events that happen at unusual times, either of the day or of the week. 
These functions can be used to find unusual patterns of behavior, diff --git a/x-pack/docs/en/ml/populations.asciidoc b/x-pack/docs/en/ml/populations.asciidoc index 53e10ce8d41b6..bf0dd2ad7d7bb 100644 --- a/x-pack/docs/en/ml/populations.asciidoc +++ b/x-pack/docs/en/ml/populations.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-configuring-pop]] -=== Performing Population Analysis +=== Performing population analysis Entities or events in your data can be considered anomalous when: diff --git a/x-pack/docs/en/ml/stopping-ml.asciidoc b/x-pack/docs/en/ml/stopping-ml.asciidoc index 862fe5cf05061..c0be2d947cdc7 100644 --- a/x-pack/docs/en/ml/stopping-ml.asciidoc +++ b/x-pack/docs/en/ml/stopping-ml.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[stopping-ml]] -== Stopping Machine Learning +== Stopping machine learning An orderly shutdown of {ml} ensures that: @@ -24,10 +25,10 @@ request stops the `feed1` {dfeed}: [source,js] -------------------------------------------------- -POST _xpack/ml/datafeeds/feed1/_stop +POST _xpack/ml/datafeeds/datafeed-total-requests/_stop -------------------------------------------------- // CONSOLE -// TEST[skip:todo] +// TEST[setup:server_metrics_startdf] NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}. For more information, see <>. @@ -63,10 +64,10 @@ example, the following request closes the `job1` job: [source,js] -------------------------------------------------- -POST _xpack/ml/anomaly_detectors/job1/_close +POST _xpack/ml/anomaly_detectors/total-requests/_close -------------------------------------------------- // CONSOLE -// TEST[skip:todo] +// TEST[setup:server_metrics_openjob] NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}. For more information, see <>. 
diff --git a/x-pack/docs/en/ml/transforms.asciidoc b/x-pack/docs/en/ml/transforms.asciidoc index 9789518081be2..c4b4d56029748 100644 --- a/x-pack/docs/en/ml/transforms.asciidoc +++ b/x-pack/docs/en/ml/transforms.asciidoc @@ -1,5 +1,6 @@ +[role="xpack"] [[ml-configuring-transform]] -=== Transforming Data With Script Fields +=== Transforming data with script fields If you use {dfeeds}, you can add scripts to transform your data before it is analyzed. {dfeeds-cap} contain an optional `script_fields` property, where @@ -602,10 +603,3 @@ The preview {dfeed} API returns the following results, which show that ] ---------------------------------- // TESTRESPONSE - -//// -==== Configuring Script Fields in {dfeeds-cap} - -//TO-DO: Add Kibana steps from -//https://github.com/elastic/prelert-legacy/wiki/Transforming-data-with-script_fields#transforming-geo_point-data-to-a-workable-string-format -//// From cba1f0cd9b98b942ca57ae7d2af37a31dbef993e Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 7 Jun 2018 16:20:41 -0700 Subject: [PATCH 2/8] [DOCS] Adds testing for custom URL example --- x-pack/docs/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle index 6e56f5ec0da20..183a6ee83a98e 100644 --- a/x-pack/docs/build.gradle +++ b/x-pack/docs/build.gradle @@ -493,7 +493,7 @@ setups['it_ops_logs_job'] = ''' "function": "count", "by_field_name": "mlcategory" }], - "categorization_filters":[ "\\[statement:.*\\]"] + "categorization_filters":[ "\\/[statement:.*\\/]"] }, "analysis_limits":{ "categorization_examples_limit": 5 From 150cdd5a5fa96fcd4ea3b30e417d40e3e4b6b0c3 Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 7 Jun 2018 16:36:56 -0700 Subject: [PATCH 3/8] [DOCS] Disables code snippet testing for ML functions --- x-pack/docs/build.gradle | 10 +++------- x-pack/docs/en/ml/functions/count.asciidoc | 11 +++++++++-- x-pack/docs/en/ml/functions/geo.asciidoc | 3 +++ x-pack/docs/en/ml/functions/info.asciidoc | 
3 +++ x-pack/docs/en/ml/functions/metric.asciidoc | 11 +++++++++++ x-pack/docs/en/ml/functions/rare.asciidoc | 3 +++ x-pack/docs/en/ml/functions/sum.asciidoc | 10 +++------- x-pack/docs/en/ml/functions/time.asciidoc | 2 ++ 8 files changed, 37 insertions(+), 16 deletions(-) diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle index 183a6ee83a98e..2a6fb96764486 100644 --- a/x-pack/docs/build.gradle +++ b/x-pack/docs/build.gradle @@ -9,13 +9,9 @@ apply plugin: 'elasticsearch.docs-test' * only remove entries from this list. When it is empty we'll remove it * entirely and have a party! There will be cake and everything.... */ buildRestTests.expectedUnconvertedCandidates = [ - 'en/ml/functions/count.asciidoc', - 'en/ml/functions/geo.asciidoc', - 'en/ml/functions/info.asciidoc', - 'en/ml/functions/metric.asciidoc', - 'en/ml/functions/rare.asciidoc', - 'en/ml/functions/sum.asciidoc', - 'en/ml/functions/time.asciidoc', + 'en/rest-api/security/ssl.asciidoc', + 'en/rest-api/security/users.asciidoc', + 'en/rest-api/security/tokens.asciidoc', 'en/rest-api/watcher/put-watch.asciidoc', 'en/security/authentication/user-cache.asciidoc', 'en/security/authorization/field-and-document-access-control.asciidoc', diff --git a/x-pack/docs/en/ml/functions/count.asciidoc b/x-pack/docs/en/ml/functions/count.asciidoc index c268ef6a395d7..ffbba04ef9dbb 100644 --- a/x-pack/docs/en/ml/functions/count.asciidoc +++ b/x-pack/docs/en/ml/functions/count.asciidoc @@ -47,6 +47,7 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects] -------------------------------------------------- { "function" : "count" } -------------------------------------------------- +// NOTCONSOLE This example is probably the simplest possible analysis. It identifies time buckets during which the overall count of events is higher or lower than @@ -64,6 +65,7 @@ and detects when the event rate is unusual compared to its past behavior. 
"over_field_name": "user" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_count` function in a detector in your job, it models the event rate for each error code. It detects users that generate an @@ -78,6 +80,7 @@ unusually high count of error codes compared to other users. "by_field_name" : "status_code" } -------------------------------------------------- +// NOTCONSOLE In this example, the function detects when the count of events for a status code is lower than usual. @@ -96,11 +99,12 @@ compared to its past behavior. ] } -------------------------------------------------- +// NOTCONSOLE If you are analyzing an aggregated `events_per_min` field, do not use a sum function (for example, `sum(events_per_min)`). Instead, use the count function -and the `summary_count_field_name` property. -//TO-DO: For more information, see <>. +and the `summary_count_field_name` property. For more information, see +<>. [float] [[ml-nonzero-count]] @@ -150,6 +154,7 @@ The `non_zero_count` function models only the following data: "by_field_name" : "signaturename" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_non_zero_count` function in a detector in your job, it models the count of events for the `signaturename` field. It ignores any buckets @@ -193,6 +198,7 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects] "field_name" : "user" } -------------------------------------------------- +// NOTCONSOLE This `distinct_count` function detects when a system has an unusual number of logged in users. When you use this function in a detector in your job, it @@ -208,6 +214,7 @@ users is unusual compared to the past. "over_field_name": "src_ip" } -------------------------------------------------- +// NOTCONSOLE This example detects instances of port scanning. When you use this function in a detector in your job, it models the distinct count of ports. 
It also detects the diff --git a/x-pack/docs/en/ml/functions/geo.asciidoc b/x-pack/docs/en/ml/functions/geo.asciidoc index 321277b235e01..12bdc7b4f079a 100644 --- a/x-pack/docs/en/ml/functions/geo.asciidoc +++ b/x-pack/docs/en/ml/functions/geo.asciidoc @@ -35,6 +35,7 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects] "by_field_name" : "creditCardNumber" } -------------------------------------------------- +// NOTCONSOLE If you use this `lat_long` function in a detector in your job, it detects anomalies where the geographic location of a credit card transaction is @@ -55,6 +56,7 @@ For example, JSON data might contain the following transaction coordinates: "creditCardNumber": "1234123412341234" } -------------------------------------------------- +// NOTCONSOLE In {es}, location data is likely to be stored in `geo_point` fields. For more information, see {ref}/geo-point.html[Geo-point datatype]. This data type is not @@ -76,5 +78,6 @@ format. For example, the following Painless script transforms } } -------------------------------------------------- +// NOTCONSOLE For more information, see <<ml-configuring-transform>>. diff --git a/x-pack/docs/en/ml/functions/info.asciidoc b/x-pack/docs/en/ml/functions/info.asciidoc index f964d4eb3ec67..2c3117e0e5644 100644 --- a/x-pack/docs/en/ml/functions/info.asciidoc +++ b/x-pack/docs/en/ml/functions/info.asciidoc @@ -40,6 +40,7 @@ For more information about those properties, see "over_field_name" : "highest_registered_domain" } -------------------------------------------------- +// NOTCONSOLE If you use this `info_content` function in a detector in your job, it models information that is present in the `subdomain` string. It detects anomalies @@ -60,6 +61,7 @@ choice. "over_field_name" : "src_ip" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_info_content` function in a detector in your job, it models information content that is held in the DNS query string.
It detects @@ -77,6 +79,7 @@ information content is higher than expected. "by_field_name" : "logfilename" } -------------------------------------------------- +// NOTCONSOLE If you use this `low_info_content` function in a detector in your job, it models information content that is present in the message string for each diff --git a/x-pack/docs/en/ml/functions/metric.asciidoc b/x-pack/docs/en/ml/functions/metric.asciidoc index 4397da9cb5623..3ee5179702720 100644 --- a/x-pack/docs/en/ml/functions/metric.asciidoc +++ b/x-pack/docs/en/ml/functions/metric.asciidoc @@ -43,6 +43,7 @@ For more information about those properties, see "by_field_name" : "product" } -------------------------------------------------- +// NOTCONSOLE If you use this `min` function in a detector in your job, it detects where the smallest transaction is lower than previously observed. You can use this @@ -77,6 +78,7 @@ For more information about those properties, see "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `max` function in a detector in your job, it detects where the longest `responsetime` is longer than previously observed. You can use this @@ -99,6 +101,7 @@ to previous applications. "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE The analysis in the previous example can be performed alongside `high_mean` functions by application. By combining detectors and using the same influencer @@ -137,6 +140,7 @@ For more information about those properties, see "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `median` function in a detector in your job, it models the median `responsetime` for each application over time. 
It detects when the median @@ -174,6 +178,7 @@ For more information about those properties, see "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `mean` function in a detector in your job, it models the mean `responsetime` for each application over time. It detects when the mean @@ -188,6 +193,7 @@ If you use this `mean` function in a detector in your job, it models the mean "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_mean` function in a detector in your job, it models the mean `responsetime` for each application over time. It detects when the mean @@ -202,6 +208,7 @@ mean `responsetime` for each application over time. It detects when the mean "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `low_mean` function in a detector in your job, it models the mean `responsetime` for each application over time. It detects when the mean @@ -238,6 +245,7 @@ For more information about those properties, see "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `metric` function in a detector in your job, it models the mean, min, and max `responsetime` for each application over time. It detects @@ -274,6 +282,7 @@ For more information about those properties, see "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `varp` function in a detector in your job, it models the variance in values of `responsetime` for each application over time. It detects @@ -289,6 +298,7 @@ behavior. "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_varp` function in a detector in your job, it models the variance in values of `responsetime` for each application over time. 
It detects @@ -304,6 +314,7 @@ behavior. "by_field_name" : "application" } -------------------------------------------------- +// NOTCONSOLE If you use this `low_varp` function in a detector in your job, it models the variance in values of `responsetime` for each application over time. It detects diff --git a/x-pack/docs/en/ml/functions/rare.asciidoc b/x-pack/docs/en/ml/functions/rare.asciidoc index ad9983a6254f1..fc30918b508f1 100644 --- a/x-pack/docs/en/ml/functions/rare.asciidoc +++ b/x-pack/docs/en/ml/functions/rare.asciidoc @@ -55,6 +55,7 @@ For more information about those properties, see "by_field_name" : "status" } -------------------------------------------------- +// NOTCONSOLE If you use this `rare` function in a detector in your job, it detects values that are rare in time. It models status codes that occur over time and detects @@ -70,6 +71,7 @@ status codes in a web access log that have never (or rarely) occurred before. "over_field_name" : "clientip" } -------------------------------------------------- +// NOTCONSOLE If you use this `rare` function in a detector in your job, it detects values that are rare in a population. It models status code and client IP interactions @@ -112,6 +114,7 @@ For more information about those properties, see "over_field_name" : "clientip" } -------------------------------------------------- +// NOTCONSOLE If you use this `freq_rare` function in a detector in your job, it detects values that are frequently rare in a population. 
It models URI paths and diff --git a/x-pack/docs/en/ml/functions/sum.asciidoc b/x-pack/docs/en/ml/functions/sum.asciidoc index 00a19f1f3a45a..7a95ad63fccee 100644 --- a/x-pack/docs/en/ml/functions/sum.asciidoc +++ b/x-pack/docs/en/ml/functions/sum.asciidoc @@ -16,13 +16,6 @@ The {xpackml} features include the following sum functions: * xref:ml-sum[`sum`, `high_sum`, `low_sum`] * xref:ml-nonnull-sum[`non_null_sum`, `high_non_null_sum`, `low_non_null_sum`] -//// -TBD: Incorporate from prelert docs?: -Input data may contain pre-calculated fields giving the total count of some value e.g. transactions per minute. -Ensure you are familiar with our advice on Summarization of Input Data, as this is likely to provide -a more appropriate method to using the sum function. -//// - [float] [[ml-sum]] ==== Sum, high_sum, low_sum @@ -54,6 +47,7 @@ For more information about those properties, see "over_field_name" : "employee" } -------------------------------------------------- +// NOTCONSOLE If you use this `sum` function in a detector in your job, it models total expenses per employees for each cost center. For each time bucket, @@ -69,6 +63,7 @@ to other employees. "over_field_name" : "cs_host" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_sum` function in a detector in your job, it models total `cs_bytes`. It detects `cs_hosts` that transfer unusually high @@ -110,6 +105,7 @@ is not applicable for this function. "byFieldName" : "employee" } -------------------------------------------------- +// NOTCONSOLE If you use this `high_non_null_sum` function in a detector in your job, it models the total `amount_approved` for each employee. 
It ignores any buckets diff --git a/x-pack/docs/en/ml/functions/time.asciidoc b/x-pack/docs/en/ml/functions/time.asciidoc index a7349bede34c1..ac8199307f130 100644 --- a/x-pack/docs/en/ml/functions/time.asciidoc +++ b/x-pack/docs/en/ml/functions/time.asciidoc @@ -61,6 +61,7 @@ For more information about those properties, see "by_field_name" : "process" } -------------------------------------------------- +// NOTCONSOLE If you use this `time_of_day` function in a detector in your job, it models when events occur throughout a day for each process. It detects when an @@ -92,6 +93,7 @@ For more information about those properties, see "over_field_name" : "workstation" } -------------------------------------------------- +// NOTCONSOLE If you use this `time_of_week` function in a detector in your job, it models when events occur throughout the week for each `eventcode`. It detects From 4b7e7c9ec407dda8527902cfb2e7b0fd30ce1001 Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 7 Jun 2018 17:05:04 -0700 Subject: [PATCH 4/8] [DOCS] Enables code testing for ML count examples --- x-pack/docs/en/ml/functions/count.asciidoc | 113 ++++++++++++++++----- 1 file changed, 88 insertions(+), 25 deletions(-) diff --git a/x-pack/docs/en/ml/functions/count.asciidoc b/x-pack/docs/en/ml/functions/count.asciidoc index ffbba04ef9dbb..a2dc5645b61ae 100644 --- a/x-pack/docs/en/ml/functions/count.asciidoc +++ b/x-pack/docs/en/ml/functions/count.asciidoc @@ -45,9 +45,20 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects] .Example 1: Analyzing events with the count function [source,js] -------------------------------------------------- -{ "function" : "count" } +PUT _xpack/ml/anomaly_detectors/example1 +{ + "analysis_config": { + "detectors": [{ + "function" : "count" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } +} -------------------------------------------------- -// NOTCONSOLE +// CONSOLE This example is probably the 
simplest possible analysis. It identifies time buckets during which the overall count of events is higher or lower than @@ -59,13 +70,22 @@ and detects when the event rate is unusual compared to its past behavior. .Example 2: Analyzing errors with the high_count function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example2 { - "function" : "high_count", - "by_field_name" : "error_code", - "over_field_name": "user" + "analysis_config": { + "detectors": [{ + "function" : "high_count", + "by_field_name" : "error_code", + "over_field_name": "user" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE If you use this `high_count` function in a detector in your job, it models the event rate for each error code. It detects users that generate an @@ -75,12 +95,21 @@ unusually high count of error codes compared to other users. .Example 3: Analyzing status codes with the low_count function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example3 { - "function" : "low_count", - "by_field_name" : "status_code" + "analysis_config": { + "detectors": [{ + "function" : "low_count", + "by_field_name" : "status_code" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE In this example, the function detects when the count of events for a status code is lower than usual. @@ -92,14 +121,21 @@ compared to its past behavior. 
.Example 4: Analyzing aggregated data with the count function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example4 { - "summary_count_field_name" : "events_per_min", - "detectors" [ - { "function" : "count" } - ] -} + "analysis_config": { + "summary_count_field_name" : "events_per_min", + "detectors": [{ + "function" : "count" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } +} -------------------------------------------------- -// NOTCONSOLE +// CONSOLE If you are analyzing an aggregated `events_per_min` field, do not use a sum function (for example, `sum(events_per_min)`). Instead, use the count function @@ -149,12 +185,21 @@ The `non_zero_count` function models only the following data: .Example 5: Analyzing signatures with the high_non_zero_count function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example5 { - "function" : "high_non_zero_count", - "by_field_name" : "signaturename" + "analysis_config": { + "detectors": [{ + "function" : "high_non_zero_count", + "by_field_name" : "signaturename" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE If you use this `high_non_zero_count` function in a detector in your job, it models the count of events for the `signaturename` field. 
It ignores any buckets @@ -193,12 +238,21 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects] .Example 6: Analyzing users with the distinct_count function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example6 { - "function" : "distinct_count", - "field_name" : "user" + "analysis_config": { + "detectors": [{ + "function" : "distinct_count", + "field_name" : "user" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE This `distinct_count` function detects when a system has an unusual number of logged in users. When you use this function in a detector in your job, it @@ -208,13 +262,22 @@ users is unusual compared to the past. .Example 7: Analyzing ports with the high_distinct_count function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example7 { - "function" : "high_distinct_count", - "field_name" : "dst_port", - "over_field_name": "src_ip" + "analysis_config": { + "detectors": [{ + "function" : "high_distinct_count", + "field_name" : "dst_port", + "over_field_name": "src_ip" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE This example detects instances of port scanning. When you use this function in a detector in your job, it models the distinct count of ports. 
It also detects the From 94a7d64c2a175fa22363208d72e1097d995ff4a8 Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 7 Jun 2018 17:26:35 -0700 Subject: [PATCH 5/8] [DOCS] Enables code testing for ML geo examples --- x-pack/docs/en/ml/functions/geo.asciidoc | 28 +++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/x-pack/docs/en/ml/functions/geo.asciidoc b/x-pack/docs/en/ml/functions/geo.asciidoc index 12bdc7b4f079a..e9685b46e1677 100644 --- a/x-pack/docs/en/ml/functions/geo.asciidoc +++ b/x-pack/docs/en/ml/functions/geo.asciidoc @@ -29,13 +29,22 @@ see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects] .Example 1: Analyzing transactions with the lat_long function [source,js] -------------------------------------------------- +PUT _xpack/ml/anomaly_detectors/example1 { - "function" : "lat_long", - "field_name" : "transactionCoordinates", - "by_field_name" : "creditCardNumber" + "analysis_config": { + "detectors": [{ + "function" : "lat_long", + "field_name" : "transactionCoordinates", + "by_field_name" : "creditCardNumber" + }] + }, + "data_description": { + "time_field":"timestamp", + "time_format": "epoch_ms" + } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE If you use this `lat_long` function in a detector in your job, it detects anomalies where the geographic location of a credit card transaction is @@ -67,7 +76,15 @@ format. For example, the following Painless script transforms [source,js] -------------------------------------------------- +PUT _xpack/ml/datafeeds/datafeed-test2 { + "job_id": "farequote", + "indices": ["farequote"], + "query": { + "match_all": { + "boost": 1 + } + }, "script_fields": { "lat-lon": { "script": { @@ -78,6 +95,7 @@ format. For example, the following Painless script transforms } } -------------------------------------------------- -// NOTCONSOLE +// CONSOLE +// TEST[setup:farequote_job] For more information, see <<ml-configuring-transform>>. 
From 2e3dbcd2a7410874a4bb9cdf6769095f89021494 Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 14 Jun 2018 10:47:31 -0700 Subject: [PATCH 6/8] [DOCS] Removes unnecessary step from build.gradle --- x-pack/docs/build.gradle | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle index 2a6fb96764486..fbcb5ab1f08e8 100644 --- a/x-pack/docs/build.gradle +++ b/x-pack/docs/build.gradle @@ -474,32 +474,6 @@ setups['calendar_outages_addevent'] = setups['calendar_outages_addjob'] + ''' { "description": "event 3", "start_time": "1514160000000", "end_time": "1514246400000"} ]} ''' -setups['it_ops_logs_job'] = ''' - - do: - xpack.ml.put_job: - job_id: "it_ops_logs" - body: > - { - "analysis_config" : { - "categorization_field_name": "message", - "bucket_span":"30m", - "detectors" :[ - { - "detector_description": "Unusual message counts", - "function": "count", - "by_field_name": "mlcategory" - }], - "categorization_filters":[ "\\/[statement:.*\\/]"] - }, - "analysis_limits":{ - "categorization_examples_limit": 5 - }, - "data_description" : { - "time_field":"time", - "time_format": "epoch_ms" - } - } -''' setups['role_mapping'] = ''' - do: xpack.security.put_role_mapping: From b77ea62771bb21a4c4181d49632933675f9b43d1 Mon Sep 17 00:00:00 2001 From: lcawl Date: Mon, 18 Jun 2018 12:10:17 -0700 Subject: [PATCH 7/8] [DOCS] Rebased build.gradle file --- x-pack/docs/build.gradle | 3 --- 1 file changed, 3 deletions(-) diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle index fbcb5ab1f08e8..f1d06df369618 100644 --- a/x-pack/docs/build.gradle +++ b/x-pack/docs/build.gradle @@ -9,9 +9,6 @@ apply plugin: 'elasticsearch.docs-test' * only remove entries from this list. When it is empty we'll remove it * entirely and have a party! There will be cake and everything.... 
*/ buildRestTests.expectedUnconvertedCandidates = [ - 'en/rest-api/security/ssl.asciidoc', - 'en/rest-api/security/users.asciidoc', - 'en/rest-api/security/tokens.asciidoc', 'en/rest-api/watcher/put-watch.asciidoc', 'en/security/authentication/user-cache.asciidoc', 'en/security/authorization/field-and-document-access-control.asciidoc', From ccfadcc5ff2d7d33fd096169db448e487f905eca Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 19 Jun 2018 10:32:03 -0700 Subject: [PATCH 8/8] [DOCS] Removed API quickref from build.gradle --- x-pack/docs/build.gradle | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/docs/build.gradle b/x-pack/docs/build.gradle index f1d06df369618..e97faf12a6cd0 100644 --- a/x-pack/docs/build.gradle +++ b/x-pack/docs/build.gradle @@ -49,7 +49,6 @@ buildRestTests.expectedUnconvertedCandidates = [ 'en/watcher/troubleshooting.asciidoc', 'en/rest-api/license/delete-license.asciidoc', 'en/rest-api/license/update-license.asciidoc', - 'en/ml/api-quickref.asciidoc', 'en/rest-api/ml/delete-snapshot.asciidoc', 'en/rest-api/ml/forecast.asciidoc', 'en/rest-api/ml/get-bucket.asciidoc',