From 1b6af55c4a3539dbb17cdeb1a15ec3fd01a533ee Mon Sep 17 00:00:00 2001 From: Nicolas Ruflin Date: Thu, 16 Feb 2017 12:56:03 +0100 Subject: [PATCH] Remove defaults from fields.common.yml for more flexibility (#3602) * Remove defaults from fields.common.yml for more flexibility Having the defaults in code instead of the libbeat fields.yml makes it possible to reuse the same script for other fields.yml files as well, for example those from a metricbeat module, without having to merge in the libbeat file. This is a prerequisite for dynamic index template generation in Golang, which can be limited to a single module. The top level `fields` entry was removed because, without the defaults, it is not needed anymore. All scripts were adjusted for the new format. * * Switch dict to object and list to array type. This is to be in line with the Elasticsearch naming: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html * Remove object-type from array as arrays cannot be predefined. --- filebeat/docs/fields.asciidoc | 2 +- heartbeat/docs/fields.asciidoc | 2 +- libbeat/_meta/fields.common.yml | 11 ++---- libbeat/scripts/generate_fields_docs.py | 4 +-- libbeat/scripts/generate_index_pattern.py | 9 ++--- libbeat/scripts/generate_template.py | 36 ++++++++++--------- libbeat/tests/system/beat/beat.py | 4 +-- metricbeat/docs/fields.asciidoc | 16 ++++----- .../module/docker/container/_meta/fields.yml | 7 ++-- metricbeat/module/docker/cpu/_meta/fields.yml | 2 +- .../module/docker/image/_meta/fields.yml | 7 ++-- .../module/kafka/partition/_meta/fields.yml | 2 +- .../module/system/process/_meta/fields.yml | 8 ++--- packetbeat/_meta/fields.yml | 34 +++++++++--------- packetbeat/docs/fields.asciidoc | 28 +++++++-------- packetbeat/protos/amqp/_meta/fields.yml | 6 ++-- packetbeat/protos/cassandra/_meta/fields.yml | 4 +-- packetbeat/protos/dns/_meta/fields.yml | 6 ++-- packetbeat/protos/http/_meta/fields.yml | 8 ++--- packetbeat/protos/memcache/_meta/fields.yml | 10 +++--- winlogbeat/_meta/fields.yml | 
10 +++--- winlogbeat/docs/fields.asciidoc | 8 ++--- 22 files changed, 110 insertions(+), 114 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 069e3b10d44..b524c042d66 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -333,7 +333,7 @@ Arbitrary tags that can be set per Beat and per transaction type. [float] === fields -type: dict +type: object Contains user configurable fields. diff --git a/heartbeat/docs/fields.asciidoc b/heartbeat/docs/fields.asciidoc index 4290203729c..7bee2fdef6b 100644 --- a/heartbeat/docs/fields.asciidoc +++ b/heartbeat/docs/fields.asciidoc @@ -65,7 +65,7 @@ Arbitrary tags that can be set per Beat and per transaction type. [float] === fields -type: dict +type: object Contains user configurable fields. diff --git a/libbeat/_meta/fields.common.yml b/libbeat/_meta/fields.common.yml index bb9c7ac84c5..314c5ca8826 100644 --- a/libbeat/_meta/fields.common.yml +++ b/libbeat/_meta/fields.common.yml @@ -1,11 +1,4 @@ -defaults: - type: keyword - required: false - index: true - doc_values: true - ignore_above: 1024 -fields: - key: beat title: Beat description: > @@ -40,8 +33,8 @@ fields: type. - name: fields - type: dict - dict-type: keyword + type: object + object-type: keyword description: > Contains user configurable fields. 
diff --git a/libbeat/scripts/generate_fields_docs.py b/libbeat/scripts/generate_fields_docs.py index e3635296a15..bfa11e77bd0 100644 --- a/libbeat/scripts/generate_fields_docs.py +++ b/libbeat/scripts/generate_fields_docs.py @@ -84,7 +84,7 @@ def fields_to_asciidoc(input, output, beat): # Create sections from available fields sections = {} - for v in docs["fields"]: + for v in docs: sections[v["key"]] = v["title"] for key in sorted(sections): @@ -92,7 +92,7 @@ def fields_to_asciidoc(input, output, beat): output.write("\n--\n") # Sort alphabetically by key - for section in sorted(docs["fields"], key=lambda field: field["key"]): + for section in sorted(docs, key=lambda field: field["key"]): section["name"] = section["title"] section["anchor"] = section["key"] document_fields(output, section, sections, "") diff --git a/libbeat/scripts/generate_index_pattern.py b/libbeat/scripts/generate_index_pattern.py index 2e748f35cb1..95408180597 100644 --- a/libbeat/scripts/generate_index_pattern.py +++ b/libbeat/scripts/generate_index_pattern.py @@ -97,7 +97,7 @@ def fields_to_index_pattern(args, input): } - for k, section in enumerate(docs["fields"]): + for k, section in enumerate(docs): fields_to_json(section, "", output) # add meta fields @@ -149,9 +149,10 @@ def get_index_pattern_name(index): with open(fields_yml, 'r') as f: fields = f.read() - # Prepend beat fields from libbeat - with open(args.libbeat + "/_meta/fields.generated.yml") as f: - fields = f.read() + fields + if os.path.basename(args.beat) != "libbeat": + # Prepend beat fields from libbeat + with open(args.libbeat + "/_meta/fields.generated.yml") as f: + fields = f.read() + fields # with open(target, 'w') as output: output = fields_to_index_pattern(args, fields) diff --git a/libbeat/scripts/generate_template.py b/libbeat/scripts/generate_template.py index a27543a327a..b85a09dc522 100644 --- a/libbeat/scripts/generate_template.py +++ b/libbeat/scripts/generate_template.py @@ -30,16 +30,16 @@ def 
fields_to_es_template(args, input, output, index, version): print("fields.yml is empty. Cannot generate template.") return - # Each template needs defaults - if "defaults" not in docs.keys(): - print("No defaults are defined. Each template needs at" + - " least defaults defined.") - return - - defaults = docs["defaults"] + defaults = { + "type": "keyword", + "required": False, + "index": True, + "doc_values": True, + "ignore_above": 1024, + } - for k, section in enumerate(docs["fields"]): - docs["fields"][k] = dedot(section) + for k, section in enumerate(docs): + docs[k] = dedot(section) # skeleton template = { @@ -105,9 +105,8 @@ def fields_to_es_template(args, input, output, index, version): } }) - for section in docs["fields"]: - prop, dynamic = fill_section_properties(args, section, - defaults, "") + for section in docs: + prop, dynamic = fill_section_properties(args, section, defaults, "") properties.update(prop) dynamic_templates.extend(dynamic) @@ -253,10 +252,15 @@ def fill_field_properties(args, field, defaults, path): properties[field["name"]]["scaling_factor"] = \ field.get("scaling_factor", 1000) - elif field["type"] in ["dict", "list"]: - if field.get("dict-type") == "text": + elif field["type"] in ["array"]: + properties[field["name"]] = { + "properties": {} + } + + elif field["type"] in ["object"]: + if field.get("object-type") == "text": # add a dynamic template to set all members of - # the dict as text + # the object as text if len(path) > 0: name = path + "." + field["name"] else: @@ -284,7 +288,7 @@ def fill_field_properties(args, field, defaults, path): } }) - if field.get("dict-type") == "long": + if field.get("object-type") == "long": if len(path) > 0: name = path + "." 
+ field["name"] else: diff --git a/libbeat/tests/system/beat/beat.py b/libbeat/tests/system/beat/beat.py index d3aba834c74..e870ab2a90c 100644 --- a/libbeat/tests/system/beat/beat.py +++ b/libbeat/tests/system/beat/beat.py @@ -408,7 +408,7 @@ def extract_fields(doc_list, name): dictfields.extend(subdictfields) else: fields.append(newName) - if field.get("type") in ["dict", "geo_point"]: + if field.get("type") in ["object", "geo_point"]: dictfields.append(newName) return fields, dictfields @@ -429,7 +429,7 @@ def extract_fields(doc_list, name): fields = [] dictfields = [] - for item in doc["fields"]: + for item in doc: subfields, subdictfields = extract_fields(item["fields"], "") fields.extend(subfields) dictfields.extend(subdictfields) diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index f74dc258851..d32f0bd5b6b 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -409,7 +409,7 @@ Arbitrary tags that can be set per Beat and per transaction type. [float] === fields -type: dict +type: object Contains user configurable fields. @@ -1335,7 +1335,7 @@ Size of the files that have been created or changed since creation. [float] === docker.container.labels -type: dict +type: object Image labels. @@ -1343,7 +1343,7 @@ Image labels. [float] === docker.container.tags -type: list +type: array Image tags. @@ -1574,7 +1574,7 @@ Total size of the all cached images associated to the current image. [float] === docker.image.labels -type: dict +type: object Image labels. @@ -1582,7 +1582,7 @@ Image labels. [float] === docker.image.tags -type: list +type: array Image tags. @@ -2990,7 +2990,7 @@ Leader id (broker). [float] === kafka.partition.partition.isr -type: list +type: array List of isr ids. @@ -6313,7 +6313,7 @@ The username of the user that created the process. If the username cannot be det [float] === system.process.env -type: dict +type: object The environment variables used to start the process. 
The data is available on FreeBSD, Linux, and OS X. @@ -6597,7 +6597,7 @@ CPU time consumed by tasks in user (kernel) mode. [float] === system.process.cgroup.cpuacct.percpu -type: dict +type: object CPU time (in nanoseconds) consumed on each CPU by all tasks in this cgroup. diff --git a/metricbeat/module/docker/container/_meta/fields.yml b/metricbeat/module/docker/container/_meta/fields.yml index 4735ce82e54..d800fcbf290 100644 --- a/metricbeat/module/docker/container/_meta/fields.yml +++ b/metricbeat/module/docker/container/_meta/fields.yml @@ -41,12 +41,11 @@ description: > Size of the files that have been created or changed since creation. - name: labels - type: dict - dict-type: keyword + type: object + object-type: keyword description: > Image labels. - name: tags - type: list - dict-type: keyword + type: array description: > Image tags. diff --git a/metricbeat/module/docker/cpu/_meta/fields.yml b/metricbeat/module/docker/cpu/_meta/fields.yml index 3d7704a8844..72c0b0a574d 100644 --- a/metricbeat/module/docker/cpu/_meta/fields.yml +++ b/metricbeat/module/docker/cpu/_meta/fields.yml @@ -35,6 +35,6 @@ Total CPU usage. # TODO: how to document cpu list? #- name: core - # type: list + # type: array # description: > # Dictionary with list of cpu and usage inside. diff --git a/metricbeat/module/docker/image/_meta/fields.yml b/metricbeat/module/docker/image/_meta/fields.yml index 19f71e358bd..52a5223ca71 100644 --- a/metricbeat/module/docker/image/_meta/fields.yml +++ b/metricbeat/module/docker/image/_meta/fields.yml @@ -35,13 +35,12 @@ Total size of the all cached images associated to the current image. - name: labels - type: dict - dict-type: keyword + type: object + object-type: keyword description: > Image labels. - name: tags - type: list - dict-type: keyword + type: array description: > Image tags. 
diff --git a/metricbeat/module/kafka/partition/_meta/fields.yml b/metricbeat/module/kafka/partition/_meta/fields.yml index 8c7f923085c..6ea85f44f85 100644 --- a/metricbeat/module/kafka/partition/_meta/fields.yml +++ b/metricbeat/module/kafka/partition/_meta/fields.yml @@ -32,7 +32,7 @@ description: > Leader id (broker). - name: isr - type: list + type: array description: > List of isr ids. - name: replica diff --git a/metricbeat/module/system/process/_meta/fields.yml b/metricbeat/module/system/process/_meta/fields.yml index 1696d4e0d6b..3b5c067cefb 100644 --- a/metricbeat/module/system/process/_meta/fields.yml +++ b/metricbeat/module/system/process/_meta/fields.yml @@ -36,8 +36,8 @@ numeric identifier (UID). On Windows, this field includes the user's domain and is formatted as `domain\username`. - name: env - type: dict - dict-type: keyword + type: object + object-type: keyword description: > The environment variables used to start the process. The data is available on FreeBSD, Linux, and OS X. @@ -235,8 +235,8 @@ description: CPU time consumed by tasks in user (kernel) mode. - name: percpu - type: dict - dict-type: long + type: object + object-type: long description: > CPU time (in nanoseconds) consumed on each CPU by all tasks in this cgroup. diff --git a/packetbeat/_meta/fields.yml b/packetbeat/_meta/fields.yml index 808f30b5641..7d03a2a8b70 100644 --- a/packetbeat/_meta/fields.yml +++ b/packetbeat/_meta/fields.yml @@ -569,7 +569,7 @@ Acknowledge multiple messages. - name: arguments - type: dict + type: object description: > Optional additional arguments passed to some methods. Can be of various types. @@ -596,8 +596,8 @@ MIME content encoding. - name: headers - type: dict - dict-type: keyword + type: object + object-type: keyword description: > Message header field table. @@ -828,8 +828,8 @@ description: The paging_state is a bytes value that should be used in QUERY/EXECUTE to continue paging and retrieve the remainder of the result for this query. 
- name: supported - type: dict - dict-type: keyword + type: object + object-type: keyword description: Indicates which startup options are supported by the server. This message comes as a response to an OPTIONS message. - name: authentication @@ -1046,7 +1046,7 @@ example: amazon.co.uk. - name: answers - type: dict + type: object description: > An array containing a dictionary about each answer section returned by the server. @@ -1082,7 +1082,7 @@ on the type and class of the resource record. - name: authorities - type: dict + type: object description: > An array containing a dictionary for each authority section from the answer. @@ -1107,7 +1107,7 @@ example: IN - name: additionals - type: dict + type: object description: > An array containing a dictionary for each additional section from the answer. @@ -1176,8 +1176,8 @@ The query parameters or form values. The query parameters are available in the Request-URI and the form values are set in the HTTP body when the content-type is set to `x-www-form-urlencoded`. - name: headers - type: dict - dict-type: keyword + type: object + object-type: keyword description: > A map containing the captured header fields from the request. Which headers to capture is configurable. If headers with the same @@ -1200,8 +1200,8 @@ example: Not found. - name: headers - type: dict - dict-type: keyword + type: object + object-type: keyword description: > A map containing the captured header fields from the response. Which headers to capture is configurable. If headers with the @@ -1352,12 +1352,12 @@ The status code value returned in the response (binary protocol only). - name: request.keys - type: list + type: array description: > The list of keys sent in the store or load commands. - name: response.keys - type: list + type: array description: > The list of keys returned for the load command (if present). @@ -1374,12 +1374,12 @@ If the command does not send any data, this field is missing. 
- name: request.values - type: list + type: array description: > The list of base64 encoded values sent with the request (if present). - name: response.values - type: list + type: array description: > The list of base64 encoded values sent with the response (if present). @@ -1482,7 +1482,7 @@ (if present). - name: response.stats - type: list + type: array description: > The list of statistic values returned. Each entry is a dictionary with the fields "name" and "value". diff --git a/packetbeat/docs/fields.asciidoc b/packetbeat/docs/fields.asciidoc index 1eec67e1935..1aec20510c3 100644 --- a/packetbeat/docs/fields.asciidoc +++ b/packetbeat/docs/fields.asciidoc @@ -229,7 +229,7 @@ Acknowledge multiple messages. [float] === amqp.arguments -type: dict +type: object Optional additional arguments passed to some methods. Can be of various types. @@ -271,7 +271,7 @@ MIME content encoding. [float] === amqp.headers -type: dict +type: object Message header field table. @@ -404,7 +404,7 @@ Arbitrary tags that can be set per Beat and per transaction type. [float] === fields -type: dict +type: object Contains user configurable fields. @@ -770,7 +770,7 @@ The paging_state is a bytes value that should be used in QUERY/EXECUTE to contin [float] === cassandra.response.supported -type: dict +type: object Indicates which startup options are supported by the server. This message comes as a response to an OPTIONS message. @@ -1300,7 +1300,7 @@ The effective top-level domain (eTLD) plus one more label. For example, the eTLD [float] === dns.answers -type: dict +type: object An array containing a dictionary about each answer section returned by the server. @@ -1351,7 +1351,7 @@ The data describing the resource. The meaning of this data depends on the type a [float] === dns.authorities -type: dict +type: object An array containing a dictionary for each authority section from the answer. @@ -1388,7 +1388,7 @@ The class of DNS data contained in this resource record. 
[float] === dns.additionals -type: dict +type: object An array containing a dictionary for each additional section from the answer. @@ -1774,7 +1774,7 @@ The query parameters or form values. The query parameters are available in the R [float] === http.request.headers -type: dict +type: object A map containing the captured header fields from the request. Which headers to capture is configurable. If headers with the same header name are present in the message, they will be separated by commas. @@ -1809,7 +1809,7 @@ The HTTP status phrase. [float] === http.response.headers -type: dict +type: object A map containing the captured header fields from the response. Which headers to capture is configurable. If headers with the same header name are present in the message, they will be separated by commas. @@ -2012,7 +2012,7 @@ The status code value returned in the response (binary protocol only). [float] === memcache.request.keys -type: list +type: array The list of keys sent in the store or load commands. @@ -2020,7 +2020,7 @@ The list of keys sent in the store or load commands. [float] === memcache.response.keys -type: list +type: array The list of keys returned for the load command (if present). @@ -2044,7 +2044,7 @@ The number of values found in the memcache response message. If the command does [float] === memcache.request.values -type: list +type: array The list of base64 encoded values sent with the request (if present). @@ -2052,7 +2052,7 @@ The list of base64 encoded values sent with the request (if present). [float] === memcache.response.values -type: list +type: array The list of base64 encoded values sent with the response (if present). @@ -2208,7 +2208,7 @@ The CAS (compare-and-swap) identifier to be used with CAS-based updates (if pres [float] === memcache.response.stats -type: list +type: array The list of statistic values returned. Each entry is a dictionary with the fields "name" and "value". 
diff --git a/packetbeat/protos/amqp/_meta/fields.yml b/packetbeat/protos/amqp/_meta/fields.yml index 4833ed27838..05cc6d34fce 100644 --- a/packetbeat/protos/amqp/_meta/fields.yml +++ b/packetbeat/protos/amqp/_meta/fields.yml @@ -125,7 +125,7 @@ Acknowledge multiple messages. - name: arguments - type: dict + type: object description: > Optional additional arguments passed to some methods. Can be of various types. @@ -152,8 +152,8 @@ MIME content encoding. - name: headers - type: dict - dict-type: keyword + type: object + object-type: keyword description: > Message header field table. diff --git a/packetbeat/protos/cassandra/_meta/fields.yml b/packetbeat/protos/cassandra/_meta/fields.yml index 545d2fb91cd..7537c746402 100644 --- a/packetbeat/protos/cassandra/_meta/fields.yml +++ b/packetbeat/protos/cassandra/_meta/fields.yml @@ -175,8 +175,8 @@ description: The paging_state is a bytes value that should be used in QUERY/EXECUTE to continue paging and retrieve the remainder of the result for this query. - name: supported - type: dict - dict-type: keyword + type: object + object-type: keyword description: Indicates which startup options are supported by the server. This message comes as a response to an OPTIONS message. - name: authentication diff --git a/packetbeat/protos/dns/_meta/fields.yml b/packetbeat/protos/dns/_meta/fields.yml index 7c7c7a670a0..7659fdf57d4 100644 --- a/packetbeat/protos/dns/_meta/fields.yml +++ b/packetbeat/protos/dns/_meta/fields.yml @@ -83,7 +83,7 @@ example: amazon.co.uk. - name: answers - type: dict + type: object description: > An array containing a dictionary about each answer section returned by the server. @@ -119,7 +119,7 @@ on the type and class of the resource record. - name: authorities - type: dict + type: object description: > An array containing a dictionary for each authority section from the answer. 
@@ -144,7 +144,7 @@ example: IN - name: additionals - type: dict + type: object description: > An array containing a dictionary for each additional section from the answer. diff --git a/packetbeat/protos/http/_meta/fields.yml b/packetbeat/protos/http/_meta/fields.yml index 9310bd27de2..643b9520939 100644 --- a/packetbeat/protos/http/_meta/fields.yml +++ b/packetbeat/protos/http/_meta/fields.yml @@ -15,8 +15,8 @@ The query parameters or form values. The query parameters are available in the Request-URI and the form values are set in the HTTP body when the content-type is set to `x-www-form-urlencoded`. - name: headers - type: dict - dict-type: keyword + type: object + object-type: keyword description: > A map containing the captured header fields from the request. Which headers to capture is configurable. If headers with the same @@ -39,8 +39,8 @@ example: Not found. - name: headers - type: dict - dict-type: keyword + type: object + object-type: keyword description: > A map containing the captured header fields from the response. Which headers to capture is configurable. If headers with the diff --git a/packetbeat/protos/memcache/_meta/fields.yml b/packetbeat/protos/memcache/_meta/fields.yml index 3d5fae104eb..8fe11955d44 100644 --- a/packetbeat/protos/memcache/_meta/fields.yml +++ b/packetbeat/protos/memcache/_meta/fields.yml @@ -102,12 +102,12 @@ The status code value returned in the response (binary protocol only). - name: request.keys - type: list + type: array description: > The list of keys sent in the store or load commands. - name: response.keys - type: list + type: array description: > The list of keys returned for the load command (if present). @@ -124,12 +124,12 @@ If the command does not send any data, this field is missing. - name: request.values - type: list + type: array description: > The list of base64 encoded values sent with the request (if present). 
- name: response.values - type: list + type: array description: > The list of base64 encoded values sent with the response (if present). @@ -232,7 +232,7 @@ (if present). - name: response.stats - type: list + type: array description: > The list of statistic values returned. Each entry is a dictionary with the fields "name" and "value". diff --git a/winlogbeat/_meta/fields.yml b/winlogbeat/_meta/fields.yml index fa10dcfacf8..e0f75bdee5a 100644 --- a/winlogbeat/_meta/fields.yml +++ b/winlogbeat/_meta/fields.yml @@ -38,15 +38,15 @@ event forwarding, this name can differ from the `beat.hostname`. - name: event_data - type: dict - dict-type: keyword + type: object + object-type: keyword required: false description: > The event-specific data. This field is mutually exclusive with `user_data`. If you are capturing event data on versions prior to Windows Vista, the parameters in `event_data` are named `param1`, `param2`, and so on, because event log parameters are unnamed in - earlier versions of Windows. + earlier versions of Windows. - name: event_id type: long @@ -150,8 +150,8 @@ The thread_id identifies the thread that generated the event. - name: user_data - type: dict - dict-type: keyword + type: object + object-type: keyword required: false description: > The event specific data. This field is mutually exclusive with diff --git a/winlogbeat/docs/fields.asciidoc b/winlogbeat/docs/fields.asciidoc index 55540254508..7826b431a75 100644 --- a/winlogbeat/docs/fields.asciidoc +++ b/winlogbeat/docs/fields.asciidoc @@ -66,7 +66,7 @@ Arbitrary tags that can be set per Beat and per transaction type. [float] === fields -type: dict +type: object Contains user configurable fields. @@ -168,11 +168,11 @@ The name of the computer that generated the record. When using Windows event for [float] === event_data -type: dict +type: object required: False -The event-specific data. This field is mutually exclusive with `user_data`. 
If you are capturing event data on versions prior to Windows Vista, the parameters in `event_data` are named `param1`, `param2`, and so on, because event log parameters are unnamed in earlier versions of Windows. +The event-specific data. This field is mutually exclusive with `user_data`. If you are capturing event data on versions prior to Windows Vista, the parameters in `event_data` are named `param1`, `param2`, and so on, because event log parameters are unnamed in earlier versions of Windows. [float] @@ -318,7 +318,7 @@ The thread_id identifies the thread that generated the event. [float] === user_data -type: dict +type: object required: False