From 6bdc7d721aca52fff6538c81f3ee0c6dcd03cca4 Mon Sep 17 00:00:00 2001 From: Shaunak Kashyap Date: Thu, 14 May 2020 15:50:46 -0700 Subject: [PATCH] Introduce auto detection of format (#18095) * Introduce auto detection of format * Update docs * Auto detect format for slowlogs * Exclude JSON logs from multiline matching * Adding CHANGELOG entry * Fix typo * Parsing everything as JSON first * Going back to old processor definitions * Adding Known Issues section in doc * Completing regex pattern * Updating regex pattern * Generating docs --- CHANGELOG.next.asciidoc | 1 + filebeat/docs/modules/logstash.asciidoc | 23 +++++++----------- filebeat/module/logstash/_meta/docs.asciidoc | 23 +++++++----------- filebeat/module/logstash/log/config/log.yml | 4 +--- .../logstash/log/ingest/pipeline-json.yml | 3 --- ...eline-plain.yml => pipeline-plaintext.yml} | 3 --- .../module/logstash/log/ingest/pipeline.yml | 24 +++++++++++++++++++ filebeat/module/logstash/log/manifest.yml | 13 ++++++---- .../test/logstash-plain-7.4.log-expected.json | 2 +- .../logstash/log/test/logstash-plain.log | 8 ++++++- .../log/test/logstash-plain.log-expected.json | 20 +++++++++++++++- .../logstash/slowlog/ingest/pipeline-json.yml | 3 --- ...eline-plain.yml => pipeline-plaintext.yml} | 3 --- .../logstash/slowlog/ingest/pipeline.yml | 24 +++++++++++++++++++ filebeat/module/logstash/slowlog/manifest.yml | 13 ++++++---- 15 files changed, 109 insertions(+), 58 deletions(-) rename filebeat/module/logstash/log/ingest/{pipeline-plain.yml => pipeline-plaintext.yml} (96%) create mode 100644 filebeat/module/logstash/log/ingest/pipeline.yml rename filebeat/module/logstash/slowlog/ingest/{pipeline-plain.yml => pipeline-plaintext.yml} (96%) create mode 100644 filebeat/module/logstash/slowlog/ingest/pipeline.yml diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 69f175ccd15..8a4152bf57a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -325,6 +325,7 @@ 
https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Improve ECS categorization field mappings in system module. {issue}16031[16031] {pull}18065[18065] - Change the `json.*` input settings implementation to merge parsed json objects with existing objects in the event instead of fully replacing them. {pull}17958[17958] - Improve ECS categorization field mappings in osquery module. {issue}16176[16176] {pull}17881[17881] +- The `logstash` module can now automatically detect the log file format (JSON or plaintext) and process it accordingly. {issue}9964[9964] {pull}18095[18095] *Heartbeat* diff --git a/filebeat/docs/modules/logstash.asciidoc b/filebeat/docs/modules/logstash.asciidoc index 2a9ace71d1b..5c9df956e05 100644 --- a/filebeat/docs/modules/logstash.asciidoc +++ b/filebeat/docs/modules/logstash.asciidoc @@ -8,8 +8,8 @@ This file is generated! See scripts/docs_collector.py == Logstash module -The +{modulename}+ module parse logstash regular logs and the slow log, it will support the plain text format -and the JSON format (--log.format json). The default is the plain text format. +The +{modulename}+ module parses Logstash regular logs and the slow log. It supports the plain text format +and the JSON format. include::../include/what-happens.asciidoc[] @@ -34,7 +34,7 @@ The Logstash `slowlog` fileset was tested with logs from Logstash 5.6 and 6.0 include::../include/configuring-intro.asciidoc[] The following example shows how to set paths in the +modules.d/{modulename}.yml+ -file to override the default paths for Logstash logs and set the format to json +file to override the default paths for Logstash logs. 
["source","yaml",subs="attributes"] ----- @@ -42,11 +42,9 @@ file to override the default paths for Logstash logs and set the format to json log: enabled: true var.paths: ["/path/to/log/logstash.log*"] - var.format: json slowlog: enabled: true var.paths: ["/path/to/log/logstash-slowlog.log*"] - var.format: json ----- To specify the same settings at the command line, you use: @@ -68,21 +66,11 @@ include::../include/config-option-intro.asciidoc[] include::../include/var-paths.asciidoc[] -*`var.format`*:: - -The configured Logstash log format. Possible values are: `json` or `plain`. The -default is `plain`. - [float] ==== `slowlog` fileset settings include::../include/var-paths.asciidoc[] -*`var.format`*:: - -The configured Logstash log format. Possible values are: `json` or `plain`. The -default is `plain`. - include::../include/timezone-support.asciidoc[] [float] @@ -96,6 +84,11 @@ image::./images/kibana-logstash-log.png[] [role="screenshot"] image::./images/kibana-logstash-slowlog.png[] +[float] +=== Known issues +When using the `log` fileset to parse plaintext logs, if a multiline plaintext log contains an embedded JSON object such that +the JSON object starts on a new line, the fileset may not parse the multiline plaintext log event correctly. + :has-dashboards!: :fileset_ex!: diff --git a/filebeat/module/logstash/_meta/docs.asciidoc b/filebeat/module/logstash/_meta/docs.asciidoc index 2fc59161812..8f4e0d33a3c 100644 --- a/filebeat/module/logstash/_meta/docs.asciidoc +++ b/filebeat/module/logstash/_meta/docs.asciidoc @@ -3,8 +3,8 @@ == Logstash module -The +{modulename}+ module parse logstash regular logs and the slow log, it will support the plain text format -and the JSON format (--log.format json). The default is the plain text format. +The +{modulename}+ module parses Logstash regular logs and the slow log. It supports the plain text format +and the JSON format. 
include::../include/what-happens.asciidoc[] @@ -29,7 +29,7 @@ The Logstash `slowlog` fileset was tested with logs from Logstash 5.6 and 6.0 include::../include/configuring-intro.asciidoc[] The following example shows how to set paths in the +modules.d/{modulename}.yml+ -file to override the default paths for Logstash logs and set the format to json +file to override the default paths for Logstash logs. ["source","yaml",subs="attributes"] ----- @@ -37,11 +37,9 @@ file to override the default paths for Logstash logs and set the format to json log: enabled: true var.paths: ["/path/to/log/logstash.log*"] - var.format: json slowlog: enabled: true var.paths: ["/path/to/log/logstash-slowlog.log*"] - var.format: json ----- To specify the same settings at the command line, you use: @@ -63,21 +61,11 @@ include::../include/config-option-intro.asciidoc[] include::../include/var-paths.asciidoc[] -*`var.format`*:: - -The configured Logstash log format. Possible values are: `json` or `plain`. The -default is `plain`. - [float] ==== `slowlog` fileset settings include::../include/var-paths.asciidoc[] -*`var.format`*:: - -The configured Logstash log format. Possible values are: `json` or `plain`. The -default is `plain`. - include::../include/timezone-support.asciidoc[] [float] @@ -91,6 +79,11 @@ image::./images/kibana-logstash-log.png[] [role="screenshot"] image::./images/kibana-logstash-slowlog.png[] +[float] +=== Known issues +When using the `log` fileset to parse plaintext logs, if a multiline plaintext log contains an embedded JSON object such that +the JSON object starts on a new line, the fileset may not parse the multiline plaintext log event correctly. 
+ :has-dashboards!: :fileset_ex!: diff --git a/filebeat/module/logstash/log/config/log.yml b/filebeat/module/logstash/log/config/log.yml index 27140697f77..af0e4c33735 100644 --- a/filebeat/module/logstash/log/config/log.yml +++ b/filebeat/module/logstash/log/config/log.yml @@ -5,12 +5,10 @@ paths: {{ end }} exclude_files: [".gz$"] -{{ if eq .format "plain" }} multiline: - pattern: ^\[[0-9]{4}-[0-9]{2}-[0-9]{2} + pattern: ^((\[[0-9]{4}-[0-9]{2}-[0-9]{2}[^\]]+\])|({.+})) negate: true match: after -{{ end }} processors: # Locale for time zone is only needed in non-json logs diff --git a/filebeat/module/logstash/log/ingest/pipeline-json.yml b/filebeat/module/logstash/log/ingest/pipeline-json.yml index 28d6c6f95eb..f14a3be2855 100644 --- a/filebeat/module/logstash/log/ingest/pipeline-json.yml +++ b/filebeat/module/logstash/log/ingest/pipeline-json.yml @@ -7,9 +7,6 @@ processors: - json: field: message target_field: logstash.log -- rename: - field: '@timestamp' - target_field: event.created - convert: field: logstash.log.timeMillis type: string diff --git a/filebeat/module/logstash/log/ingest/pipeline-plain.yml b/filebeat/module/logstash/log/ingest/pipeline-plaintext.yml similarity index 96% rename from filebeat/module/logstash/log/ingest/pipeline-plain.yml rename to filebeat/module/logstash/log/ingest/pipeline-plaintext.yml index 6ec17b74073..4c75de47c99 100644 --- a/filebeat/module/logstash/log/ingest/pipeline-plain.yml +++ b/filebeat/module/logstash/log/ingest/pipeline-plaintext.yml @@ -17,9 +17,6 @@ processors: %{GREEDYMULTILINE:message} - \[%{TIMESTAMP_ISO8601:logstash.log.timestamp}\]\[%{LOGSTASH_LOGLEVEL:log.level}\s?\]\[%{LOGSTASH_CLASS_MODULE:logstash.log.module}\s*\] %{GREEDYMULTILINE:message} -- rename: - field: '@timestamp' - target_field: event.created - date: if: ctx.event.timezone == null field: logstash.log.timestamp diff --git a/filebeat/module/logstash/log/ingest/pipeline.yml b/filebeat/module/logstash/log/ingest/pipeline.yml new file mode 100644 index 
00000000000..0a416e5758e --- /dev/null +++ b/filebeat/module/logstash/log/ingest/pipeline.yml @@ -0,0 +1,24 @@ +description: Pipeline for parsing logstash node logs +processors: +- rename: + field: '@timestamp' + target_field: event.created +- grok: + field: message + patterns: + - ^%{CHAR:first_char} + pattern_definitions: + CHAR: . +- pipeline: + if: ctx.first_char != '{' + name: '{< IngestPipeline "pipeline-plaintext" >}' +- pipeline: + if: ctx.first_char == '{' + name: '{< IngestPipeline "pipeline-json" >}' +- remove: + field: + - first_char +on_failure: + - set: + field: error.message + value: '{{ _ingest.on_failure_message }}' diff --git a/filebeat/module/logstash/log/manifest.yml b/filebeat/module/logstash/log/manifest.yml index b5ea4edafe1..c1c231ca5f3 100644 --- a/filebeat/module/logstash/log/manifest.yml +++ b/filebeat/module/logstash/log/manifest.yml @@ -1,13 +1,16 @@ module_version: 1.0 var: - - name: format - default: plain - name: paths default: - - /var/log/logstash/logstash-{{.format}}*.log + - /var/log/logstash/logstash-plain*.log + - /var/log/logstash/logstash-json*.log os.windows: - - c:/programdata/logstash/logs/logstash-{{.format}}*.log + - c:/programdata/logstash/logs/logstash-plain*.log + - c:/programdata/logstash/logs/logstash-json*.log -ingest_pipeline: ingest/pipeline-{{.format}}.yml +ingest_pipeline: + - ingest/pipeline.yml + - ingest/pipeline-plaintext.yml + - ingest/pipeline-json.yml input: config/log.yml diff --git a/filebeat/module/logstash/log/test/logstash-plain-7.4.log-expected.json b/filebeat/module/logstash/log/test/logstash-plain-7.4.log-expected.json index b9b93ff3140..c37f8d70590 100644 --- a/filebeat/module/logstash/log/test/logstash-plain-7.4.log-expected.json +++ b/filebeat/module/logstash/log/test/logstash-plain-7.4.log-expected.json @@ -31,4 +31,4 @@ "message": "Encountered a retryable error. 
Will Retry with exponential backoff...", "service.type": "logstash" } -] +] \ No newline at end of file diff --git a/filebeat/module/logstash/log/test/logstash-plain.log b/filebeat/module/logstash/log/test/logstash-plain.log index d65e2af70bb..6e270e00d85 100644 --- a/filebeat/module/logstash/log/test/logstash-plain.log +++ b/filebeat/module/logstash/log/test/logstash-plain.log @@ -2,4 +2,10 @@ [2017-11-20T03:55:00,318][INFO ][logstash.inputs.jdbc ] (0.058950s) Select Name as [person.name] , Address as [person.address] from people - +[2020-05-13T11:00:26,431][INFO ][logstash.inputs.json ] (0.158950s) { +"foo": [ +{ + "bar": "baz" +} +] +} diff --git a/filebeat/module/logstash/log/test/logstash-plain.log-expected.json b/filebeat/module/logstash/log/test/logstash-plain.log-expected.json index 983fd1ef14c..a39376e1ec4 100644 --- a/filebeat/module/logstash/log/test/logstash-plain.log-expected.json +++ b/filebeat/module/logstash/log/test/logstash-plain.log-expected.json @@ -29,7 +29,25 @@ "log.level": "INFO", "log.offset": 175, "logstash.log.module": "logstash.inputs.jdbc", - "message": "(0.058950s) Select Name as [person.name]\n, Address as [person.address]\nfrom people\n", + "message": "(0.058950s) Select Name as [person.name]\n, Address as [person.address]\nfrom people", + "service.type": "logstash" + }, + { + "@timestamp": "2020-05-13T11:00:26.431-02:00", + "event.dataset": "logstash.log", + "event.kind": "event", + "event.module": "logstash", + "event.timezone": "-02:00", + "event.type": "info", + "fileset.name": "log", + "input.type": "log", + "log.flags": [ + "multiline" + ], + "log.level": "INFO", + "log.offset": 318, + "logstash.log.module": "logstash.inputs.json", + "message": "(0.158950s) {\n\"foo\": [\n{\n \"bar\": \"baz\"\n}\n]\n}", "service.type": "logstash" } ] \ No newline at end of file diff --git a/filebeat/module/logstash/slowlog/ingest/pipeline-json.yml b/filebeat/module/logstash/slowlog/ingest/pipeline-json.yml index acfd95a08d2..d68b3eb9eee 100644 
--- a/filebeat/module/logstash/slowlog/ingest/pipeline-json.yml +++ b/filebeat/module/logstash/slowlog/ingest/pipeline-json.yml @@ -7,9 +7,6 @@ processors: - json: field: message target_field: logstash.slowlog -- rename: - field: '@timestamp' - target_field: event.created - convert: field: logstash.slowlog.timeMillis type: string diff --git a/filebeat/module/logstash/slowlog/ingest/pipeline-plain.yml b/filebeat/module/logstash/slowlog/ingest/pipeline-plaintext.yml similarity index 96% rename from filebeat/module/logstash/slowlog/ingest/pipeline-plain.yml rename to filebeat/module/logstash/slowlog/ingest/pipeline-plaintext.yml index a049b26d603..2ac47879cea 100644 --- a/filebeat/module/logstash/slowlog/ingest/pipeline-plain.yml +++ b/filebeat/module/logstash/slowlog/ingest/pipeline-plaintext.yml @@ -21,9 +21,6 @@ processors: patterns: - '{:plugin_params=>%{GREEDYDATA:logstash.slowlog.plugin_params}, :took_in_nanos=>%{NUMBER:event.duration}, :took_in_millis=>%{NUMBER:logstash.slowlog.took_in_millis}, :event=>%{GREEDYDATA:logstash.slowlog.event}}' -- rename: - field: '@timestamp' - target_field: event.created - date: if: ctx.event.timezone == null field: logstash.slowlog.timestamp diff --git a/filebeat/module/logstash/slowlog/ingest/pipeline.yml b/filebeat/module/logstash/slowlog/ingest/pipeline.yml new file mode 100644 index 00000000000..061a4f8c636 --- /dev/null +++ b/filebeat/module/logstash/slowlog/ingest/pipeline.yml @@ -0,0 +1,24 @@ +description: Pipeline for parsing logstash slow logs +processors: +- rename: + field: '@timestamp' + target_field: event.created +- grok: + field: message + patterns: + - ^%{CHAR:first_char} + pattern_definitions: + CHAR: . 
+- pipeline: + if: ctx.first_char != '{' + name: '{< IngestPipeline "pipeline-plaintext" >}' +- pipeline: + if: ctx.first_char == '{' + name: '{< IngestPipeline "pipeline-json" >}' +- remove: + field: + - first_char +on_failure: + - set: + field: error.message + value: '{{ _ingest.on_failure_message }}' diff --git a/filebeat/module/logstash/slowlog/manifest.yml b/filebeat/module/logstash/slowlog/manifest.yml index afce6486404..d37f2ebf28c 100644 --- a/filebeat/module/logstash/slowlog/manifest.yml +++ b/filebeat/module/logstash/slowlog/manifest.yml @@ -1,13 +1,16 @@ module_version: 1.0 var: - - name: format - default: plain - name: paths default: - - /var/log/logstash/logstash-slowlog-{{.format}}*.log + - /var/log/logstash/logstash-slowlog-plain*.log + - /var/log/logstash/logstash-slowlog-json*.log os.windows: - - c:/programdata/logstash/logs/logstash-slowlog-{{.format}}*.log + - c:/programdata/logstash/logs/logstash-slowlog-plain*.log + - c:/programdata/logstash/logs/logstash-slowlog-json*.log -ingest_pipeline: ingest/pipeline-{{.format}}.yml +ingest_pipeline: + - ingest/pipeline.yml + - ingest/pipeline-plaintext.yml + - ingest/pipeline-json.yml input: config/slowlog.yml