From 1f3659f0ed37b791418e3e774ed7751d534c4de8 Mon Sep 17 00:00:00 2001
From: ruflin
Date: Thu, 4 May 2017 12:57:36 +0200
Subject: [PATCH] Remove document_type from Filebeat

The `_type` field was removed in Elasticsearch 6.0. The initial intention of
`document_type` was to define different `_type` values. As these no longer
exist, the config option was removed. It is recommended to use `fields`
instead to add specific fields to a prospector.

* Adjust tests accordingly
---
 CHANGELOG.asciidoc                            |  1 +
 filebeat/_meta/common.full.p2.yml             |  5 -----
 filebeat/_meta/fields.common.yml              |  5 -----
 filebeat/docs/fields.asciidoc                 |  8 --------
 filebeat/docs/migration.asciidoc              | 13 +++----------
 .../configuration/filebeat-options.asciidoc   |  7 -------
 filebeat/filebeat.full.yml                    |  5 -----
 filebeat/harvester/config.go                  |  2 --
 filebeat/harvester/log.go                     |  1 -
 filebeat/tests/system/test_json.py            | 10 ++++------
 filebeat/tests/system/test_processors.py      |  8 ++++----
 filebeat/tests/system/test_prospector.py      |  4 ++--
 libbeat/tests/system/beat/beat.py             |  2 +-
 13 files changed, 15 insertions(+), 56 deletions(-)

diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index b5e7dc984c8..e97fc027bec 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -29,6 +29,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
 - Remove deprecated config options force_close_files and close_older. {pull}3768[3768]
 - Change clean_removed behaviour to also remove states for files which cannot be found anymore under the same name. {pull}3827[3827]
 - Add Icinga module. {pull}3904[3904]
+- Remove `document_type` config option. Use `fields` instead. {pull}4204[4204]
 
 *Heartbeat*
 - Event format and field naming changes in Heartbeat and sample Dashboard. {pull}4091[4091]
diff --git a/filebeat/_meta/common.full.p2.yml b/filebeat/_meta/common.full.p2.yml
index 6fdaa8cd4a8..61d34414c9d 100644
--- a/filebeat/_meta/common.full.p2.yml
+++ b/filebeat/_meta/common.full.p2.yml
@@ -67,11 +67,6 @@ filebeat.prospectors:
   # Time strings like 2h (2 hours), 5m (5 minutes) can be used.
   #ignore_older: 0
 
-  # Type to be published in the 'type' field. For Elasticsearch output,
-  # the type defines the document type these entries should be stored
-  # in. Default: log
-  #document_type: log
-
   # How often the prospector checks for new files in the paths that are specified
   # for harvesting. Specify 1s to scan the directory as frequently as possible
   # without causing Filebeat to scan too frequently. Default: 10s.
diff --git a/filebeat/_meta/fields.common.yml b/filebeat/_meta/fields.common.yml
index d33ef10a6c7..0c515594660 100644
--- a/filebeat/_meta/fields.common.yml
+++ b/filebeat/_meta/fields.common.yml
@@ -23,11 +23,6 @@
       description: >
         The content of the line read from the log file.
 
-    - name: type
-      required: true
-      description: >
-        The name of the log event. This field is set to the value specified for the `document_type` option in the prospector section of the Filebeat config file.
-
     - name: input_type
       required: true
       description: >
diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc
index aa7fb831f7c..bf9e806726d 100644
--- a/filebeat/docs/fields.asciidoc
+++ b/filebeat/docs/fields.asciidoc
@@ -767,14 +767,6 @@ required: True
 
 The content of the line read from the log file.
 
 
-[float]
-=== type
-
-required: True
-
-The name of the log event. This field is set to the value specified for the `document_type` option in the prospector section of the Filebeat config file.
-
-
 [float]
 === input_type
diff --git a/filebeat/docs/migration.asciidoc b/filebeat/docs/migration.asciidoc
index 8f4b18ba679..1f683758041 100644
--- a/filebeat/docs/migration.asciidoc
+++ b/filebeat/docs/migration.asciidoc
@@ -4,7 +4,7 @@
 [partintro]
 --
 Filebeat is based on the Logstash Forwarder source code and replaces Logstash Forwarder as the method
-to use for tailing log files and forwarding them to Logstash. 
+to use for tailing log files and forwarding them to Logstash.
 
 Filebeat introduces the following major changes:
 
@@ -139,25 +139,20 @@ filebeat.prospectors:
     paths:
       - /var/log/messages
       - /var/log/*.log
-    document_type: syslog <1>
     fields:
       service: apache
       zone: us-east-1
     fields_under_root: true
   - input_type: stdin <2>
-    document_type: stdin
   - input_type: log
     paths:
       - /var/log/apache2/httpd-*.log
-    document_type: apache
 -------------------------------------------------------------------------------------
-<1> The `document_type` option controls the output `type` field, which is used by the
-    Elasticsearch output to determine the document type.
-<2> The explicit `input_type` option was introduced to differentiate between normal files and
+<1> The explicit `input_type` option was introduced to differentiate between normal files and
     stdin. In the future, additional types might be supported.
 
-As you can see, apart from the new `document_type` and `input_type` options,
+As you can see, apart from the new `input_type` options,
 which were before implicitly defined via the `type` custom field, the
 remaining options can be migrated mechanically.
 
@@ -287,7 +282,6 @@ filebeat.prospectors:
 - input_type: log
   paths:
     - /var/log/*.log
-  document_type: syslog
   fields:
     service: test01
 output.elasticsearch:
@@ -375,7 +369,6 @@ filebeat.prospectors:
 - input_type: log
   paths:
     - /var/log/*.log
-  document_type: syslog
   fields:
     service: test01
   fields_under_root: true
diff --git a/filebeat/docs/reference/configuration/filebeat-options.asciidoc b/filebeat/docs/reference/configuration/filebeat-options.asciidoc
index e0e095a9aba..83136af967e 100644
--- a/filebeat/docs/reference/configuration/filebeat-options.asciidoc
+++ b/filebeat/docs/reference/configuration/filebeat-options.asciidoc
@@ -14,7 +14,6 @@ filebeat.prospectors:
 - input_type: log
   paths:
     - /var/log/apache/httpd-*.log
-  document_type: apache
 
 - input_type: log
   paths:
@@ -303,12 +302,6 @@ If you require log lines to be sent in near real time do not use a very low `sca
 The default setting is 10s.
 
 [[filebeat-document-type]]
-===== document_type
-
-The event type to use for published lines read by harvesters. For Elasticsearch
-output, the value that you specify here is used to set the `type` field in the output
-document. The default value is `log`.
-
 ===== harvester_buffer_size
 
 The size in bytes of the buffer that each harvester uses when fetching a file. The default is 16384.
diff --git a/filebeat/filebeat.full.yml b/filebeat/filebeat.full.yml
index 14105fa4813..5b107cacfd1 100644
--- a/filebeat/filebeat.full.yml
+++ b/filebeat/filebeat.full.yml
@@ -236,11 +236,6 @@ filebeat.prospectors:
   # Time strings like 2h (2 hours), 5m (5 minutes) can be used.
   #ignore_older: 0
 
-  # Type to be published in the 'type' field. For Elasticsearch output,
-  # the type defines the document type these entries should be stored
-  # in. Default: log
-  #document_type: log
-
   # How often the prospector checks for new files in the paths that are specified
   # for harvesting. Specify 1s to scan the directory as frequently as possible
   # without causing Filebeat to scan too frequently. Default: 10s.
diff --git a/filebeat/harvester/config.go b/filebeat/harvester/config.go
index 05b653f403a..f9653380263 100644
--- a/filebeat/harvester/config.go
+++ b/filebeat/harvester/config.go
@@ -26,7 +26,6 @@ var (
 		CloseRenamed:  false,
 		CloseEOF:      false,
 		CloseTimeout:  0,
-		DocumentType:  "log",
 		CleanInactive: 0,
 	}
 )
@@ -49,7 +48,6 @@ type harvesterConfig struct {
 	MaxBytes      int                     `config:"max_bytes" validate:"min=0,nonzero"`
 	Multiline     *reader.MultilineConfig `config:"multiline"`
 	JSON          *reader.JSONConfig      `config:"json"`
-	DocumentType  string                  `config:"document_type"`
 	CleanInactive time.Duration           `config:"clean_inactive" validate:"min=0"`
 	Pipeline      string                  `config:"pipeline"`
 	Module        string                  `config:"_module_name"` // hidden option to set the module name
diff --git a/filebeat/harvester/log.go b/filebeat/harvester/log.go
index b7fee62952d..1aabc3ab2b6 100644
--- a/filebeat/harvester/log.go
+++ b/filebeat/harvester/log.go
@@ -150,7 +150,6 @@ func (h *Harvester) Harvest(r reader.Reader) {
 			"@timestamp": common.Time(message.Ts),
 			"source":     state.Source,
 			"offset":     state.Offset, // Offset here is the offset before the starting char.
-			"type":       h.config.DocumentType,
 			"input_type": h.config.InputType,
 		}
 		data.Event.DeepUpdate(message.Fields)
diff --git a/filebeat/tests/system/test_json.py b/filebeat/tests/system/test_json.py
index eb70cf61a02..297055e3f00 100644
--- a/filebeat/tests/system/test_json.py
+++ b/filebeat/tests/system/test_json.py
@@ -196,7 +196,6 @@ def test_timestamp_in_message(self):
         output = self.read_output()
         assert len(output) == 5
         assert all(isinstance(o["@timestamp"], basestring) for o in output)
-        assert all(isinstance(o["type"], basestring) for o in output)
 
         assert output[0]["@timestamp"] == "2016-04-05T18:47:18.444Z"
         assert output[1]["@timestamp"] != "invalid"
@@ -239,14 +238,13 @@ def test_type_in_message(self):
         output = self.read_output()
         assert len(output) == 3
         assert all(isinstance(o["@timestamp"], basestring) for o in output)
-        assert all(isinstance(o["type"], basestring) for o in output)
 
         assert output[0]["type"] == "test"
 
-        assert output[1]["type"] == "log"
+        assert "type" not in output[1]
         assert output[1]["json_error"] == \
             "type not overwritten (not string)"
 
-        assert output[2]["type"] == "log"
+        assert "type" not in output[2]
         assert output[2]["json_error"] == \
             "type not overwritten (not string)"
@@ -283,7 +281,7 @@ def test_with_generic_filtering(self):
         proc.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )
         assert len(output) == 1
         o = output[0]
@@ -327,7 +325,7 @@ def test_with_generic_filtering_remove_headers(self):
         proc.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )
         assert len(output) == 1
         o = output[0]
diff --git a/filebeat/tests/system/test_processors.py b/filebeat/tests/system/test_processors.py
index 47fb701f09c..e5c3a82a031 100644
--- a/filebeat/tests/system/test_processors.py
+++ b/filebeat/tests/system/test_processors.py
@@ -28,7 +28,7 @@ def test_dropfields(self):
         filebeat.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )[0]
         assert "beat.name" not in output
         assert "message" in output
@@ -53,7 +53,7 @@ def test_include_fields(self):
         filebeat.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )[0]
         assert "beat.name" not in output
         assert "message" in output
@@ -81,7 +81,7 @@ def test_drop_event(self):
         filebeat.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )[0]
         assert "beat.name" in output
         assert "message" in output
@@ -110,7 +110,7 @@ def test_condition(self):
         filebeat.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )[0]
         assert "beat.name" in output
         assert "message" in output
diff --git a/filebeat/tests/system/test_prospector.py b/filebeat/tests/system/test_prospector.py
index 1b746af0a6c..bb84d67b5f5 100644
--- a/filebeat/tests/system/test_prospector.py
+++ b/filebeat/tests/system/test_prospector.py
@@ -648,7 +648,7 @@ def test_prospector_filter_dropfields(self):
         filebeat.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )[0]
         assert "offset" not in output
         assert "message" in output
@@ -673,7 +673,7 @@ def test_prospector_filter_includefields(self):
         filebeat.check_kill_and_wait()
 
         output = self.read_output(
-            required_fields=["@timestamp", "type"],
+            required_fields=["@timestamp"],
         )[0]
         assert "message" not in output
         assert "offset" in output
diff --git a/libbeat/tests/system/beat/beat.py b/libbeat/tests/system/beat/beat.py
index e8946f2c7f2..b2af1000dcd 100644
--- a/libbeat/tests/system/beat/beat.py
+++ b/libbeat/tests/system/beat/beat.py
@@ -11,7 +11,7 @@
 import yaml
 from datetime import datetime, timedelta
 
-BEAT_REQUIRED_FIELDS = ["@timestamp", "type",
+BEAT_REQUIRED_FIELDS = ["@timestamp",
                         "beat.name", "beat.hostname", "beat.version"]
 
 INTEGRATION_TESTS = os.environ.get('INTEGRATION_TESTS', False)
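
Note on migrating existing configurations: a custom field under `fields` takes over the role that `document_type` used to play. The snippet below is a minimal sketch, not part of the patch itself; the path, the field name `log_type`, and the value `syslog` are placeholders chosen only for illustration.

filebeat.prospectors:
- input_type: log
  paths:
    - /var/log/messages        # placeholder path
  # Previously: document_type: syslog
  # Carry the same information in a custom field instead:
  fields:
    log_type: syslog           # placeholder field name and value
  # Uncomment to store log_type at the top level of the event rather
  # than under the `fields` prefix:
  #fields_under_root: true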
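Because events no longer carry a config-driven `type`, anything downstream that keyed on `type` (index selection, conditionals, processors) has to key on the custom field instead. The following is a hedged sketch using the conditional `indices` setting of the Elasticsearch output, assuming the placeholder `log_type` field from the example above and `fields_under_root` left at its default of false; the host and index names are also placeholders.

output.elasticsearch:
  hosts: ["localhost:9200"]            # placeholder host
  # Default index for events that match no condition below.
  index: "filebeat-%{+yyyy.MM.dd}"
  indices:
    - index: "filebeat-syslog-%{+yyyy.MM.dd}"
      when.equals:
        fields.log_type: "syslog"      # matches the custom field set on the prospector

With `fields_under_root: true` the condition would reference `log_type` directly instead of `fields.log_type`.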