From 2fa13478e37287064e0101a8ea3be84adee94888 Mon Sep 17 00:00:00 2001
From: Tiago Queiroz <tiago.queiroz@elastic.co>
Date: Thu, 19 Dec 2024 14:10:44 -0500
Subject: [PATCH 1/3] Use `fingerprint` file identity by default and migrate
 file state from `native` or `path` (#41762)

This commit changes the default `file_identity` from `native` to
`fingerprint`. Any previous state from `native` (or `path`) is
automatically migrated to `fingerprint` when Filestream starts.

The Filestream input has always had the [ability to update file identifiers](https://github.com/elastic/beats/blob/4278366ab03221e8b62183dc06f9505f6ccc5209/filebeat/input/filestream/prospector.go#L104-L122),
however it never worked as expected, leading to full data duplication
when changing the file identity. This commit fixes it to allow
changing the file identity from `native` (inode + device ID) and
`path` to `fingerprint` without any data duplication.

(cherry picked from commit 78fe7a5b71bdbd9d1bb174fdc79876b8d294d00d)

# Conflicts:
#	filebeat/tests/integration/filestream_test.go
---
 CHANGELOG.next.asciidoc                       |   3 +
 .../config/filebeat.global.reference.yml.tmpl |   2 +
 .../config/filebeat.inputs.reference.yml.tmpl |   7 +-
 filebeat/docs/faq.asciidoc                    |  11 +
 .../input-filestream-file-options.asciidoc    |  71 ++-
 .../docs/inputs/input-filestream.asciidoc     |  46 +-
 filebeat/filebeat.reference.yml               |   9 +-
 filebeat/include/list.go                      |   1 +
 filebeat/input/filestream/environment_test.go |  12 +-
 filebeat/input/filestream/fswatch.go          |   2 +-
 filebeat/input/filestream/fswatch_test.go     |   6 +
 filebeat/input/filestream/identifier.go       |   2 +-
 filebeat/input/filestream/identifier_test.go  |  23 +-
 .../filestream/input_integration_test.go      | 238 +++++----
 filebeat/input/filestream/input_test.go       |   3 +
 .../internal/input-logfile/prospector.go      |   7 +-
 .../internal/input-logfile/store.go           |  83 ++--
 .../internal/input-logfile/store_test.go      |  23 +-
 .../legacy_metrics_integration_test.go        |   8 +-
 .../filestream/metrics_integration_test.go    |  12 +-
 .../filestream/parsers_integration_test.go    | 110 +++--
 filebeat/input/filestream/prospector.go       |  88 +++-
 .../input/filestream/prospector_creator.go    |   6 +-
 filebeat/input/filestream/prospector_test.go  | 162 ++++++-
 filebeat/input/filestream/testdata/log.log    |  10 +
 .../tests/integration/event_log_file_test.go  |   2 +
 filebeat/tests/integration/filestream_test.go | 455 ++++++++++++++++++
 .../integration/filestream_truncation_test.go |   2 +
 filebeat/tests/integration/store_test.go      |   2 +
 .../translate_ldap_attribute_test.go          |   2 +
 .../system/config/filestream-fixup-id.yml.j2  |   2 +
 filebeat/tests/system/test_reload_inputs.py   |   2 +
 libbeat/tests/integration/framework.go        |   5 +
 x-pack/filebeat/filebeat.reference.yml        |   9 +-
 34 files changed, 1142 insertions(+), 284 deletions(-)
 create mode 100644 filebeat/input/filestream/testdata/log.log

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index d27a203b4aa..ff1c7fea449 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -52,6 +52,8 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
 
 - Add kafka compression support for ZSTD.
 
+- Filebeat fails to start if there is any input with a duplicated ID. It logs the duplicated IDs and the offending inputs configurations. {pull}41731[41731]
+- The Filestream input only starts to ingest a file when it is >= 1024 bytes in size. This happens because `fingerprint` is the default file identity now. To restore the previous behaviour, set `file_identity.native: ~` and `prospector.scanner.fingerprint.enabled: false` {issue}40197[40197] {pull}41762[41762]
 *Heartbeat*
 
 
@@ -357,6 +359,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
 - Add support for SSL and Proxy configurations for websoket type in streaming input. {pull}41934[41934]
 - AWS S3 input registry cleanup for untracked s3 objects. {pull}41694[41694]
 - The environment variable `BEATS_AZURE_EVENTHUB_INPUT_TRACING_ENABLED: true` enables internal logs tracer for the azure-eventhub input. {issue}41931[41931] {pull}41932[41932]
+- The Filestream input now uses the `fingerprint` file identity by default. The state of files is automatically migrated if the previous file identity was `native` (the default) or `path`. If the `file_identity` is explicitly set, there is no change in behaviour. {issue}40197[40197] {pull}41762[41762]
 - Rate limiting operability improvements in the Okta provider of the Entity Analytics input. {issue}40106[40106] {pull}41977[41977]
 - Added default values in the streaming input for websocket retries and put a cap on retry wait time to be lesser than equal to the maximum defined wait time. {pull}42012[42012]
 
diff --git a/filebeat/_meta/config/filebeat.global.reference.yml.tmpl b/filebeat/_meta/config/filebeat.global.reference.yml.tmpl
index 0287fb3f9f5..9d0a3c23974 100644
--- a/filebeat/_meta/config/filebeat.global.reference.yml.tmpl
+++ b/filebeat/_meta/config/filebeat.global.reference.yml.tmpl
@@ -15,6 +15,8 @@
 # batch of events has been published successfully. The default value is 1s.
 #filebeat.registry.flush: 1s
 
+# The interval which to run the registry clean up
+#filebeat.registry.cleanup_interval: 5m
 
 # Starting with Filebeat 7.0, the registry uses a new directory format to store
 # Filebeat state. After you upgrade, Filebeat will automatically migrate a 6.x
diff --git a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
index ba658819582..5e44bcdb09e 100644
--- a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
+++ b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
@@ -303,7 +303,7 @@ filebeat.inputs:
   # If enabled, instead of relying on the device ID and inode values when comparing files,
   # compare hashes of the given byte ranges in files. A file becomes an ingest target
   # when its size grows larger than offset+length (see below). Until then it's ignored.
-  #prospector.scanner.fingerprint.enabled: false
+  #prospector.scanner.fingerprint.enabled: true
 
   # If fingerprint mode is enabled, sets the offset from the beginning of the file
   # for the byte range used for computing the fingerprint value.
@@ -438,8 +438,9 @@ filebeat.inputs:
   #clean_removed: true
 
   # Method to determine if two files are the same or not. By default
-  # the Beat considers two files the same if their inode and device id are the same.
-  #file_identity.native: ~
+  # a fingerprint is generated using the first 1024 bytes of the file,
+  # if the fingerprints match, then the files are considered equal.
+  #file_identity.fingerprint: ~
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering
diff --git a/filebeat/docs/faq.asciidoc b/filebeat/docs/faq.asciidoc
index ddcdb6a8898..ee7ceeabad8 100644
--- a/filebeat/docs/faq.asciidoc
+++ b/filebeat/docs/faq.asciidoc
@@ -19,6 +19,10 @@ We do not recommend reading log files from network volumes. Whenever possible, i
 send the log files directly from there. Reading files from network volumes (especially on Windows) can have unexpected side
 effects. For example, changed file identifiers may result in {beatname_uc} reading a log file from scratch again.
 
+If it is not possible to read from the host, then using the
+<<filebeat-input-filestream-file-identity-fingerprint, `fingerprint`>>
+file identity is the next best option.
+
 [[filebeat-not-collecting-lines]]
 === {beatname_uc} isn't collecting lines from a file
 
@@ -71,6 +75,13 @@ By default states are never removed from the registry file. To resolve the inode
 
 You can use <<{beatname_lc}-input-log-clean-removed,`clean_removed`>> for files that are removed from disk. Be aware that `clean_removed` cleans the file state from the registry whenever a file cannot be found during a scan. If the file shows up again later, it will be sent again from scratch.
 
+Aside from that you should also change the
+<<filebeat-input-filestream-file-identity, `file_identity`>> to
+<<filebeat-input-filestream-file-identity-fingerprint,
+`fingerprint`>>. If you were using `native` (the default) or `path`,
+the state of the files will be automatically migrated to
+`fingerprint`.
+
 include::filebeat-log-rotation.asciidoc[]
 
 [[windows-file-rotation]]
diff --git a/filebeat/docs/inputs/input-filestream-file-options.asciidoc b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
index 5436d3863dc..b87d9e67af6 100644
--- a/filebeat/docs/inputs/input-filestream-file-options.asciidoc
+++ b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
@@ -150,9 +150,9 @@ The default setting is 10s.
 [id="{beatname_lc}-input-{type}-scan-fingerprint"]
 ===== `prospector.scanner.fingerprint`
 
-Instead of relying on the device ID and inode values when comparing files, compare hashes of the given byte ranges of files.
-
-Enable this option if you're experiencing data loss or data duplication due to unstable file identifiers provided by the file system.
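+Instead of relying on the device ID and inode values when comparing
+files, compare hashes of the given byte ranges of files. This is the default behaviour for {beatname_uc}
+because it avoids data loss or data duplication caused by unstable file identifiers provided by the file system.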
 
 Following are some scenarios where this can happen:
 
@@ -542,19 +542,54 @@ indirectly set higher priorities on certain inputs by assigning a higher
 limit of harvesters.
 
 [float]
+[id="{beatname_lc}-input-{type}-file-identity"]
 ===== `file_identity`
 
 Different `file_identity` methods can be configured to suit the
 environment where you are collecting log messages.
 
-WARNING: Changing `file_identity` methods between runs may result in
-duplicated events in the output.
+IMPORTANT: Changing `file_identity` is only supported from `native` or
+`path` to `fingerprint`. In those cases {beatname_uc} will
+automatically migrate the state of the file when {type} starts.
+
+WARNING: Any unsupported change in `file_identity` methods between
+runs may result in duplicated events in the output.
+
+[id="{beatname_lc}-input-{type}-file-identity-fingerprint"]
+*`fingerprint`*:: The default behaviour of {beatname_uc} is to
+identify files based on content by hashing a specific range (0 to 1024
+bytes by default).
+
+WARNING: In order to use this file identity option, you must enable
+the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint
+option in the scanner>>. Once this file identity is enabled, changing
+the fingerprint configuration (offset, length, or other settings) will
+lead to a global re-ingestion of all files that match the paths
+configuration of the input.
+
+Please refer to the
+<<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint
+configuration for details>>.
+
+[source,yaml]
+----
+file_identity.fingerprint: ~
+----
 
-*`native`*:: The default behaviour of {beatname_uc} is to differentiate
-between files using their inodes and device ids.
+*`native`*:: Differentiates between files using their inodes and
+device ids.
 +
 In some cases these values can change during the lifetime of a file. 
-For example, when using the Linux link:https://en.wikipedia.org/wiki/Logical_Volume_Manager_%28Linux%29[LVM] (Logical Volume Manager), device numbers are allocated dynamically at module load (refer to link:https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/logical_volume_manager_administration/lv#persistent_numbers[Persistent Device Numbers] in the Red Hat Enterprise Linux documentation). To avoid the possibility of data duplication in this case, you can set `file_identity` to `path` rather than `native`.
+For example, when using the Linux
+link:https://en.wikipedia.org/wiki/Logical_Volume_Manager_%28Linux%29[LVM]
+(Logical Volume Manager), device numbers are allocated dynamically at
+module load (refer to
+link:https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/logical_volume_manager_administration/lv#persistent_numbers[Persistent
+Device Numbers] in the Red Hat Enterprise Linux documentation). To
+avoid the possibility of data duplication in this case, you can set
+`file_identity` to `fingerprint` rather than the default `native`.
++
+The states of files generated by `native` file identity can be migrated to `fingerprint`.
 
 [source,yaml]
 ----
@@ -562,14 +597,16 @@ file_identity.native: ~
 ----
 
 *`path`*:: To identify files based on their paths use this strategy.
-
++
 WARNING: Only use this strategy if your log files are rotated to a folder
 outside of the scope of your input or not at all. Otherwise you end up
 with duplicated events.
-
++
 WARNING: This strategy does not support renaming files.
 If an input file is renamed, {beatname_uc} will read it again if the new path
 matches the settings of the input.
++
+The states of files generated by `path` file identity can be migrated to `fingerprint`.
 
 [source,yaml]
 ----
@@ -578,7 +615,7 @@ file_identity.path: ~
 
 *`inode_marker`*:: If the device id changes from time to time, you must use
 this method to distinguish files. This option is not supported on Windows.
-
++
 Set the location of the marker file the following way:
 
 [source,yaml]
@@ -586,17 +623,6 @@ Set the location of the marker file the following way:
 file_identity.inode_marker.path: /logs/.filebeat-marker
 ----
 
-*`fingerprint`*:: To identify files based on their content byte range.
-
-WARNING: In order to use this file identity option, you must enable the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint option in the scanner>>. Once this file identity is enabled, changing the fingerprint configuration (offset, length, or other settings) will lead to a global re-ingestion of all files that match the paths configuration of the input.
-
-Please refer to the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint configuration for details>>.
-
-[source,yaml]
-----
-file_identity.fingerprint: ~
-----
-
 [[filestream-log-rotation-support]]
 [float]
 === Log rotation
@@ -609,6 +635,7 @@ When reading from rotating files make sure the paths configuration includes
 both the active file and all rotated files.
 
 By default, {beatname_uc} is able to track files correctly in the following strategies:
+
 * create: new active file with a unique name is created on rotation
 * rename: rotated files are renamed
 
diff --git a/filebeat/docs/inputs/input-filestream.asciidoc b/filebeat/docs/inputs/input-filestream.asciidoc
index 54283d6cce7..74b7514b91a 100644
--- a/filebeat/docs/inputs/input-filestream.asciidoc
+++ b/filebeat/docs/inputs/input-filestream.asciidoc
@@ -34,6 +34,11 @@ The `log` writes the complete file state.
 
 7. Stale entries can be removed from the registry, even if there is no active input.
 
+8. The default behaviour is to identify files based on their contents
+using the <<filebeat-input-filestream-file-identity-fingerprint,
+`fingerprint`>> <<filebeat-input-filestream-file-identity,
+`file_identity`>>. This solves data duplication caused by inode reuse.
+
 To configure this input, specify a list of glob-based <<filestream-input-paths,`paths`>>
 that must be crawled to locate and fetch the log lines.
 
@@ -86,20 +91,32 @@ multiple input sections:
 [[filestream-file-identity]]
 ==== Reading files on network shares and cloud providers
 
-WARNING: Filebeat does not support reading from network shares and cloud providers.
+WARNING: Some file identity methods do not support reading from
+network shares and cloud providers. To avoid duplicating events, use
+the default `file_identity`: `fingerprint`.
+
+IMPORTANT: Changing `file_identity` is only supported when
+migrating from `native` or `path` to `fingerprint`.
+
+WARNING: Any unsupported change in `file_identity` methods between
+runs may result in duplicated events in the output.
 
-However, one of the limitations of these data sources can be mitigated
-if you configure Filebeat adequately.
+`fingerprint` is the default and recommended file identity because it does not
+rely on the file system/OS. It generates a hash from a portion of the
+file (the first 1024 bytes, by default) and uses that to identify the
+file. This works well with log rotation strategies that move/rename
+the file and on Windows, where file identifiers might be more
+volatile. The downside is that {beatname_uc} will wait until a file
+reaches 1024 bytes before it starts ingesting that file.
 
-By default, {beatname_uc} identifies files based on their inodes and
-device IDs. However, on network shares and cloud providers these
-values might change during the lifetime of the file. If this happens
-{beatname_uc} thinks that file is new and resends the whole content
-of the file. To solve this problem you can configure the `file_identity` option. Possible
-values besides the default `inode_deviceid` are `path`, `inode_marker` and `fingerprint`.
+WARNING: Once this file identity is enabled, changing
+the fingerprint configuration (offset, length, etc) will lead to a
+global re-ingestion of all files that match the paths configuration of
+the input.
 
-WARNING: Changing `file_identity` methods between runs may result in
-duplicated events in the output.
+Please refer to the
+<<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint
+configuration for details>>.
 
 Selecting `path` instructs {beatname_uc} to identify files based on their
 paths. This is a quick way to avoid rereading files if inode and device ids
@@ -117,13 +134,6 @@ example oneliner generates a hidden marker file for the selected mountpoint `/lo
 Please note that you should not use this option on Windows as file identifiers might be
 more volatile.
 
-Selecting `fingerprint` instructs {beatname_uc} to identify files based on their
-content byte range.
-
-WARNING: In order to use this file identity option, one must enable the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint option in the scanner>>. Once this file identity is enabled, changing the fingerprint configuration (offset, length, etc) will lead to a global re-ingestion of all files that match the paths configuration of the input.
-
-Please refer to the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint configuration for details>>.
-
 ["source","sh",subs="attributes"]
 ----
 $ lsblk -o MOUNTPOINT,UUID | grep /logs | awk '{print $2}' >> /logs/.filebeat-marker
diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml
index 700f3d8e788..0b35505cd33 100644
--- a/filebeat/filebeat.reference.yml
+++ b/filebeat/filebeat.reference.yml
@@ -716,7 +716,7 @@ filebeat.inputs:
   # If enabled, instead of relying on the device ID and inode values when comparing files,
   # compare hashes of the given byte ranges in files. A file becomes an ingest target
   # when its size grows larger than offset+length (see below). Until then it's ignored.
-  #prospector.scanner.fingerprint.enabled: false
+  #prospector.scanner.fingerprint.enabled: true
 
   # If fingerprint mode is enabled, sets the offset from the beginning of the file
   # for the byte range used for computing the fingerprint value.
@@ -851,8 +851,9 @@ filebeat.inputs:
   #clean_removed: true
 
   # Method to determine if two files are the same or not. By default
-  # the Beat considers two files the same if their inode and device id are the same.
-  #file_identity.native: ~
+  # a fingerprint is generated using the first 1024 bytes of the file,
+  # if the fingerprints match, then the files are considered equal.
+  #file_identity.fingerprint: ~
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering
@@ -1266,6 +1267,8 @@ filebeat.inputs:
 # batch of events has been published successfully. The default value is 1s.
 #filebeat.registry.flush: 1s
 
+# The interval which to run the registry clean up
+#filebeat.registry.cleanup_interval: 5m
 
 # Starting with Filebeat 7.0, the registry uses a new directory format to store
 # Filebeat state. After you upgrade, Filebeat will automatically migrate a 6.x
diff --git a/filebeat/include/list.go b/filebeat/include/list.go
index e2a656a2a85..ae05c332eaa 100644
--- a/filebeat/include/list.go
+++ b/filebeat/include/list.go
@@ -28,6 +28,7 @@ import (
 	// Import packages that perform 'func init()'.
 	_ "github.com/elastic/beats/v7/filebeat/input"
 	_ "github.com/elastic/beats/v7/filebeat/input/container"
+	_ "github.com/elastic/beats/v7/filebeat/input/filestream"
 	_ "github.com/elastic/beats/v7/filebeat/input/log"
 	_ "github.com/elastic/beats/v7/filebeat/input/mqtt"
 	_ "github.com/elastic/beats/v7/filebeat/input/redis"
diff --git a/filebeat/input/filestream/environment_test.go b/filebeat/input/filestream/environment_test.go
index f9804bb16f3..80460d6b3b4 100644
--- a/filebeat/input/filestream/environment_test.go
+++ b/filebeat/input/filestream/environment_test.go
@@ -386,6 +386,7 @@ func getIDFromPath(filepath, inputID string, fi os.FileInfo) string {
 
 // waitUntilEventCount waits until total count events arrive to the client.
 func (e *inputTestingEnvironment) waitUntilEventCount(count int) {
+	e.t.Helper()
 	msg := &strings.Builder{}
 	require.Eventuallyf(e.t, func() bool {
 		msg.Reset()
@@ -448,9 +449,14 @@ func (e *inputTestingEnvironment) waitUntilAtLeastEventCount(count int) {
 // waitUntilHarvesterIsDone detects Harvester stop by checking if the last client has been closed
 // as when a Harvester stops the client is closed.
 func (e *inputTestingEnvironment) waitUntilHarvesterIsDone() {
-	for !e.pipeline.clients[len(e.pipeline.clients)-1].closed {
-		time.Sleep(10 * time.Millisecond)
-	}
+	require.Eventually(
+		e.t,
+		func() bool {
+			return e.pipeline.clients[len(e.pipeline.clients)-1].closed
+		},
+		time.Second*10,
+		time.Millisecond*10,
+		"The last connected client has not closed it's connection")
 }
 
 // requireEventsReceived requires that the list of messages has made it into the output.
diff --git a/filebeat/input/filestream/fswatch.go b/filebeat/input/filestream/fswatch.go
index c51d850bbd2..00d84ed9ab4 100644
--- a/filebeat/input/filestream/fswatch.go
+++ b/filebeat/input/filestream/fswatch.go
@@ -278,7 +278,7 @@ func defaultFileScannerConfig() fileScannerConfig {
 		Symlinks:      false,
 		RecursiveGlob: true,
 		Fingerprint: fingerprintConfig{
-			Enabled: false,
+			Enabled: true,
 			Offset:  0,
 			Length:  DefaultFingerprintSize,
 		},
diff --git a/filebeat/input/filestream/fswatch_test.go b/filebeat/input/filestream/fswatch_test.go
index 9fae0481ca6..03674772e0d 100644
--- a/filebeat/input/filestream/fswatch_test.go
+++ b/filebeat/input/filestream/fswatch_test.go
@@ -222,6 +222,7 @@ scanner:
 		paths := []string{filepath.Join(dir, "*.log")}
 		cfgStr := `
 scanner:
+  fingerprint.enabled: false
   check_interval: 10ms
 `
 
@@ -260,6 +261,7 @@ scanner:
 		paths := []string{filepath.Join(dir, "*.log")}
 		cfgStr := `
 scanner:
+  fingerprint.enabled: false
   check_interval: 50ms
 `
 
@@ -370,6 +372,7 @@ scanner:
 		}
 		cfgStr := `
 scanner:
+  fingerprint.enabled: false
   check_interval: 100ms
 `
 
@@ -615,6 +618,7 @@ scanner:
 			name: "returns no symlink if the original file is excluded",
 			cfgStr: `
 scanner:
+  fingerprint.enabled: false
   exclude_files: ['.*exclude.*', '.*traveler.*']
   symlinks: true
 `,
@@ -661,6 +665,7 @@ scanner:
 			name: "returns no included symlink if the original file is not included",
 			cfgStr: `
 scanner:
+  fingerprint.enabled: false
   include_files: ['.*include.*', '.*portal.*']
   symlinks: true
 `,
@@ -678,6 +683,7 @@ scanner:
 			name: "returns an included symlink if the original file is included",
 			cfgStr: `
 scanner:
+  fingerprint.enabled: false
   include_files: ['.*include.*', '.*portal.*', '.*traveler.*']
   symlinks: true
 `,
diff --git a/filebeat/input/filestream/identifier.go b/filebeat/input/filestream/identifier.go
index a0cd7903e7a..08bb0c5f071 100644
--- a/filebeat/input/filestream/identifier.go
+++ b/filebeat/input/filestream/identifier.go
@@ -76,7 +76,7 @@ func (f fileSource) Name() string {
 // newFileIdentifier creates a new state identifier for a log input.
 func newFileIdentifier(ns *conf.Namespace, suffix string) (fileIdentifier, error) {
 	if ns == nil {
-		i, err := newINodeDeviceIdentifier(nil)
+		i, err := newFingerprintIdentifier(nil)
 		if err != nil {
 			return nil, err
 		}
diff --git a/filebeat/input/filestream/identifier_test.go b/filebeat/input/filestream/identifier_test.go
index 1fcd4d73efa..f2cd0102823 100644
--- a/filebeat/input/filestream/identifier_test.go
+++ b/filebeat/input/filestream/identifier_test.go
@@ -18,7 +18,6 @@
 package filestream
 
 import (
-	"io/ioutil"
 	"os"
 	"testing"
 
@@ -35,12 +34,17 @@ type testFileIdentifierConfig struct {
 }
 
 func TestFileIdentifier(t *testing.T) {
-	t.Run("default file identifier", func(t *testing.T) {
-		identifier, err := newFileIdentifier(nil, "")
+	t.Run("native file identifier", func(t *testing.T) {
+		cfg := conf.MustNewConfigFrom(`native: ~`)
+		ns := conf.Namespace{}
+		if err := cfg.Unpack(&ns); err != nil {
+			t.Fatalf("cannot unpack config into conf.Namespace: %s", err)
+		}
+		identifier, err := newFileIdentifier(&ns, "")
 		require.NoError(t, err)
 		assert.Equal(t, DefaultIdentifierName, identifier.Name())
 
-		tmpFile, err := ioutil.TempFile("", "test_file_identifier_native")
+		tmpFile, err := os.CreateTemp("", "test_file_identifier_native")
 		if err != nil {
 			t.Fatalf("cannot create temporary file for test: %v", err)
 		}
@@ -59,12 +63,17 @@ func TestFileIdentifier(t *testing.T) {
 		assert.Equal(t, identifier.Name()+"::"+file.GetOSState(fi).String(), src.Name())
 	})
 
-	t.Run("default file identifier with suffix", func(t *testing.T) {
-		identifier, err := newFileIdentifier(nil, "my-suffix")
+	t.Run("native file identifier with suffix", func(t *testing.T) {
+		cfg := conf.MustNewConfigFrom(`native: ~`)
+		ns := conf.Namespace{}
+		if err := cfg.Unpack(&ns); err != nil {
+			t.Fatalf("cannot unpack config into conf.Namespace: %s", err)
+		}
+		identifier, err := newFileIdentifier(&ns, "my-suffix")
 		require.NoError(t, err)
 		assert.Equal(t, DefaultIdentifierName, identifier.Name())
 
-		tmpFile, err := ioutil.TempFile("", "test_file_identifier_native")
+		tmpFile, err := os.CreateTemp("", "test_file_identifier_native")
 		if err != nil {
 			t.Fatalf("cannot create temporary file for test: %v", err)
 		}
diff --git a/filebeat/input/filestream/input_integration_test.go b/filebeat/input/filestream/input_integration_test.go
index 80327d8bcf2..5c063481dd5 100644
--- a/filebeat/input/filestream/input_integration_test.go
+++ b/filebeat/input/filestream/input_integration_test.go
@@ -52,11 +52,13 @@ func TestFilestreamCloseRenamed(t *testing.T) {
 	// the output to receive the event and then close the source file.
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                   id,
-		"paths":                                []string{env.abspath(testlogName) + "*"},
-		"prospector.scanner.check_interval":    "10ms",
-		"close.on_state_change.check_interval": "1ms",
-		"close.on_state_change.renamed":        "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName) + "*"},
+		"prospector.scanner.check_interval":      "10ms",
+		"close.on_state_change.check_interval":   "1ms",
+		"close.on_state_change.renamed":          "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first log line\n")
@@ -94,9 +96,11 @@ func TestFilestreamMetadataUpdatedOnRename(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                id,
-		"paths":                             []string{env.abspath(testlogName) + "*"},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName) + "*"},
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testline := []byte("log line\n")
@@ -132,11 +136,13 @@ func TestFilestreamCloseRemoved(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                   id,
-		"paths":                                []string{env.abspath(testlogName) + "*"},
-		"prospector.scanner.check_interval":    "24h",
-		"close.on_state_change.check_interval": "1ms",
-		"close.on_state_change.removed":        "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName) + "*"},
+		"prospector.scanner.check_interval":      "24h",
+		"close.on_state_change.check_interval":   "1ms",
+		"close.on_state_change.removed":          "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first log line\n")
@@ -173,10 +179,12 @@ func TestFilestreamCloseEOF(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                id,
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "24h",
-		"close.reader.on_eof":               "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "24h",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
+		"close.reader.on_eof":                    "true",
 	})
 
 	testlines := []byte("first log line\n")
@@ -209,9 +217,11 @@ func TestFilestreamEmptyLine(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                id,
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -248,9 +258,11 @@ func TestFilestreamEmptyLinesOnly(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                id,
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -272,8 +284,10 @@ func TestFilestreamBOMUTF8(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":    id,
-		"paths": []string{env.abspath(testlogName)},
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	// BOM: 0xEF,0xBB,0xBF
@@ -315,9 +329,11 @@ func TestFilestreamUTF16BOMs(t *testing.T) {
 			testlogName := "test.log"
 			id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 			inp := env.mustCreateInput(map[string]interface{}{
-				"id":       id,
-				"paths":    []string{env.abspath(testlogName)},
-				"encoding": name,
+				"id":                                     id,
+				"paths":                                  []string{env.abspath(testlogName)},
+				"encoding":                               name,
+				"prospector.scanner.fingerprint.enabled": false,
+				"file_identity.native":                   map[string]any{},
 			})
 
 			line := []byte("first line\n")
@@ -348,11 +364,13 @@ func TestFilestreamCloseTimeout(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                   id,
-		"paths":                                []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":    "24h",
-		"close.on_state_change.check_interval": "100ms",
-		"close.reader.after_interval":          "500ms",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "24h",
+		"close.on_state_change.check_interval":   "100ms",
+		"close.reader.after_interval":            "500ms",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\n")
@@ -382,11 +400,13 @@ func TestFilestreamCloseAfterInterval(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                   id,
-		"paths":                                []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":    "24h",
-		"close.on_state_change.check_interval": "100ms",
-		"close.on_state_change.inactive":       "2s",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "24h",
+		"close.on_state_change.check_interval":   "100ms",
+		"close.on_state_change.inactive":         "2s",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\nsecond line\nthird line\n")
@@ -417,7 +437,9 @@ func TestFilestreamCloseAfterIntervalRemoved(t *testing.T) {
 		"close.on_state_change.inactive":       "100ms",
 		// reader is not stopped when file is removed to see if the reader can still detect
 		// if the file has been inactive even if it have been removed in the meantime
-		"close.on_state_change.removed": "false",
+		"close.on_state_change.removed":          "false",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\nsecond line\nthird line\n")
@@ -450,7 +472,9 @@ func TestFilestreamCloseAfterIntervalRenamed(t *testing.T) {
 		"close.on_state_change.inactive":       "100ms",
 		// reader is not stopped when file is removed to see if the reader can still detect
 		// if the file has been inactive even if it have been removed in the meantime
-		"close.on_state_change.removed": "false",
+		"close.on_state_change.removed":          "false",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\nsecond line\nthird line\n")
@@ -485,7 +509,9 @@ func TestFilestreamCloseAfterIntervalRotatedAndRemoved(t *testing.T) {
 		"close.on_state_change.inactive":       "100ms",
 		// reader is not stopped when file is removed to see if the reader can still detect
 		// if the file has been inactive even if it have been removed in the meantime
-		"close.on_state_change.removed": "false",
+		"close.on_state_change.removed":          "false",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\nsecond line\nthird line\n")
@@ -514,11 +540,13 @@ func TestFilestreamCloseAfterIntervalRotatedAndNewRemoved(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                   id,
-		"paths":                                []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":    "1ms",
-		"close.on_state_change.check_interval": "10ms",
-		"close.on_state_change.inactive":       "100ms",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": "false",
+		"prospector.scanner.check_interval":      "1ms",
+		"close.on_state_change.check_interval":   "10ms",
+		"close.on_state_change.inactive":         "100ms",
 		// reader is not stopped when file is removed to see if the reader can still detect
 		// if the file has been inactive even if it have been removed in the meantime
 		"close.on_state_change.removed": "false",
@@ -558,10 +586,12 @@ func TestFilestreamTruncatedFileOpen(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                 id,
-		"paths":                              []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":  "1ms",
-		"prospector.scanner.resend_on_touch": "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.resend_on_touch":     "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -592,11 +622,13 @@ func TestFilestreamTruncatedFileClosed(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                 id,
-		"paths":                              []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":  "1ms",
-		"prospector.scanner.resend_on_touch": "true",
-		"close.reader.on_eof":                "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.resend_on_touch":     "true",
+		"close.reader.on_eof":                    "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -635,9 +667,11 @@ func TestFilestreamTruncateWithSymlink(t *testing.T) {
 			env.abspath(testlogName),
 			env.abspath(symlinkName),
 		},
-		"prospector.scanner.check_interval":  "1ms",
-		"prospector.scanner.resend_on_touch": "true",
-		"prospector.scanner.symlinks":        "true",
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.resend_on_touch":     "true",
+		"prospector.scanner.symlinks":            "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	lines := []byte("first line\nsecond line\nthird line\n")
@@ -675,10 +709,12 @@ func TestFilestreamTruncateBigScannerInterval(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                 id,
-		"paths":                              []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":  "5s",
-		"prospector.scanner.resend_on_touch": "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "5s",
+		"prospector.scanner.resend_on_touch":     "true",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -707,10 +743,12 @@ func TestFilestreamTruncateCheckOffset(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                 id,
-		"paths":                              []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":  "1ms",
-		"prospector.scanner.resend_on_touch": "true",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.resend_on_touch":     "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -737,9 +775,11 @@ func TestFilestreamTruncateBlockedOutput(t *testing.T) {
 	testlogName := "test.log"
 	id := "fake-ID-" + uuid.Must(uuid.NewV4()).String()
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                id,
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "200ms",
+		"id":                                     id,
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "200ms",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\nsecond line\n")
@@ -792,7 +832,9 @@ func TestFilestreamSymlinksEnabled(t *testing.T) {
 		"paths": []string{
 			env.abspath(symlinkName),
 		},
-		"prospector.scanner.symlinks": "true",
+		"prospector.scanner.symlinks":            "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\n")
@@ -824,10 +866,12 @@ func TestFilestreamSymlinkRotated(t *testing.T) {
 		"paths": []string{
 			env.abspath(symlinkName),
 		},
-		"prospector.scanner.check_interval": "1ms",
-		"prospector.scanner.symlinks":       "true",
-		"close.on_state_change.removed":     "false",
-		"clean_removed":                     "false",
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.symlinks":            "true",
+		"close.on_state_change.removed":          "false",
+		"clean_removed":                          "false",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	commonLine := "first line in file "
@@ -874,10 +918,12 @@ func TestFilestreamSymlinkRemoved(t *testing.T) {
 		"paths": []string{
 			env.abspath(symlinkName),
 		},
-		"prospector.scanner.check_interval": "1ms",
-		"prospector.scanner.symlinks":       "true",
-		"close.on_state_change.removed":     "false",
-		"clean_removed":                     "false",
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.symlinks":            "true",
+		"close.on_state_change.removed":          "false",
+		"clean_removed":                          "false",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	line := []byte("first line\n")
@@ -918,9 +964,11 @@ func TestFilestreamTruncate(t *testing.T) {
 		"paths": []string{
 			env.abspath("*"),
 		},
-		"prospector.scanner.check_interval":  "1ms",
-		"prospector.scanner.resend_on_touch": "true",
-		"prospector.scanner.symlinks":        "true",
+		"prospector.scanner.check_interval":      "1ms",
+		"prospector.scanner.resend_on_touch":     "true",
+		"prospector.scanner.symlinks":            "true",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	lines := []byte("first line\nsecond line\nthird line\n")
@@ -978,6 +1026,8 @@ func TestFilestreamHarvestAllFilesWhenHarvesterLimitExceeded(t *testing.T) {
 		"paths": []string{
 			env.abspath(logFiles[0].path),
 			env.abspath(logFiles[1].path)},
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
@@ -994,8 +1044,10 @@ func TestGlobalIDCannotBeUsed(t *testing.T) {
 	env := newInputTestingEnvironment(t)
 	testlogName := "test.log"
 	_, err := env.createInput(map[string]interface{}{
-		"id":    ".global",
-		"paths": []string{env.abspath(testlogName) + "*"},
+		"id":                                     ".global",
+		"paths":                                  []string{env.abspath(testlogName) + "*"},
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 	if err == nil {
 		t.Fatal("expecting an error because '.global' cannot be used as input ID")
@@ -1013,10 +1065,12 @@ func TestRotatingCloseInactiveLargerWriteRate(t *testing.T) {
 		"paths": []string{
 			env.abspath("*"),
 		},
-		"prospector.scanner.check_interval":    "100ms",
-		"close.on_state_change.check_interval": "1s",
-		"close.on_state_change.inactive":       "5s",
-		"ignore_older":                         "10s",
+		"prospector.scanner.check_interval":      "100ms",
+		"close.on_state_change.check_interval":   "1s",
+		"close.on_state_change.inactive":         "5s",
+		"ignore_older":                           "10s",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
@@ -1060,10 +1114,12 @@ func TestRotatingCloseInactiveLowWriteRate(t *testing.T) {
 		"paths": []string{
 			env.abspath("*"),
 		},
-		"prospector.scanner.check_interval":    "1ms",
-		"close.on_state_change.check_interval": "1ms",
-		"close.on_state_change.inactive":       "1s",
-		"ignore_older":                         "10s",
+		"prospector.scanner.check_interval":      "1ms",
+		"close.on_state_change.check_interval":   "1ms",
+		"close.on_state_change.inactive":         "1s",
+		"ignore_older":                           "10s",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	ctx, cancelInput := context.WithCancel(context.Background())
diff --git a/filebeat/input/filestream/input_test.go b/filebeat/input/filestream/input_test.go
index 3dfe176ac01..735ea0d0ffe 100644
--- a/filebeat/input/filestream/input_test.go
+++ b/filebeat/input/filestream/input_test.go
@@ -50,6 +50,7 @@ func BenchmarkFilestream(b *testing.B) {
 			cfg := `
 type: filestream
 prospector.scanner.check_interval: 1s
+prospector.scanner.fingerprint.enabled: false
 paths:
     - ` + filename + `
 `
@@ -91,6 +92,7 @@ paths:
 			cfg := `
 type: filestream
 prospector.scanner.check_interval: 1s
+prospector.scanner.fingerprint.enabled: false
 paths:
     - ` + ingestPath + `
 `
@@ -146,6 +148,7 @@ func TestTakeOverTags(t *testing.T) {
 			cfg := fmt.Sprintf(`
 type: filestream
 prospector.scanner.check_interval: 1s
+prospector.scanner.fingerprint.enabled: false
 take_over: %t
 paths:
     - %s`, testCase.takeOver, filename)
diff --git a/filebeat/input/filestream/internal/input-logfile/prospector.go b/filebeat/input/filestream/internal/input-logfile/prospector.go
index 733e55fe26e..2f90d440e36 100644
--- a/filebeat/input/filestream/internal/input-logfile/prospector.go
+++ b/filebeat/input/filestream/internal/input-logfile/prospector.go
@@ -56,14 +56,13 @@ type ProspectorCleaner interface {
 	// The function passed to UpdateIdentifiers must return an empty string if the key
 	// remains the same.
 	UpdateIdentifiers(func(v Value) (string, interface{}))
-
-	// FixUpIdentifiers migrates IDs in the registry from inputs
-	// that used the deprecated `.global` ID.
-	FixUpIdentifiers(func(v Value) (string, interface{}))
 }
 
 // Value contains the cursor metadata.
 type Value interface {
 	// UnpackCursorMeta returns the cursor metadata required by the prospector.
 	UnpackCursorMeta(to interface{}) error
+
+	// Key returns the registry's key for this resource.
+	Key() string
 }
diff --git a/filebeat/input/filestream/internal/input-logfile/store.go b/filebeat/input/filestream/internal/input-logfile/store.go
index 024ca5c9bfd..eb4f9cd7354 100644
--- a/filebeat/input/filestream/internal/input-logfile/store.go
+++ b/filebeat/input/filestream/internal/input-logfile/store.go
@@ -212,13 +212,28 @@ func (s *sourceStore) CleanIf(pred func(v Value) bool) {
 	}
 }
 
-// FixUpIdentifiers copies an existing resource to a new ID and marks the previous one
+// UpdateIdentifiers copies an existing resource to a new ID and marks the previous one
 // for removal.
-func (s *sourceStore) FixUpIdentifiers(getNewID func(v Value) (string, interface{})) {
+func (s *sourceStore) UpdateIdentifiers(getNewID func(v Value) (string, interface{})) {
 	s.store.ephemeralStore.mu.Lock()
 	defer s.store.ephemeralStore.mu.Unlock()
 
 	for key, res := range s.store.ephemeralStore.table {
+		// Entries in the registry are soft deleted; once the gcStore runs,
+		// they're actually removed from the in-memory registry (ephemeralStore)
+		// and marked as removed in the registry operations log. So we need
+		// to skip all entries that were soft deleted.
+		//
+		//  - res.internalState.TTL == 0: entry has been deleted
+		//  - res.internalState.TTL == -1: entry will never be removed by TTL
+		//  - res.internalState.TTL > 0: entry will be removed once its TTL
+		//    is reached
+		//
+		// If the entry has been deleted, skip it
+		if res.internalState.TTL == 0 {
+			continue
+		}
+
 		if !s.identifier.MatchesInput(key) {
 			continue
 		}
@@ -229,68 +244,30 @@ func (s *sourceStore) FixUpIdentifiers(getNewID func(v Value) (string, interface
 		}
 
 		newKey, updatedMeta := getNewID(res)
-		if len(newKey) > 0 && res.internalState.TTL > 0 {
+		if len(newKey) > 0 {
 			if _, ok := s.store.ephemeralStore.table[newKey]; ok {
 				res.lock.Unlock()
 				continue
 			}
 
-			// Pending updates due to events that have not yet been ACKed
-			// are not included in the copy. Collection on
-			// the copy start from the last known ACKed position.
-			// This might lead to data duplication because the harvester
-			// will pickup from the last ACKed position using the new key
-			// and the pending updates will affect the entry with the oldKey.
 			r := res.copyWithNewKey(newKey)
 			r.cursorMeta = updatedMeta
 			r.stored = false
+			// writeState only writes to the log file (disk);
+			// the write is synchronous.
 			s.store.writeState(r)
 
 			// Add the new resource to the ephemeralStore so the rest of the
 			// codebase can have access to the new value
 			s.store.ephemeralStore.table[newKey] = r
 
-			// Remove the old key from the store
-			s.store.UpdateTTL(res, 0) // aka delete. See store.remove for details
-			s.store.log.Infof("migrated entry in registry from '%s' to '%s'", key, newKey)
-		}
-
-		res.lock.Unlock()
-	}
-}
-
-// UpdateIdentifiers copies an existing resource to a new ID and marks the previous one
-// for removal.
-func (s *sourceStore) UpdateIdentifiers(getNewID func(v Value) (string, interface{})) {
-	s.store.ephemeralStore.mu.Lock()
-	defer s.store.ephemeralStore.mu.Unlock()
-
-	for key, res := range s.store.ephemeralStore.table {
-		if !s.identifier.MatchesInput(key) {
-			continue
-		}
-
-		if !res.lock.TryLock() {
-			continue
-		}
-
-		newKey, updatedMeta := getNewID(res)
-		if len(newKey) > 0 && res.internalState.TTL > 0 {
-			if _, ok := s.store.ephemeralStore.table[newKey]; ok {
-				res.lock.Unlock()
-				continue
-			}
-
-			// Pending updates due to events that have not yet been ACKed
-			// are not included in the copy. Collection on
-			// the copy start from the last known ACKed position.
-			// This might lead to data duplication because the harvester
-			// will pickup from the last ACKed position using the new key
-			// and the pending updates will affect the entry with the oldKey.
-			r := res.copyWithNewKey(newKey)
-			r.cursorMeta = updatedMeta
-			r.stored = false
-			s.store.writeState(r)
+			// Remove the old key from the store, i.e. delete it. This is also
+			// synchronously written to the disk.
+			// We cannot use store.remove because it will
+			// acquire the same lock we hold, causing a deadlock.
+			// See store.remove for details.
+			s.store.UpdateTTL(res, 0)
+			s.store.log.Infof("migrated entry in registry from '%s' to '%s'. Cursor: %v", key, newKey, r.cursor)
 		}
 
 		res.lock.Unlock()
@@ -482,10 +459,16 @@ func (r *resource) UnpackCursor(to interface{}) error {
 	return typeconv.Convert(to, r.activeCursor())
 }
 
+// UnpackCursorMeta unpacks the cursor metadata into the provided struct.
 func (r *resource) UnpackCursorMeta(to interface{}) error {
 	return typeconv.Convert(to, r.cursorMeta)
 }
 
+// Key returns the resource's key
+func (r *resource) Key() string {
+	return r.key
+}
+
 // syncStateSnapshot returns the current insync state based on already ACKed update operations.
 func (r *resource) inSyncStateSnapshot() state {
 	return state{
diff --git a/filebeat/input/filestream/internal/input-logfile/store_test.go b/filebeat/input/filestream/internal/input-logfile/store_test.go
index 6f19e1afad7..2d4f98b5d29 100644
--- a/filebeat/input/filestream/internal/input-logfile/store_test.go
+++ b/filebeat/input/filestream/internal/input-logfile/store_test.go
@@ -347,11 +347,11 @@ type testMeta struct {
 func TestSourceStore_UpdateIdentifiers(t *testing.T) {
 	t.Run("update identifiers when TTL is bigger than zero", func(t *testing.T) {
 		backend := createSampleStore(t, map[string]state{
-			"test::key1": {
+			"test::key1": { // Active resource
 				TTL:  60 * time.Second,
 				Meta: testMeta{IdentifierName: "method"},
 			},
-			"test::key2": {
+			"test::key2": { // Deleted resource
 				TTL:  0 * time.Second,
 				Meta: testMeta{IdentifierName: "method"},
 			},
@@ -372,22 +372,25 @@ func TestSourceStore_UpdateIdentifiers(t *testing.T) {
 			return "", nil
 		})
 
-		var newState state
-		s.persistentStore.Get("test::key1::updated", &newState)
+		// The persistentStore is a mock that does not check whether a state has
+		// been removed before returning it, thus allowing us to get the Updated
+		// timestamp from when the resource was deleted.
+		var deletedState state
+		s.persistentStore.Get("test::key1", &deletedState)
 
 		want := map[string]state{
-			"test::key1": {
-				Updated: s.Get("test::key1").internalState.Updated,
-				TTL:     60 * time.Second,
+			"test::key1": { // old resource is deleted, TTL must be zero
+				Updated: deletedState.Updated,
+				TTL:     0 * time.Second,
 				Meta:    map[string]interface{}{"identifiername": "method"},
 			},
-			"test::key2": {
+			"test::key2": { // Unchanged
 				Updated: s.Get("test::key2").internalState.Updated,
 				TTL:     0 * time.Second,
 				Meta:    map[string]interface{}{"identifiername": "method"},
 			},
-			"test::key1::updated": {
-				Updated: newState.Updated,
+			"test::key1::updated": { // Updated resource
+				Updated: s.Get("test::key1::updated").internalState.Updated,
 				TTL:     60 * time.Second,
 				Meta:    map[string]interface{}{"identifiername": "something"},
 			},
diff --git a/filebeat/input/filestream/legacy_metrics_integration_test.go b/filebeat/input/filestream/legacy_metrics_integration_test.go
index 649ede41f3e..ec2e18a3706 100644
--- a/filebeat/input/filestream/legacy_metrics_integration_test.go
+++ b/filebeat/input/filestream/legacy_metrics_integration_test.go
@@ -41,6 +41,8 @@ filebeat.inputs:
     enabled: true
     close.reader.after_interval: 1s
     prospector.scanner.check_interval: 500ms
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     paths:
       - %s/*.filestream
   - type: log
@@ -48,6 +50,8 @@ filebeat.inputs:
     enabled: true
     close_timeout: 1s
     scan_frequency: 500ms
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     paths:
       - %s/*.log
 
@@ -71,7 +75,9 @@ func TestLegacyMetrics(t *testing.T) {
 	filebeat.WriteConfigFile(cfg)
 	filebeat.Start()
 
-	filebeat.WaitForLogs("Metrics endpoint listening on:", 10*time.Second)
+	filebeat.WaitForLogs("Metrics endpoint listening on:",
+		10*time.Second,
+		"metrics endpoint did not start")
 
 	// After starting Filebeat all counters must be zero
 	waitForMetrics(t,
diff --git a/filebeat/input/filestream/metrics_integration_test.go b/filebeat/input/filestream/metrics_integration_test.go
index 3671f076d0e..b551b2321b7 100644
--- a/filebeat/input/filestream/metrics_integration_test.go
+++ b/filebeat/input/filestream/metrics_integration_test.go
@@ -33,11 +33,13 @@ func TestFilestreamMetrics(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                   "fake-ID",
-		"paths":                                []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval":    "24h",
-		"close.on_state_change.check_interval": "100ms",
-		"close.on_state_change.inactive":       "2s",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "24h",
+		"close.on_state_change.check_interval":   "100ms",
+		"close.on_state_change.inactive":         "2s",
+		"prospector.scanner.fingerprint.enabled": false,
+		"file_identity.native":                   map[string]any{},
 	})
 
 	testlines := []byte("first line\nsecond line\nthird line\n")
diff --git a/filebeat/input/filestream/parsers_integration_test.go b/filebeat/input/filestream/parsers_integration_test.go
index 619d39f0512..858f4e6d1ce 100644
--- a/filebeat/input/filestream/parsers_integration_test.go
+++ b/filebeat/input/filestream/parsers_integration_test.go
@@ -29,9 +29,11 @@ func TestParsersAgentLogs(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"ndjson": map[string]interface{}{
@@ -65,9 +67,11 @@ func TestParsersIncludeMessage(t *testing.T) {
 	testlogName := "test.log"
 	readLine := "include this"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "100ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "100ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"include_message": map[string]interface{}{
@@ -98,9 +102,11 @@ func TestParsersDockerLogsFiltering(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"ndjson": map[string]interface{}{
@@ -137,9 +143,11 @@ func TestParsersSimpleJSONOverwrite(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"ndjson": map[string]interface{}{
@@ -173,9 +181,11 @@ func TestParsersTimestampInJSONMessage(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"ndjson": map[string]interface{}{
@@ -214,9 +224,11 @@ func TestParsersJavaElasticsearchLogs(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -249,9 +261,11 @@ func TestParsersCStyleLog(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -290,9 +304,11 @@ func TestParsersRabbitMQMultilineLog(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -335,9 +351,11 @@ func TestParsersMultilineMaxLines(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -379,9 +397,11 @@ func TestParsersMultilineTimeout(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -444,10 +464,12 @@ func TestParsersMultilineMaxBytes(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
-		"message_max_bytes":                 50,
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"message_max_bytes":                      50,
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -486,10 +508,12 @@ func TestParsersCloseTimeoutWithMultiline(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
-		"close.reader.after_interval":       "1s",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"close.reader.after_interval":            "1s",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
@@ -551,10 +575,12 @@ func TestParsersConsecutiveNewline(t *testing.T) {
 
 	testlogName := "test.log"
 	inp := env.mustCreateInput(map[string]interface{}{
-		"id":                                "fake-ID",
-		"paths":                             []string{env.abspath(testlogName)},
-		"prospector.scanner.check_interval": "1ms",
-		"close.reader.after_interval":       "1s",
+		"id":                                     "fake-ID",
+		"paths":                                  []string{env.abspath(testlogName)},
+		"prospector.scanner.check_interval":      "1ms",
+		"close.reader.after_interval":            "1s",
+		"file_identity.native":                   map[string]any{},
+		"prospector.scanner.fingerprint.enabled": false,
 		"parsers": []map[string]interface{}{
 			{
 				"multiline": map[string]interface{}{
diff --git a/filebeat/input/filestream/prospector.go b/filebeat/input/filestream/prospector.go
index 2bf737a86fd..1e3b7cb6c69 100644
--- a/filebeat/input/filestream/prospector.go
+++ b/filebeat/input/filestream/prospector.go
@@ -47,11 +47,31 @@ var ignoreInactiveSettings = map[string]ignoreInactiveType{
 	ignoreInactiveSinceFirstStartStr: IgnoreInactiveSinceFirstStart,
 }
 
+var identifiersMap = map[string]fileIdentifier{}
+
+func init() {
+	for name, factory := range identifierFactories {
+		if name == inodeMarkerName {
+			// inode marker requires a specific config we cannot infer.
+			continue
+		}
+
+		identifier, err := factory(nil)
+		if err != nil {
+			// Skip identifiers we cannot create. E.g: inode_marker is not
+			// supported on Windows
+			continue
+		}
+		identifiersMap[name] = identifier
+	}
+}
+
 // fileProspector implements the Prospector interface.
 // It contains a file scanner which returns file system events.
 // The FS events then trigger either new Harvester runs or updates
 // the statestore.
 type fileProspector struct {
+	logger              *logp.Logger
 	filewatcher         loginp.FSWatcher
 	identifier          fileIdentifier
 	ignoreOlder         time.Duration
@@ -70,7 +90,7 @@ func (p *fileProspector) Init(
 	// If this fileProspector belongs to an input that did not have an ID
 	// this will find its files in the registry and update them to use the
 	// new ID.
-	globalCleaner.FixUpIdentifiers(func(v loginp.Value) (id string, val interface{}) {
+	globalCleaner.UpdateIdentifiers(func(v loginp.Value) (id string, val interface{}) {
 		var fm fileMeta
 		err := v.UnpackCursorMeta(&fm)
 		if err != nil {
@@ -101,6 +121,16 @@ func (p *fileProspector) Init(
 	}
 
 	identifierName := p.identifier.Name()
+
+	// If the file identity has changed to fingerprint, update the registry
+	// keys so we can keep the state. This is only supported from file
+	// identities that do not require configuration:
+	//  - native (inode + device ID)
+	//  - path
+	if identifierName != fingerprintName {
+		p.logger.Debugf("file identity is '%s', will not migrate registry", identifierName)
+		return nil
+	}
 	cleaner.UpdateIdentifiers(func(v loginp.Value) (string, interface{}) {
 		var fm fileMeta
 		err := v.UnpackCursorMeta(&fm)
@@ -113,12 +143,58 @@ func (p *fileProspector) Init(
 			return "", fm
 		}
 
-		if fm.IdentifierName != identifierName {
-			newKey := p.identifier.GetSource(loginp.FSEvent{NewPath: fm.Source, Descriptor: fd}).Name()
-			fm.IdentifierName = identifierName
-			return newKey, fm
+		// Return early (do nothing) if:
+		//  - The identifiers are the same
+		//  - The old identifier is neither native nor path
+		oldIdentifierName := fm.IdentifierName
+		if oldIdentifierName == identifierName ||
+			!(oldIdentifierName == nativeName || oldIdentifierName == pathName) {
+			return "", nil
+		}
+
+		// Our current file (source) is in the registry, now we need to ensure
+		// this registry entry (resource) actually refers to our file. Sources
+		// are identified by path, however as log files rotate the same path
+		// can point to different files.
+		//
+		// So to ensure we're dealing with the resource from our current file,
+		// we use the old identifier to generate a registry key for the current
+		// file we're trying to migrate, if this key matches with the key in the
+		// registry, then we proceed to update the registry.
+		registryKey := v.Key()
+		oldIdentifier, ok := identifiersMap[oldIdentifierName]
+		if !ok {
+			// This should never happen, but just in case we properly handle it.
+			// If we cannot find the identifier, move on to the next entry;
+			// some identifiers cannot be migrated.
+			p.logger.Errorf(
+				"old file identity '%s' not found while migrating entry to "+
+					"new file identity '%s'. If the file still exists, it will be re-ingested",
+				oldIdentifierName,
+				identifierName,
+			)
+			return "", nil
 		}
-		return "", fm
+		previousIdentifierKey := newID(oldIdentifier.GetSource(
+			loginp.FSEvent{
+				NewPath:    fm.Source,
+				Descriptor: fd,
+			}))
+
+		// If the registry key and the key generated by the old identifier
+		// do not match, the entry refers to a different file: do not migrate it.
+		if previousIdentifierKey != registryKey {
+			return "", fm
+		}
+
+		// The resource matches the file we found in the file system, generate
+		// a new registry key and return it alongside the updated meta.
+		newKey := newID(p.identifier.GetSource(loginp.FSEvent{NewPath: fm.Source, Descriptor: fd}))
+		fm.IdentifierName = identifierName
+		p.logger.Infof("registry key: '%s' and previous file identity key: '%s', are the same, migrating. Source: '%s'",
+			registryKey, previousIdentifierKey, fm.Source)
+
+		return newKey, fm
 	})
 
 	return nil
diff --git a/filebeat/input/filestream/prospector_creator.go b/filebeat/input/filestream/prospector_creator.go
index 5142704a614..91a5e0b30d3 100644
--- a/filebeat/input/filestream/prospector_creator.go
+++ b/filebeat/input/filestream/prospector_creator.go
@@ -53,9 +53,8 @@ func newProspector(config config) (loginp.Prospector, error) {
 		return nil, fmt.Errorf("error while creating file identifier: %w", err)
 	}
 
-	logp.L().
-		With("filestream_id", config.ID).
-		Debugf("file identity is set to %s", identifier.Name())
+	logger := logp.L().Named("input.filestream").With("filestream_id", config.ID)
+	logger.Debugf("file identity is set to %s", identifier.Name())
 
 	fileprospector := fileProspector{
 		filewatcher:         filewatcher,
@@ -64,6 +63,7 @@ func newProspector(config config) (loginp.Prospector, error) {
 		ignoreInactiveSince: config.IgnoreInactive,
 		cleanRemoved:        config.CleanRemoved,
 		stateChangeCloser:   config.Close.OnStateChange,
+		logger:              logger.Named("prospector"),
 	}
 	if config.Rotation == nil {
 		return &fileprospector, nil
diff --git a/filebeat/input/filestream/prospector_test.go b/filebeat/input/filestream/prospector_test.go
index 552b4218c78..c1e806e3948 100644
--- a/filebeat/input/filestream/prospector_test.go
+++ b/filebeat/input/filestream/prospector_test.go
@@ -22,13 +22,14 @@ import (
 	"context"
 	"fmt"
 	"io/fs"
-	"io/ioutil"
 	"os"
+	"path/filepath"
 	"sync"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile"
 	input "github.com/elastic/beats/v7/filebeat/input/v2"
@@ -54,7 +55,8 @@ func TestProspector_InitCleanIfRemoved(t *testing.T) {
 		"prospector init with clean_removed disabled with entries": {
 			entries: map[string]loginp.Value{
 				"key1": &mockUnpackValue{
-					fileMeta{
+					key: "key1",
+					fileMeta: fileMeta{
 						Source:         "/no/such/path",
 						IdentifierName: "path",
 					},
@@ -67,7 +69,8 @@ func TestProspector_InitCleanIfRemoved(t *testing.T) {
 		"prospector init with clean_removed enabled with entries": {
 			entries: map[string]loginp.Value{
 				"key1": &mockUnpackValue{
-					fileMeta{
+					key: "key1",
+					fileMeta: fileMeta{
 						Source:         "/no/such/path",
 						IdentifierName: "path",
 					},
@@ -85,6 +88,7 @@ func TestProspector_InitCleanIfRemoved(t *testing.T) {
 		t.Run(name, func(t *testing.T) {
 			testStore := newMockProspectorCleaner(testCase.entries)
 			p := fileProspector{
+				logger:       logp.L(),
 				identifier:   mustPathIdentifier(false),
 				cleanRemoved: testCase.cleanRemoved,
 				filewatcher:  newMockFileWatcherWithFiles(testCase.filesOnDisk),
@@ -97,13 +101,13 @@ func TestProspector_InitCleanIfRemoved(t *testing.T) {
 }
 
 func TestProspector_InitUpdateIdentifiers(t *testing.T) {
-	f, err := ioutil.TempFile("", "existing_file")
+	f, err := os.CreateTemp(t.TempDir(), "existing_file")
 	if err != nil {
 		t.Fatalf("cannot create temp file")
 	}
 	defer f.Close()
 	tmpFileName := f.Name()
-	fi, err := f.Stat()
+	fi, err := f.Stat() //nolint:typecheck // It is used on L151
 	if err != nil {
 		t.Fatalf("cannot stat test file: %v", err)
 	}
@@ -112,6 +116,7 @@ func TestProspector_InitUpdateIdentifiers(t *testing.T) {
 		entries             map[string]loginp.Value
 		filesOnDisk         map[string]loginp.FileDescriptor
 		expectedUpdatedKeys map[string]string
+		newKey              string
 	}{
 		"prospector init does not update keys if there are no entries": {
 			entries:             nil,
@@ -121,7 +126,8 @@ func TestProspector_InitUpdateIdentifiers(t *testing.T) {
 		"prospector init does not update keys of not existing files": {
 			entries: map[string]loginp.Value{
 				"not_path::key1": &mockUnpackValue{
-					fileMeta{
+					key: "not_path::key1",
+					fileMeta: fileMeta{
 						Source:         "/no/such/path",
 						IdentifierName: "not_path",
 					},
@@ -130,10 +136,11 @@ func TestProspector_InitUpdateIdentifiers(t *testing.T) {
 			filesOnDisk:         nil,
 			expectedUpdatedKeys: map[string]string{},
 		},
-		"prospector init updates keys of existing files": {
+		"prospector init does not update keys if new file identity is not fingerprint": {
 			entries: map[string]loginp.Value{
 				"not_path::key1": &mockUnpackValue{
-					fileMeta{
+					key: "not_path::key1",
+					fileMeta: fileMeta{
 						Source:         tmpFileName,
 						IdentifierName: "not_path",
 					},
@@ -142,7 +149,7 @@ func TestProspector_InitUpdateIdentifiers(t *testing.T) {
 			filesOnDisk: map[string]loginp.FileDescriptor{
 				tmpFileName: {Info: file.ExtendFileInfo(fi)},
 			},
-			expectedUpdatedKeys: map[string]string{"not_path::key1": "path::" + tmpFileName},
+			expectedUpdatedKeys: map[string]string{},
 		},
 	}
 
@@ -152,16 +159,139 @@ func TestProspector_InitUpdateIdentifiers(t *testing.T) {
 		t.Run(name, func(t *testing.T) {
 			testStore := newMockProspectorCleaner(testCase.entries)
 			p := fileProspector{
+				logger:      logp.L(),
 				identifier:  mustPathIdentifier(false),
 				filewatcher: newMockFileWatcherWithFiles(testCase.filesOnDisk),
 			}
-			p.Init(testStore, newMockProspectorCleaner(nil), func(loginp.Source) string { return "" })
-
+			err := p.Init(testStore, newMockProspectorCleaner(nil), func(loginp.Source) string { return testCase.newKey })
+			require.NoError(t, err, "prospector Init must succeed")
 			assert.EqualValues(t, testCase.expectedUpdatedKeys, testStore.updatedKeys)
 		})
 	}
 }
 
+func TestMigrateRegistryToFingerprint(t *testing.T) {
+	const mockFingerprint = "the fingerprint from this file"
+	const mockInputPrefix = "test-input"
+
+	logFileFullPath, err := filepath.Abs(filepath.Join("testdata", "log.log"))
+	if err != nil {
+		t.Fatalf("cannot get absolute path from test file: %s", err)
+	}
+	f, err := os.Open(logFileFullPath)
+	if err != nil {
+		t.Fatalf("cannot open test file")
+	}
+	defer f.Close()
+	tmpFileName := f.Name()
+	fi, err := f.Stat()
+
+	fd := loginp.FileDescriptor{
+		Filename:    tmpFileName,
+		Info:        file.ExtendFileInfo(fi),
+		Fingerprint: mockFingerprint,
+	}
+
+	fingerprintIdentifier, _ := newFingerprintIdentifier(nil)
+	nativeIdentifier, _ := newINodeDeviceIdentifier(nil)
+	pathIdentifier, _ := newPathIdentifier(nil)
+	newIDFunc := func(s loginp.Source) string {
+		return mockInputPrefix + "-" + s.Name()
+	}
+
+	fsEvent := loginp.FSEvent{
+		OldPath:    logFileFullPath,
+		NewPath:    logFileFullPath,
+		Op:         loginp.OpCreate,
+		Descriptor: fd,
+	}
+
+	expectedNewKey := newIDFunc(fingerprintIdentifier.GetSource(fsEvent))
+
+	testCases := map[string]struct {
+		oldIdentifier           fileIdentifier
+		newIdentifier           fileIdentifier
+		expectRegistryMigration bool
+	}{
+		"inode to fingerprint succeeds": {
+			oldIdentifier:           nativeIdentifier,
+			newIdentifier:           fingerprintIdentifier,
+			expectRegistryMigration: true,
+		},
+		"path to fingerprint succeeds": {
+			oldIdentifier:           pathIdentifier,
+			newIdentifier:           fingerprintIdentifier,
+			expectRegistryMigration: true,
+		},
+		"fingerprint to fingerprint fails": {
+			oldIdentifier: fingerprintIdentifier,
+			newIdentifier: fingerprintIdentifier,
+		},
+
+		// If the new identifier is not fingerprint, it will always fail.
+		// So we only test a couple of combinations
+		"fingerprint to native fails": {
+			oldIdentifier: fingerprintIdentifier,
+			newIdentifier: nativeIdentifier,
+		},
+		"path to native fails": {
+			oldIdentifier: pathIdentifier,
+			newIdentifier: nativeIdentifier,
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			oldKey := newIDFunc(tc.oldIdentifier.GetSource(fsEvent))
+			entries := map[string]loginp.Value{
+				oldKey: &mockUnpackValue{
+					key: oldKey,
+					fileMeta: fileMeta{
+						Source:         logFileFullPath,
+						IdentifierName: tc.oldIdentifier.Name(),
+					},
+				},
+			}
+
+			testStore := newMockProspectorCleaner(entries)
+			filesOnDisk := map[string]loginp.FileDescriptor{
+				tmpFileName: fd,
+			}
+
+			p := fileProspector{
+				logger:      logp.L(),
+				identifier:  tc.newIdentifier,
+				filewatcher: newMockFileWatcherWithFiles(filesOnDisk),
+			}
+
+			err = p.Init(
+				testStore,
+				newMockProspectorCleaner(nil),
+				newIDFunc,
+			)
+			require.NoError(t, err, "prospector Init must succeed")
+
+			// testStore.updatedKeys is in the format
+			// oldKey -> newKey
+			if tc.expectRegistryMigration {
+				assert.Equal(
+					t,
+					map[string]string{
+						oldKey: expectedNewKey,
+					},
+					testStore.updatedKeys,
+					"the registry entries were not correctly migrated")
+			} else {
+				assert.Equal(
+					t,
+					map[string]string{},
+					testStore.updatedKeys,
+					"expecting no migration")
+			}
+		})
+	}
+}
+
 func TestProspectorNewAndUpdatedFiles(t *testing.T) {
 	minuteAgo := time.Now().Add(-1 * time.Minute)
 
@@ -246,6 +376,7 @@ func TestProspectorNewAndUpdatedFiles(t *testing.T) {
 
 		t.Run(name, func(t *testing.T) {
 			p := fileProspector{
+				logger:      logp.L(),
 				filewatcher: newMockFileWatcher(test.events, len(test.events)),
 				identifier:  mustPathIdentifier(false),
 				ignoreOlder: test.ignoreOlder,
@@ -283,6 +414,7 @@ func TestProspectorHarvesterUpdateIgnoredFiles(t *testing.T) {
 
 	filewatcher := newMockFileWatcher([]loginp.FSEvent{eventCreate}, 2)
 	p := fileProspector{
+		logger:      logp.L(),
 		filewatcher: filewatcher,
 		identifier:  mustPathIdentifier(false),
 		ignoreOlder: 10 * time.Second,
@@ -347,6 +479,7 @@ func TestProspectorDeletedFile(t *testing.T) {
 
 		t.Run(name, func(t *testing.T) {
 			p := fileProspector{
+				logger:       logp.L(),
 				filewatcher:  newMockFileWatcher(test.events, len(test.events)),
 				identifier:   mustPathIdentifier(false),
 				cleanRemoved: test.cleanRemoved,
@@ -428,6 +561,7 @@ func TestProspectorRenamedFile(t *testing.T) {
 
 		t.Run(name, func(t *testing.T) {
 			p := fileProspector{
+				logger:            logp.L(),
 				filewatcher:       newMockFileWatcher(test.events, len(test.events)),
 				identifier:        mustPathIdentifier(test.trackRename),
 				stateChangeCloser: stateChangeCloserConfig{Renamed: test.closeRenamed},
@@ -600,12 +734,17 @@ func (mu *mockMetadataUpdater) Remove(s loginp.Source) error {
 
 type mockUnpackValue struct {
 	fileMeta
+	key string
 }
 
 func (u *mockUnpackValue) UnpackCursorMeta(to interface{}) error {
 	return typeconv.Convert(to, u.fileMeta)
 }
 
+func (u *mockUnpackValue) Key() string {
+	return u.key
+}
+
 type mockProspectorCleaner struct {
 	available   map[string]loginp.Value
 	cleanedKeys []string
@@ -695,6 +834,7 @@ func TestOnRenameFileIdentity(t *testing.T) {
 	for k, tc := range testCases {
 		t.Run(k, func(t *testing.T) {
 			p := fileProspector{
+				logger:            logp.L(),
 				filewatcher:       newMockFileWatcher(tc.events, len(tc.events)),
 				identifier:        mustPathIdentifier(true),
 				stateChangeCloser: stateChangeCloserConfig{Renamed: true},
diff --git a/filebeat/input/filestream/testdata/log.log b/filebeat/input/filestream/testdata/log.log
new file mode 100644
index 00000000000..733afc5a1aa
--- /dev/null
+++ b/filebeat/input/filestream/testdata/log.log
@@ -0,0 +1,10 @@
+51.157.82.254 - collins3480 [06/Dec/2024:17:03:34 -0500] "GET /enable/transparent HTTP/2.0" 503 29836
+128.72.132.219 - - [06/Dec/2024:17:03:34 -0500] "PATCH /redefine/paradigms/front-end/synergies HTTP/2.0" 200 2307
+153.167.184.78 - - [06/Dec/2024:17:03:34 -0500] "HEAD /leading-edge/interactive/interactive/one-to-one HTTP/2.0" 204 18593
+175.195.94.204 - - [06/Dec/2024:17:03:34 -0500] "PUT /incentivize HTTP/2.0" 301 3998
+235.228.211.66 - hoppe3344 [06/Dec/2024:17:03:34 -0500] "DELETE /proactive/customized/action-items/killer HTTP/2.0" 203 24605
+6.175.232.33 - - [06/Dec/2024:17:03:34 -0500] "HEAD /extensible/productize/b2b HTTP/1.0" 503 15893
+146.190.210.171 - - [06/Dec/2024:17:03:34 -0500] "HEAD /architect/embrace/evolve HTTP/1.0" 502 9833
+224.125.203.225 - - [06/Dec/2024:17:03:34 -0500] "DELETE /turn-key/infrastructures/vortals HTTP/1.0" 100 17062
+194.157.121.128 - nicolas3550 [06/Dec/2024:17:03:34 -0500] "PATCH /vortals/scalable/experiences/deploy HTTP/1.1" 503 8034
+88.58.87.19 - - [06/Dec/2024:17:03:34 -0500] "GET /vertical/schemas HTTP/2.0" 405 2034
diff --git a/filebeat/tests/integration/event_log_file_test.go b/filebeat/tests/integration/event_log_file_test.go
index fce7672199f..793a3386af7 100644
--- a/filebeat/tests/integration/event_log_file_test.go
+++ b/filebeat/tests/integration/event_log_file_test.go
@@ -37,6 +37,8 @@ filebeat.inputs:
   - type: filestream
     id: filestream-input-id
     enabled: true
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     parsers:
       - ndjson:
           target: ""
diff --git a/filebeat/tests/integration/filestream_test.go b/filebeat/tests/integration/filestream_test.go
index 3ddb04a2c20..1bd9f23b0b0 100644
--- a/filebeat/tests/integration/filestream_test.go
+++ b/filebeat/tests/integration/filestream_test.go
@@ -20,9 +20,12 @@
 package integration
 
 import (
+	"errors"
 	"fmt"
+	"os"
 	"path"
 	"path/filepath"
+	"strings"
 	"testing"
 	"time"
 
@@ -36,6 +39,8 @@ filebeat.inputs:
     paths:
       - %s
 
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     clean_inactive: 3s
     ignore_older: 2s
     close.on_state_change.inactive: 1s
@@ -105,3 +110,453 @@ func TestFilestreamCleanInactive(t *testing.T) {
 	registryFile := filepath.Join(filebeat.TempDir(), "data", "registry", "filebeat", "log.json")
 	filebeat.WaitFileContains(registryFile, `"op":"remove"`, time.Second)
 }
+<<<<<<< HEAD
+=======
+
+func TestFilestreamValidationPreventsFilebeatStart(t *testing.T) {
+	duplicatedIDs := `
+filebeat.inputs:
+  - type: filestream
+    id: duplicated-id-1
+    enabled: true
+    paths:
+      - /tmp/*.log
+  - type: filestream
+    id: duplicated-id-1
+    enabled: true
+    paths:
+      - /var/log/*.log
+
+output.discard.enabled: true
+logging:
+  level: debug
+  metrics:
+    enabled: false
+`
+	emptyID := `
+filebeat.inputs:
+  - type: filestream
+    enabled: true
+    paths:
+      - /tmp/*.log
+  - type: filestream
+    enabled: true
+    paths:
+      - /var/log/*.log
+
+output.discard.enabled: true
+logging:
+  level: debug
+  metrics:
+    enabled: false
+`
+	multipleDuplicatedIDs := `
+filebeat.inputs:
+  - type: filestream
+    enabled: true
+    paths:
+      - /tmp/*.log
+  - type: filestream
+    enabled: true
+    paths:
+      - /var/log/*.log
+
+  - type: filestream
+    id: duplicated-id-1
+    enabled: true
+    paths:
+      - /tmp/duplicated-id-1.log
+  - type: filestream
+    id: duplicated-id-1
+    enabled: true
+    paths:
+      - /tmp/duplicated-id-1-2.log
+
+
+  - type: filestream
+    id: unique-id-1
+    enabled: true
+    paths:
+      - /tmp/unique-id-1.log
+  - type: filestream
+    id: unique-id-2
+    enabled: true
+    paths:
+      - /var/log/unique-id-2.log
+
+output.discard.enabled: true
+logging:
+  level: debug
+  metrics:
+    enabled: false
+`
+	tcs := []struct {
+		name string
+		cfg  string
+	}{
+		{
+			name: "duplicated IDs",
+			cfg:  duplicatedIDs,
+		},
+		{
+			name: "duplicated empty ID",
+			cfg:  emptyID,
+		},
+		{
+			name: "two inputs without ID and duplicated IDs",
+			cfg:  multipleDuplicatedIDs,
+		},
+	}
+
+	for _, tc := range tcs {
+		t.Run(tc.name, func(t *testing.T) {
+			filebeat := integration.NewBeat(
+				t,
+				"filebeat",
+				"../../filebeat.test",
+			)
+
+			// Write configuration file and start Filebeat
+			filebeat.WriteConfigFile(tc.cfg)
+			filebeat.Start()
+
+			// Wait for error log
+			filebeat.WaitForLogs(
+				"filestream inputs validation error",
+				10*time.Second,
+				"Filebeat did not log a filestream input validation error")
+
+			proc, err := filebeat.Process.Wait()
+			require.NoError(t, err, "filebeat process.Wait returned an error")
+			assert.False(t, proc.Success(), "filebeat should have failed to start")
+
+		})
+	}
+}
+
+func TestFilestreamValidationSucceeds(t *testing.T) {
+	cfg := `
+filebeat.inputs:
+  - type: filestream
+    enabled: true
+    paths:
+      - /var/log/*.log
+
+  - type: filestream
+    id: unique-id-1
+    enabled: true
+    paths:
+      - /tmp/unique-id-1.log
+  - type: filestream
+    id: unique-id-2
+    enabled: true
+    paths:
+      - /var/log/unique-id-2.log
+
+output.discard.enabled: true
+logging:
+  level: debug
+  metrics:
+    enabled: false
+`
+	filebeat := integration.NewBeat(
+		t,
+		"filebeat",
+		"../../filebeat.test",
+	)
+
+	// Write configuration file and start Filebeat
+	filebeat.WriteConfigFile(cfg)
+	filebeat.Start()
+
+	// Wait for error log
+	filebeat.WaitForLogs(
+		"Input 'filestream' starting",
+		10*time.Second,
+		"Filebeat did log a validation error")
+}
+
+func TestFilestreamCanMigrateIdentity(t *testing.T) {
+	cfgTemplate := `
+filebeat.inputs:
+  - type: filestream
+    id: "test-migrate-ID"
+    paths:
+      - %s
+%s
+
+queue.mem:
+  flush.timeout: 0s
+
+path.home: %s
+
+output.file:
+  path: ${path.home}
+  filename: "output-file"
+  rotate_on_startup: false
+
+logging:
+  level: debug
+  selectors:
+    - input
+    - input.filestream
+    - input.filestream.prospector
+  metrics:
+    enabled: false
+`
+	nativeCfg := `
+    file_identity.native: ~
+`
+	pathCfg := `
+    file_identity.path: ~
+`
+	fingerprintCfg := `
+    file_identity.fingerprint: ~
+    prospector:
+      scanner:
+        fingerprint.enabled: true
+        check_interval: 0.1s
+`
+
+	testCases := map[string]struct {
+		oldIdentityCfg  string
+		oldIdentityName string
+		newIdentityCfg  string
+		notMigrateMsg   string
+		expectMigration bool
+	}{
+		"native to fingerprint": {
+			oldIdentityCfg:  nativeCfg,
+			oldIdentityName: "native",
+			newIdentityCfg:  fingerprintCfg,
+			expectMigration: true,
+		},
+
+		"path to fingerprint": {
+			oldIdentityCfg:  pathCfg,
+			oldIdentityName: "path",
+			newIdentityCfg:  fingerprintCfg,
+			expectMigration: true,
+		},
+
+		"path to native": {
+			oldIdentityCfg:  pathCfg,
+			newIdentityCfg:  nativeCfg,
+			oldIdentityName: "path",
+			expectMigration: false,
+			notMigrateMsg:   "file identity is 'native', will not migrate registry",
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			filebeat := integration.NewBeat(
+				t,
+				"filebeat",
+				"../../filebeat.test",
+			)
+			workDir := filebeat.TempDir()
+			outputFile := filepath.Join(workDir, "output-file*")
+			logFilepath := filepath.Join(workDir, "log.log")
+			integration.GenerateLogFile(t, logFilepath, 25, false)
+
+			cfgYAML := fmt.Sprintf(cfgTemplate, logFilepath, tc.oldIdentityCfg, workDir)
+			filebeat.WriteConfigFile(cfgYAML)
+			filebeat.Start()
+
+			// Wait for the file to be fully ingested
+			eofMsg := fmt.Sprintf("End of file reached: %s; Backoff now.", logFilepath)
+			filebeat.WaitForLogs(eofMsg, time.Second*10, "EOF was not reached")
+			requirePublishedEvents(t, filebeat, 25, outputFile)
+			filebeat.Stop()
+
+			newCfg := fmt.Sprintf(cfgTemplate, logFilepath, tc.newIdentityCfg, workDir)
+			if err := os.WriteFile(filebeat.ConfigFilePath(), []byte(newCfg), 0o644); err != nil {
+				t.Fatalf("cannot write new configuration file: %s", err)
+			}
+
+			filebeat.Start()
+
+			// The happy path is to migrate keys, so we assert it first
+			if tc.expectMigration {
+				// Test the case where the registry migration happens
+				migratingMsg := fmt.Sprintf("are the same, migrating. Source: '%s'", logFilepath)
+				filebeat.WaitForLogs(migratingMsg, time.Second*5, "prospector did not migrate registry entry")
+				filebeat.WaitForLogs("migrated entry in registry from", time.Second*10, "store did not update registry key")
+				filebeat.WaitForLogs(eofMsg, time.Second*10, "EOF was not reached the second time")
+				requirePublishedEvents(t, filebeat, 25, outputFile)
+
+				// Ingest more data to ensure the offset was migrated
+				integration.GenerateLogFile(t, logFilepath, 17, true)
+				filebeat.WaitForLogs(eofMsg, time.Second*5, "EOF was not reached the third time")
+
+				requirePublishedEvents(t, filebeat, 42, outputFile)
+				requireRegistryEntryRemoved(t, workDir, tc.oldIdentityName)
+				return
+			}
+
+			// Another option is for no keys to be migrated because the current
+			// file identity is not fingerprint
+			if tc.notMigrateMsg != "" {
+				filebeat.WaitForLogs(tc.notMigrateMsg, time.Second*5, "the registry should not have been migrated")
+			}
+
+			// The last thing to test when there is no migration is to assert
+			// the file has been fully re-ingested because the file identity
+			// changed
+			filebeat.WaitForLogs(eofMsg, time.Second*10, "EOF was not reached the second time")
+			requirePublishedEvents(t, filebeat, 50, outputFile)
+
+			// Ingest more data to ensure the offset is correctly tracked
+			integration.GenerateLogFile(t, logFilepath, 10, true)
+			filebeat.WaitForLogs(eofMsg, time.Second*5, "EOF was not reached the third time")
+			requirePublishedEvents(t, filebeat, 60, outputFile)
+		})
+	}
+}
+
+func TestFilestreamMigrateIdentityCornerCases(t *testing.T) {
+	cfgTemplate := `
+filebeat.inputs:
+  - type: filestream
+    id: "test-migrate-ID"
+    paths:
+      - %s
+%s
+
+queue.mem:
+  flush.timeout: 0s
+
+path.home: %s
+
+output.file:
+  path: ${path.home}
+  filename: "output-file"
+  rotate_on_startup: false
+
+logging:
+  level: debug
+  selectors:
+    - input
+    - input.filestream
+    - input.filestream.prospector
+  metrics:
+    enabled: false
+`
+	nativeCfg := `
+    file_identity.native: ~
+    prospector:
+      scanner:
+        fingerprint.enabled: false
+        check_interval: 0.1s
+`
+	fingerprintCfg := `
+    file_identity.fingerprint: ~
+    prospector:
+      scanner:
+        fingerprint.enabled: true
+        check_interval: 0.1s
+`
+
+	filebeat := integration.NewBeat(
+		t,
+		"filebeat",
+		"../../filebeat.test",
+	)
+	workDir := filebeat.TempDir()
+
+	logFilepath := filepath.Join(workDir, "log.log")
+	outputFile := filepath.Join(workDir, "output-file*")
+
+	cfgYAML := fmt.Sprintf(cfgTemplate, logFilepath, nativeCfg, workDir)
+	filebeat.WriteConfigFile(cfgYAML)
+	filebeat.Start()
+
+	// Create and ingest 4 different files, all with the same path
+	// to simulate log rotation
+	createFileAndWaitIngestion(t, logFilepath, outputFile, filebeat, 50, 50)
+	createFileAndWaitIngestion(t, logFilepath, outputFile, filebeat, 50, 100)
+	createFileAndWaitIngestion(t, logFilepath, outputFile, filebeat, 50, 150)
+	createFileAndWaitIngestion(t, logFilepath, outputFile, filebeat, 50, 200)
+
+	filebeat.Stop()
+	cfgYAML = fmt.Sprintf(cfgTemplate, logFilepath, fingerprintCfg, workDir)
+	if err := os.WriteFile(filebeat.ConfigFilePath(), []byte(cfgYAML), 0666); err != nil {
+		t.Fatalf("cannot write config file: %s", err)
+	}
+
+	filebeat.Start()
+
+	migratingMsg := fmt.Sprintf("are the same, migrating. Source: '%s'", logFilepath)
+	eofMsg := fmt.Sprintf("End of file reached: %s; Backoff now.", logFilepath)
+
+	filebeat.WaitForLogs(migratingMsg, time.Second*10, "prospector did not migrate registry entry")
+	filebeat.WaitForLogs("migrated entry in registry from", time.Second*10, "store did not update registry key")
+	// Filebeat logs the EOF message when it starts and the file had already been fully ingested.
+	filebeat.WaitForLogs(eofMsg, time.Second*10, "EOF was not reached after restart")
+
+	requirePublishedEvents(t, filebeat, 200, outputFile)
+	// Ingest more data to ensure the offset was migrated
+	integration.GenerateLogFile(t, logFilepath, 20, true)
+	filebeat.WaitForLogs(eofMsg, time.Second*5, "EOF was not reached after adding data")
+
+	requirePublishedEvents(t, filebeat, 220, outputFile)
+	requireRegistryEntryRemoved(t, workDir, "native")
+}
+
+func requireRegistryEntryRemoved(t *testing.T, workDir, identity string) {
+	t.Helper()
+
+	registryLogFile := filepath.Join(workDir, "data", "registry", "filebeat", "log.json")
+	entries := readFilestreamRegistryLog(t, registryLogFile)
+	inputEntries := []registryEntry{}
+	for _, currentEntry := range entries {
+		if strings.Contains(currentEntry.Key, identity) {
+			inputEntries = append(inputEntries, currentEntry)
+		}
+	}
+
+	lastNativeEntry := inputEntries[len(inputEntries)-1]
+	if lastNativeEntry.TTL != 0 {
+		t.Errorf("'%s' has not been removed from the registry", lastNativeEntry.Key)
+	}
+}
+
+func requirePublishedEvents(
+	t *testing.T,
+	filebeat *integration.BeatProc,
+	expected int,
+	outputFile string) {
+
+	t.Helper()
+	publishedEvents := filebeat.CountFileLines(outputFile)
+	if publishedEvents != expected {
+		t.Fatalf("expecting %d published events after file migration, got %d instead", expected, publishedEvents)
+	}
+}
+
+func createFileAndWaitIngestion(
+	t *testing.T,
+	logFilepath, outputFilepath string,
+	fb *integration.BeatProc,
+	n, outputTotal int) {
+
+	t.Helper()
+	_, err := os.Stat(logFilepath)
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		t.Fatalf("cannot stat log file: %s", err)
+	}
+	// Remove the file if it exists
+	if err == nil {
+		if err := os.Remove(logFilepath); err != nil {
+			t.Fatalf("cannot remove log file: %s", err)
+		}
+	}
+
+	integration.GenerateLogFile(t, logFilepath, n, false)
+
+	eofMsg := fmt.Sprintf("End of file reached: %s; Backoff now.", logFilepath)
+	fb.WaitForLogs(eofMsg, time.Second*10, "EOF was not reached")
+	requirePublishedEvents(t, fb, outputTotal, outputFilepath)
+}
+>>>>>>> 78fe7a5b7 (Use `fingerprint` file identity by default and migrate file state from `native` or `path` (#41762))
diff --git a/filebeat/tests/integration/filestream_truncation_test.go b/filebeat/tests/integration/filestream_truncation_test.go
index 98db9a6ad23..f495c72f141 100644
--- a/filebeat/tests/integration/filestream_truncation_test.go
+++ b/filebeat/tests/integration/filestream_truncation_test.go
@@ -38,6 +38,8 @@ filebeat.inputs:
     id: a-unique-filestream-input-id
     enabled: true
     prospector.scanner.check_interval: 30s
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     paths:
       - %s
 output:
diff --git a/filebeat/tests/integration/store_test.go b/filebeat/tests/integration/store_test.go
index d4ee36298d5..e187c682676 100644
--- a/filebeat/tests/integration/store_test.go
+++ b/filebeat/tests/integration/store_test.go
@@ -41,6 +41,8 @@ filebeat.inputs:
     close.on_state_change.inactive: 8s
     ignore_older: 9s
     prospector.scanner.check_interval: 1s
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     paths:
       - %s
 
diff --git a/filebeat/tests/integration/translate_ldap_attribute_test.go b/filebeat/tests/integration/translate_ldap_attribute_test.go
index 376be5e36a2..d7c4f129593 100644
--- a/filebeat/tests/integration/translate_ldap_attribute_test.go
+++ b/filebeat/tests/integration/translate_ldap_attribute_test.go
@@ -45,6 +45,8 @@ const translateguidCfg = `
 filebeat.inputs:
   - type: filestream
     id: "test-translateguidCfg"
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     paths:
       - %s
 
diff --git a/filebeat/tests/system/config/filestream-fixup-id.yml.j2 b/filebeat/tests/system/config/filestream-fixup-id.yml.j2
index 7617429286d..446b7db9723 100644
--- a/filebeat/tests/system/config/filestream-fixup-id.yml.j2
+++ b/filebeat/tests/system/config/filestream-fixup-id.yml.j2
@@ -1,6 +1,8 @@
 filebeat.inputs:
   - type: filestream
     id: test-fix-global-id
+    file_identity.native: ~
+    prospector.scanner.fingerprint.enabled: false
     enabled: true
     paths:
       - {{path}}
diff --git a/filebeat/tests/system/test_reload_inputs.py b/filebeat/tests/system/test_reload_inputs.py
index dd81a60ffe8..cf58557f3ac 100644
--- a/filebeat/tests/system/test_reload_inputs.py
+++ b/filebeat/tests/system/test_reload_inputs.py
@@ -49,6 +49,8 @@ def test_filestream_reload_not_duplicate_id(self):
         input_config_template = """
 - type: filestream
   id: my-unique-id
+  file_identity.native: ~
+  prospector.scanner.fingerprint.enabled: false
   paths:
     - {}
 """
diff --git a/libbeat/tests/integration/framework.go b/libbeat/tests/integration/framework.go
index 904fc1e302a..186d8483f9f 100644
--- a/libbeat/tests/integration/framework.go
+++ b/libbeat/tests/integration/framework.go
@@ -994,3 +994,8 @@ func (b *BeatProc) CountFileLines(glob string) int {
 
 	return bytes.Count(data, []byte{'\n'})
 }
+
+// ConfigFilePath returns the config file path stored in b.configFile.
+func (b *BeatProc) ConfigFilePath() string {
+	return b.configFile
+}
diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml
index 743a31a8610..1b560be40f1 100644
--- a/x-pack/filebeat/filebeat.reference.yml
+++ b/x-pack/filebeat/filebeat.reference.yml
@@ -2400,7 +2400,7 @@ filebeat.inputs:
   # If enabled, instead of relying on the device ID and inode values when comparing files,
   # compare hashes of the given byte ranges in files. A file becomes an ingest target
   # when its size grows larger than offset+length (see below). Until then it's ignored.
-  #prospector.scanner.fingerprint.enabled: false
+  #prospector.scanner.fingerprint.enabled: true
 
   # If fingerprint mode is enabled, sets the offset from the beginning of the file
   # for the byte range used for computing the fingerprint value.
@@ -2535,8 +2535,9 @@ filebeat.inputs:
   #clean_removed: true
 
   # Method to determine if two files are the same or not. By default
-  # the Beat considers two files the same if their inode and device id are the same.
-  #file_identity.native: ~
+  # a fingerprint is generated using the first 1024 bytes of the file,
+  # if the fingerprints match, then the files are considered equal.
+  #file_identity.fingerprint: ~
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering
@@ -3185,6 +3186,8 @@ filebeat.inputs:
 # batch of events has been published successfully. The default value is 1s.
 #filebeat.registry.flush: 1s
 
+# The interval which to run the registry clean up
+#filebeat.registry.cleanup_interval: 5m
 
 # Starting with Filebeat 7.0, the registry uses a new directory format to store
 # Filebeat state. After you upgrade, Filebeat will automatically migrate a 6.x

From e93e1185e12a687bf723c4637713cc81441846c7 Mon Sep 17 00:00:00 2001
From: Tiago Queiroz <tiago.queiroz@elastic.co>
Date: Thu, 19 Dec 2024 16:30:33 -0500
Subject: [PATCH 2/3] Fix merge conflicts

---
 filebeat/tests/integration/filestream_test.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/filebeat/tests/integration/filestream_test.go b/filebeat/tests/integration/filestream_test.go
index 1bd9f23b0b0..24125469dd8 100644
--- a/filebeat/tests/integration/filestream_test.go
+++ b/filebeat/tests/integration/filestream_test.go
@@ -29,6 +29,9 @@ import (
 	"testing"
 	"time"
 
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
 	"github.com/elastic/beats/v7/libbeat/tests/integration"
 )
 
@@ -110,8 +113,6 @@ func TestFilestreamCleanInactive(t *testing.T) {
 	registryFile := filepath.Join(filebeat.TempDir(), "data", "registry", "filebeat", "log.json")
 	filebeat.WaitFileContains(registryFile, `"op":"remove"`, time.Second)
 }
-<<<<<<< HEAD
-=======
 
 func TestFilestreamValidationPreventsFilebeatStart(t *testing.T) {
 	duplicatedIDs := `
@@ -559,4 +560,3 @@ func createFileAndWaitIngestion(
 	fb.WaitForLogs(eofMsg, time.Second*10, "EOF was not reached")
 	requirePublishedEvents(t, fb, outputTotal, outputFilepath)
 }
->>>>>>> 78fe7a5b7 (Use `fingerprint` file identity by default and migrate file state from `native` or `path` (#41762))

From ef081b9b5f96577bc7033aa76b463c7b9fab2997 Mon Sep 17 00:00:00 2001
From: Tiago Queiroz <tiago.queiroz@elastic.co>
Date: Thu, 19 Dec 2024 16:42:10 -0500
Subject: [PATCH 3/3] Revert the file_identity back to native

---
 CHANGELOG.next.asciidoc                       |  5 +-
 .../config/filebeat.inputs.reference.yml.tmpl |  7 ++-
 .../input-filestream-file-options.asciidoc    | 53 ++++++++-----------
 .../docs/inputs/input-filestream.asciidoc     | 25 ++++++---
 filebeat/filebeat.reference.yml               |  7 ++-
 filebeat/input/filestream/fswatch.go          |  2 +-
 filebeat/input/filestream/identifier.go       |  2 +-
 x-pack/filebeat/filebeat.reference.yml        |  7 ++-
 8 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index ff1c7fea449..1e3ae568e30 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -49,11 +49,8 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
 - Remove deprecated awscloudwatch field from Filebeat. {pull}41089[41089]
 - The performance of ingesting SQS data with the S3 input has improved by up to 60x for queues with many small events. `max_number_of_messages` config for SQS mode is now ignored, as the new design no longer needs a manual cap on messages. Instead, use `number_of_workers` to scale ingestion rate in both S3 and SQS modes. The increased efficiency may increase network bandwidth consumption, which can be throttled by lowering `number_of_workers`. It may also increase number of events stored in memory, which can be throttled by lowering the configured size of the internal queue. {pull}40699[40699]
 - Fixes filestream logging the error "filestream input with ID 'ID' already exists, this will lead to data duplication[...]" on Kubernetes when using autodiscover. {pull}41585[41585]
-
 - Add kafka compression support for ZSTD.
 
-- Filebeat fails to start if there is any input with a duplicated ID. It logs the duplicated IDs and the offending inputs configurations. {pull}41731[41731]
-- The Filestream input only starts to ingest a file when it is >= 1024 bytes in size. This happens because the fingerprint` is the default file identity now. To restore the previous behaviour, set `file_identity.native: ~` and `prospector.scanner.fingerprint.enabled: false` {issue}40197[40197] {pull}41762[41762]
 *Heartbeat*
 
 
@@ -359,7 +356,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
 - Add support for SSL and Proxy configurations for websoket type in streaming input. {pull}41934[41934]
 - AWS S3 input registry cleanup for untracked s3 objects. {pull}41694[41694]
 - The environment variable `BEATS_AZURE_EVENTHUB_INPUT_TRACING_ENABLED: true` enables internal logs tracer for the azure-eventhub input. {issue}41931[41931] {pull}41932[41932]
-- The Filestream input now uses the `fingerprint` file identity by default. The state from files are automatically migrated if the previous file identity was `native` (the default) or `path`. If the `file_identity` is explicitly set, there is no change in behaviour. {issue}40197[40197] {pull}41762[41762]
+- The Filestream input can automatically migrate state from files when changing the `file_identity` if the previous file identity was `native` (the default) or `path`. {issue}40197[40197] {pull}41762[41762]
 - Rate limiting operability improvements in the Okta provider of the Entity Analytics input. {issue}40106[40106] {pull}41977[41977]
 - Added default values in the streaming input for websocket retries and put a cap on retry wait time to be lesser than equal to the maximum defined wait time. {pull}42012[42012]
 
diff --git a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
index 5e44bcdb09e..ba658819582 100644
--- a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
+++ b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
@@ -303,7 +303,7 @@ filebeat.inputs:
   # If enabled, instead of relying on the device ID and inode values when comparing files,
   # compare hashes of the given byte ranges in files. A file becomes an ingest target
   # when its size grows larger than offset+length (see below). Until then it's ignored.
-  #prospector.scanner.fingerprint.enabled: true
+  #prospector.scanner.fingerprint.enabled: false
 
   # If fingerprint mode is enabled, sets the offset from the beginning of the file
   # for the byte range used for computing the fingerprint value.
@@ -438,9 +438,8 @@ filebeat.inputs:
   #clean_removed: true
 
   # Method to determine if two files are the same or not. By default
-  # a fingerprint is generated using the first 1024 bytes of the file,
-  # if the fingerprints match, then the files are considered equal.
-  #file_identity.fingerprint: ~
+  # the Beat considers two files the same if their inode and device id are the same.
+  #file_identity.native: ~
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering
diff --git a/filebeat/docs/inputs/input-filestream-file-options.asciidoc b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
index b87d9e67af6..5278e013334 100644
--- a/filebeat/docs/inputs/input-filestream-file-options.asciidoc
+++ b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
@@ -150,9 +150,9 @@ The default setting is 10s.
 [id="{beatname_lc}-input-{type}-scan-fingerprint"]
 ===== `prospector.scanner.fingerprint`
 
-Instead of relying on the device ID and inode values when comparing
-files, compare hashes of the given byte ranges of files. This is the
-default behaviour for {beatname_uc}.
+Instead of relying on the device ID and inode values when comparing files, compare hashes of the given byte ranges of files.
+
+Enable this option if you're experiencing data loss or data duplication due to unstable file identifiers provided by the file system.
 
 Following are some scenarios where this can happen:
 
@@ -553,31 +553,10 @@ IMPORTANT: Changing `file_identity` is only supported from `native` or
 automatically migrate the state of the file when {type} starts.
 
 WARNING: Any unsupported change in `file_identity` methods between
-runs may result in duplicated events in the output.
-
-[id="{beatname_lc}-input-{type}-file-identity-fingerprint"]
-*`fingerprint`*:: The default behaviour of {beatname_uc} is to
-identify files based on content by hashing a specific range (0 to 1024
-bytes by default).
-
-WARNING: In order to use this file identity option, you must enable
-the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint
-option in the scanner>>. Once this file identity is enabled, changing
-the fingerprint configuration (offset, length, or other settings) will
-lead to a global re-ingestion of all files that match the paths
-configuration of the input.
-
-Please refer to the
-<<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint
-configuration for details>>.
+runs may result in duplicated events in the output.
 
-[source,yaml]
-----
-file_identity.fingerprint: ~
-----
-
-*`native`*:: Differentiates between files using their inodes and
-device ids.
+*`native`*:: The default behaviour of {beatname_uc} is to differentiate
+between files using their inodes and device ids.
 +
 In some cases these values can change during the lifetime of a file. 
 For example, when using the Linux
@@ -597,15 +576,15 @@ file_identity.native: ~
 ----
 
 *`path`*:: To identify files based on their paths use this strategy.
-+
+
 WARNING: Only use this strategy if your log files are rotated to a folder
 outside of the scope of your input or not at all. Otherwise you end up
 with duplicated events.
-+
+
 WARNING: This strategy does not support renaming files.
 If an input file is renamed, {beatname_uc} will read it again if the new path
 matches the settings of the input.
-+
+
 The states of files generated by `path` file identity can be migrated to `fingerprint`.
 
 [source,yaml]
@@ -615,7 +594,7 @@ file_identity.path: ~
 
 *`inode_marker`*:: If the device id changes from time to time, you must use
 this method to distinguish files. This option is not supported on Windows.
-+
+
 Set the location of the marker file the following way:
 
 [source,yaml]
@@ -623,6 +602,18 @@ Set the location of the marker file the following way:
 file_identity.inode_marker.path: /logs/.filebeat-marker
 ----
 
+[id="{beatname_lc}-input-{type}-file-identity-fingerprint"]
+*`fingerprint`*:: To identify files based on their content byte range.
+
+WARNING: In order to use this file identity option, you must enable the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint option in the scanner>>. Once this file identity is enabled, changing the fingerprint configuration (offset, length, or other settings) will lead to a global re-ingestion of all files that match the paths configuration of the input.
+
+Please refer to the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint configuration for details>>.
+
+[source,yaml]
+----
+file_identity.fingerprint: ~
+----
+
 [[filestream-log-rotation-support]]
 [float]
 === Log rotation
@@ -635,7 +625,6 @@ When reading from rotating files make sure the paths configuration includes
 both the active file and all rotated files.
 
 By default, {beatname_uc} is able to track files correctly in the following strategies:
-
 * create: new active file with a unique name is created on rotation
 * rename: rotated files are renamed
 
diff --git a/filebeat/docs/inputs/input-filestream.asciidoc b/filebeat/docs/inputs/input-filestream.asciidoc
index 74b7514b91a..96ba5e273e5 100644
--- a/filebeat/docs/inputs/input-filestream.asciidoc
+++ b/filebeat/docs/inputs/input-filestream.asciidoc
@@ -34,10 +34,11 @@ The `log` writes the complete file state.
 
 7. Stale entries can be removed from the registry, even if there is no active input.
 
-8. The default behaviour is to identify files based on their contents
-using the <<filebeat-input-filestream-file-identity-fingerprint,
-`fingerprint`>> <<filebeat-input-filestream-file-identity,
-`file_identity`>> This solves data duplication caused by inode reuse.
+8. The input can identify files based on their contents when using the
+<<filebeat-input-filestream-file-identity-fingerprint, `fingerprint`>>
+<<filebeat-input-filestream-file-identity, `file_identity`>> instead
+of the default inode and device ID. This solves data duplication
+caused by inode reuse.
 
 To configure this input, specify a list of glob-based <<filestream-input-paths,`paths`>>
 that must be crawled to locate and fetch the log lines.
@@ -93,7 +94,15 @@ multiple input sections:
 
 WARNING: Some file identity methods do not support reading from
 network shares and cloud providers, to avoid duplicating events, use
-the default `file_identity`: `fingerprint`.
+`fingerprint` when reading from network shares or cloud providers.
+
+By default, {beatname_uc} identifies files based on their inodes and
+device IDs. However, on network shares and cloud providers these
+values might change during the lifetime of the file. If this happens
+{beatname_uc} thinks that file is new and resends the whole content
+of the file. To solve this problem you can configure the `file_identity` option. Possible
+values besides the default `native` (inode + device ID) are
+`fingerprint`, `path` and `inode_marker`.
 
 IMPORTANT: Changing `file_identity` is only supported when
 migrating from `native` or `path` to `fingerprint`.
@@ -101,7 +110,7 @@ migrating from `native` or `path` to `fingerprint`.
 WARNING: Any unsupported change in `file_identity` methods between
 runs may result in duplicated events in the output.
 
-`fingerprint` is the default and recommended file identity because it does not
+`fingerprint` is the recommended file identity because it does not
 rely on the file system/OS, it generates a hash from a portion of the
 file (the first 1024 bytes, by default) and uses that to identify the
 file. This works well with log rotation strategies that move/rename
@@ -109,7 +118,9 @@ the file and on Windows as file identifiers might be more
 volatile. The downside is that {beatname_uc} will wait until the file
 reaches 1024 bytes before start ingesting any file.
 
-WARNING: Once this file identity is enabled, changing
+WARNING: In order to use this file identity option, one must enable
+the <<{beatname_lc}-input-filestream-scan-fingerprint,fingerprint
+option in the scanner>>. Once this file identity is enabled, changing
 the fingerprint configuration (offset, length, etc) will lead to a
 global re-ingestion of all files that match the paths configuration of
 the input.
diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml
index 0b35505cd33..be189fdfd1c 100644
--- a/filebeat/filebeat.reference.yml
+++ b/filebeat/filebeat.reference.yml
@@ -716,7 +716,7 @@ filebeat.inputs:
   # If enabled, instead of relying on the device ID and inode values when comparing files,
   # compare hashes of the given byte ranges in files. A file becomes an ingest target
   # when its size grows larger than offset+length (see below). Until then it's ignored.
-  #prospector.scanner.fingerprint.enabled: true
+  #prospector.scanner.fingerprint.enabled: false
 
   # If fingerprint mode is enabled, sets the offset from the beginning of the file
   # for the byte range used for computing the fingerprint value.
@@ -851,9 +851,8 @@ filebeat.inputs:
   #clean_removed: true
 
   # Method to determine if two files are the same or not. By default
-  # a fingerprint is generated using the first 1024 bytes of the file,
-  # if the fingerprints match, then the files are considered equal.
-  #file_identity.fingerprint: ~
+  # the Beat considers two files the same if their inode and device id are the same.
+  #file_identity.native: ~
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering
diff --git a/filebeat/input/filestream/fswatch.go b/filebeat/input/filestream/fswatch.go
index 00d84ed9ab4..c51d850bbd2 100644
--- a/filebeat/input/filestream/fswatch.go
+++ b/filebeat/input/filestream/fswatch.go
@@ -278,7 +278,7 @@ func defaultFileScannerConfig() fileScannerConfig {
 		Symlinks:      false,
 		RecursiveGlob: true,
 		Fingerprint: fingerprintConfig{
-			Enabled: true,
+			Enabled: false,
 			Offset:  0,
 			Length:  DefaultFingerprintSize,
 		},
diff --git a/filebeat/input/filestream/identifier.go b/filebeat/input/filestream/identifier.go
index 08bb0c5f071..a0cd7903e7a 100644
--- a/filebeat/input/filestream/identifier.go
+++ b/filebeat/input/filestream/identifier.go
@@ -76,7 +76,7 @@ func (f fileSource) Name() string {
 // newFileIdentifier creates a new state identifier for a log input.
 func newFileIdentifier(ns *conf.Namespace, suffix string) (fileIdentifier, error) {
 	if ns == nil {
-		i, err := newFingerprintIdentifier(nil)
+		i, err := newINodeDeviceIdentifier(nil)
 		if err != nil {
 			return nil, err
 		}
diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml
index 1b560be40f1..0c2eb0c0c51 100644
--- a/x-pack/filebeat/filebeat.reference.yml
+++ b/x-pack/filebeat/filebeat.reference.yml
@@ -2400,7 +2400,7 @@ filebeat.inputs:
   # If enabled, instead of relying on the device ID and inode values when comparing files,
   # compare hashes of the given byte ranges in files. A file becomes an ingest target
   # when its size grows larger than offset+length (see below). Until then it's ignored.
-  #prospector.scanner.fingerprint.enabled: true
+  #prospector.scanner.fingerprint.enabled: false
 
   # If fingerprint mode is enabled, sets the offset from the beginning of the file
   # for the byte range used for computing the fingerprint value.
@@ -2535,9 +2535,8 @@ filebeat.inputs:
   #clean_removed: true
 
   # Method to determine if two files are the same or not. By default
-  # a fingerprint is generated using the first 1024 bytes of the file,
-  # if the fingerprints match, then the files are considered equal.
-  #file_identity.fingerprint: ~
+  # the Beat considers two files the same if their inode and device id are the same.
+  #file_identity.native: ~
 
   # Optional additional fields. These fields can be freely picked
   # to add additional information to the crawled log files for filtering