From a6db9e52db1971ff25cedd2ad5d0dffd6770b207 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 29 Jun 2021 10:26:14 -0600 Subject: [PATCH 1/8] Fix merging dicts for duplicate dotted keys Fixes #45 --- ecs_logging/_utils.py | 4 ++-- tests/conftest.py | 5 +++++ tests/test_structlog_formatter.py | 8 +++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ecs_logging/_utils.py b/ecs_logging/_utils.py index df49ac1..f0cd158 100644 --- a/ecs_logging/_utils.py +++ b/ecs_logging/_utils.py @@ -111,8 +111,8 @@ def merge_dicts(from_, into): When called has side-effects within 'destination'. """ for key, value in from_.items(): - if isinstance(value, dict): - merge_dicts(value, into.setdefault(key, {})) + if isinstance(value, dict) and isinstance(into.setdefault(key, {}), dict): + merge_dicts(value, into[key]) else: into[key] = value return into diff --git a/tests/conftest.py b/tests/conftest.py index 48eeebf..2993599 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,7 @@ import json import os import collections +import sys import pytest @@ -27,6 +28,10 @@ class ValidationError(Exception): pass +if sys.version_info[0] >= 3: + basestring = str + + @pytest.fixture def spec_validator(): with open( diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py index a0eb6b4..d0d7e5a 100644 --- a/tests/test_structlog_formatter.py +++ b/tests/test_structlog_formatter.py @@ -22,7 +22,12 @@ def make_event_dict(): - return {"event": "test message", "log.logger": "logger-name"} + return { + "event": "test message", + "log.logger": "logger-name", + "foo": "bar", + "foo.dataset": "baz", + } @mock.patch("time.time") @@ -33,6 +38,7 @@ def test_event_dict_formatted(time, spec_validator): assert spec_validator(formatter(None, "debug", make_event_dict())) == ( '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",' '"message":"test message","ecs":{"version":"1.6.0"},' + '"foo":{"dataset":"baz"},' '"log":{"logger":"logger-name"}}' ) From 1edb8eef4a4ff126f86f3f5f1f2fa629f20aae91 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 29 Jun 2021 10:36:07 -0600 Subject: [PATCH 2/8] CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6867735..dfa2f83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased + +* Fix for duplicate dotted keys ([#46]) + ## 1.0.0 (2021-02-08) * Remove "beta" designation From f50fa9c327750c92c0141b0c0ef38bae67d4db5c Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 29 Jun 2021 10:37:21 -0600 Subject: [PATCH 3/8] Force `message` to a string --- ecs_logging/_structlog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecs_logging/_structlog.py b/ecs_logging/_structlog.py index dac4d4a..407e15d 100644 --- a/ecs_logging/_structlog.py +++ b/ecs_logging/_structlog.py @@ -36,7 +36,7 @@ def __call__(self, _, name, event_dict): def format_to_ecs(self, event_dict): # type: (Dict[str, Any]) -> Dict[str, Any] - event_dict["message"] = event_dict.pop("event") + event_dict["message"] = str(event_dict.pop("event")) if "@timestamp" not in event_dict: event_dict["@timestamp"] = ( datetime.datetime.utcfromtimestamp(time.time()).strftime( From 7a539b434483f43f3eca084ee18c560fcacc8be6 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Wed, 30 Jun 2021 11:03:56 -0600 Subject: [PATCH 4/8] Avoid conflicts between `event` and `event.dataset` --- ecs_logging/_structlog.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ecs_logging/_structlog.py b/ecs_logging/_structlog.py index 407e15d..a34e2c9 100644 --- a/ecs_logging/_structlog.py +++ b/ecs_logging/_structlog.py @@ -29,6 +29,10 @@ class StructlogFormatter: def __call__(self, _, name, event_dict): # type: (Any, str, Dict[str, Any]) -> str + + # Handle event -> message now so that stuff like `event.dataset` doesn't + # cause problems down the line + event_dict["message"] = str(event_dict.pop("event")) event_dict = normalize_dict(event_dict) event_dict.setdefault("log", {}).setdefault("level", name.lower()) event_dict = self.format_to_ecs(event_dict) @@ -36,7 +40,6 @@ def __call__(self, _, name, event_dict): def format_to_ecs(self, event_dict): # type: (Dict[str, Any]) -> Dict[str, Any] - event_dict["message"] = str(event_dict.pop("event")) if "@timestamp" not in event_dict: event_dict["@timestamp"] = ( datetime.datetime.utcfromtimestamp(time.time()).strftime( From ffcbe9a6a5bae40d26de99b70a01723decce92ae Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Wed, 30 Jun 2021 11:04:10 -0600 Subject: [PATCH 5/8] Better handling for elasticapm client cleanup --- tests/conftest.py | 13 +++++++++++++ tests/test_apm.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2993599..ed7e588 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,8 @@ import os import collections import sys +import logging +import elasticapm import pytest @@ -86,3 +88,14 @@ def validator(data_json): return data_json return validator + + +@pytest.fixture +def apm(): + if sys.version_info[0] >= 3: + record_factory = logging.getLogRecordFactory() + apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) + yield apm + apm.close() + if sys.version_info[0] >= 3: + logging.setLogRecordFactory(record_factory) diff --git a/tests/test_apm.py b/tests/test_apm.py index 101924a..004020e 100644 --- a/tests/test_apm.py +++ b/tests/test_apm.py @@ -27,8 +27,7 @@ from .compat import StringIO -def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator): - apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) +def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator, apm): stream = StringIO() logger = structlog.PrintLogger(stream) logger = structlog.wrap_logger( @@ -57,14 +56,14 @@ def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator): "trace": {"id": trace_id}, "transaction": {"id": transaction_id}, "service": {"name": "apm-service"}, + "event": {"dataset": "apm-service.log"}, } @pytest.mark.skipif( sys.version_info < (3, 2), reason="elastic-apm uses logger factory in Python 3.2+" ) -def test_elastic_apm_stdlib_no_filter_log_correlation_ecs_fields(): - apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) +def test_elastic_apm_stdlib_no_filter_log_correlation_ecs_fields(apm): stream = StringIO() logger = logging.getLogger("apm-logger") handler = logging.StreamHandler(stream) @@ -104,11 +103,11 @@ def test_elastic_apm_stdlib_no_filter_log_correlation_ecs_fields(): "trace": {"id": trace_id}, "transaction": {"id": transaction_id}, "service": {"name": "apm-service"}, + "event": {"dataset": "apm-service.log"}, } -def test_elastic_apm_stdlib_with_filter_log_correlation_ecs_fields(): - apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) +def test_elastic_apm_stdlib_with_filter_log_correlation_ecs_fields(apm): stream = StringIO() logger = logging.getLogger("apm-logger") handler = logging.StreamHandler(stream) @@ -149,11 +148,11 @@ def test_elastic_apm_stdlib_with_filter_log_correlation_ecs_fields(): "trace": {"id": trace_id}, "transaction": {"id": transaction_id}, "service": {"name": "apm-service"}, + "event": {"dataset": "apm-service.log"}, } -def test_elastic_apm_stdlib_exclude_fields(): - apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) +def test_elastic_apm_stdlib_exclude_fields(apm): stream = StringIO() logger = logging.getLogger("apm-logger") handler = logging.StreamHandler(stream) @@ -195,4 +194,5 @@ def test_elastic_apm_stdlib_exclude_fields(): "message": "test message", "trace": {"id": trace_id}, "service": {"name": "apm-service"}, + "event": {"dataset": "apm-service.log"}, } From 3a9c96c3fc5ce07daedab7f1ac0eee1d4228fc38 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Wed, 30 Jun 2021 11:42:42 -0600 Subject: [PATCH 6/8] Better errors for conflicting dotted keys --- ecs_logging/_utils.py | 10 +++++++++- tests/test_structlog_formatter.py | 15 +++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/ecs_logging/_utils.py b/ecs_logging/_utils.py index f0cd158..6402f74 100644 --- a/ecs_logging/_utils.py +++ b/ecs_logging/_utils.py @@ -111,8 +111,16 @@ def merge_dicts(from_, into): When called has side-effects within 'destination'. """ for key, value in from_.items(): - if isinstance(value, dict) and isinstance(into.setdefault(key, {}), dict): + into.setdefault(key, {}) + if isinstance(value, dict) and isinstance(into[key], dict): merge_dicts(value, into[key]) + elif into[key]: + raise TypeError( + "Type mismatch at key `{}`: merging dicts would replace value `{}` with `{}`. This is likely due to " + "dotted keys in the event dict being turned into nested dictionaries, causing a conflict.".format( + key, into[key], value + ) + ) else: into[key] = value return into diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py index d0d7e5a..76effdc 100644 --- a/tests/test_structlog_formatter.py +++ b/tests/test_structlog_formatter.py @@ -20,16 +20,26 @@ import mock from .compat import StringIO +import pytest + def make_event_dict(): return { "event": "test message", + "event.dataset": "agent.log", "log.logger": "logger-name", "foo": "bar", - "foo.dataset": "baz", } +def test_conflicting_event_dict(): + formatter = ecs_logging.StructlogFormatter() + event_dict = make_event_dict() + event_dict["foo.bar"] = "baz" + with pytest.raises(TypeError): + formatter(None, "debug", event_dict) + + @mock.patch("time.time") def test_event_dict_formatted(time, spec_validator): time.return_value = 1584720997.187709 @@ -38,7 +48,8 @@ def test_event_dict_formatted(time, spec_validator): assert spec_validator(formatter(None, "debug", make_event_dict())) == ( '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",' '"message":"test message","ecs":{"version":"1.6.0"},' - '"foo":{"dataset":"baz"},' + '"event":{"dataset":"agent.log"},' + '"foo":"bar",' '"log":{"logger":"logger-name"}}' ) From 42b18bdf08c59c4832f3696ce9a6db5adc4b01ac Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Wed, 30 Jun 2021 12:03:22 -0600 Subject: [PATCH 7/8] Peg APM tests to py 3.6+ only --- tests/test_apm.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_apm.py b/tests/test_apm.py index 004020e..ffa67b0 100644 --- a/tests/test_apm.py +++ b/tests/test_apm.py @@ -27,6 +27,11 @@ from .compat import StringIO +# elasticapm only supports python 3.6+ +if sys.version_info < (3, 6): + pytestmark = [pytest.mark.skip] + + def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator, apm): stream = StringIO() logger = structlog.PrintLogger(stream) @@ -60,9 +65,6 @@ def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator, apm): } -@pytest.mark.skipif( - sys.version_info < (3, 2), reason="elastic-apm uses logger factory in Python 3.2+" -) def test_elastic_apm_stdlib_no_filter_log_correlation_ecs_fields(apm): stream = StringIO() logger = logging.getLogger("apm-logger") From 54c5afc60d3b465887cfe70c93421323e16ec300 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 1 Jul 2021 08:57:30 -0600 Subject: [PATCH 8/8] Reviewer suggestions --- CHANGELOG.md | 4 +++- ecs_logging/_utils.py | 2 +- tests/conftest.py | 2 ++ tests/test_apm.py | 5 ----- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa2f83..1263606 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,9 @@ ## Unreleased -* Fix for duplicate dotted keys ([#46]) +* Fixed an issue in `StructlogFormatter` caused by a conflict with `event` + (used for the log `message`) and `event.dataset` (a field provided by the + `elasticapm` integration) ([#46](https://github.com/elastic/ecs-logging-python/pull/46)) ## 1.0.0 (2021-02-08) diff --git a/ecs_logging/_utils.py b/ecs_logging/_utils.py index 6402f74..dea9ee9 100644 --- a/ecs_logging/_utils.py +++ b/ecs_logging/_utils.py @@ -114,7 +114,7 @@ def merge_dicts(from_, into): into.setdefault(key, {}) if isinstance(value, dict) and isinstance(into[key], dict): merge_dicts(value, into[key]) - elif into[key]: + elif into[key] != {}: raise TypeError( "Type mismatch at key `{}`: merging dicts would replace value `{}` with `{}`. This is likely due to " "dotted keys in the event dict being turned into nested dictionaries, causing a conflict.".format( diff --git a/tests/conftest.py b/tests/conftest.py index ed7e588..45d6873 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -92,6 +92,8 @@ def validator(data_json): @pytest.fixture def apm(): + if sys.version_info < (3, 6): + pytest.skip("elasticapm only supports python 3.6+") if sys.version_info[0] >= 3: record_factory = logging.getLogRecordFactory() apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) diff --git a/tests/test_apm.py b/tests/test_apm.py index ffa67b0..0d28784 100644 --- a/tests/test_apm.py +++ b/tests/test_apm.py @@ -27,11 +27,6 @@ from .compat import StringIO -# elasticapm only supports python 3.6+ -if sys.version_info < (3, 6): - pytestmark = [pytest.mark.skip] - - def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator, apm): stream = StringIO() logger = structlog.PrintLogger(stream)