From 57d30b33a06c3ea2afc5ca19ee1fd219385415d7 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Mon, 5 Aug 2024 18:44:33 +0200 Subject: [PATCH] Accept non URL-encoded headers (#4103) --- CHANGELOG.md | 2 + .../exporter/otlp/proto/grpc/exporter.py | 2 +- .../otlp/proto/http/_log_exporter/__init__.py | 4 +- .../proto/http/metric_exporter/__init__.py | 4 +- .../proto/http/trace_exporter/__init__.py | 4 +- .../metrics/test_otlp_metrics_exporter.py | 3 +- .../tests/test_proto_span_exporter.py | 3 +- .../src/opentelemetry/util/re.py | 56 ++++++++++++++---- opentelemetry-api/tests/util/test_re.py | 59 +++++++++++++++---- 9 files changed, 109 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da8f01646d..cfdc5686b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update log export example to not use root logger ([#4090](https://github.com/open-telemetry/opentelemetry-python/pull/4090)) - sdk: Add OS resource detector ([#3992](https://github.com/open-telemetry/opentelemetry-python/pull/3992)) +- sdk: Accept non URL-encoded headers in `OTEL_EXPORTER_OTLP_*HEADERS` to match other languages SDKs + ([#4103](https://github.com/open-telemetry/opentelemetry-python/pull/4103)) - Update semantic conventions to version 1.27.0 ([#4104](https://github.com/open-telemetry/opentelemetry-python/pull/4104)) diff --git a/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py b/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py index b422682828..0c398f33e6 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py @@ -188,7 +188,7 @@ def __init__( self._headers = headers or environ.get(OTEL_EXPORTER_OTLP_HEADERS) if isinstance(self._headers, str): - temp_headers = parse_env_headers(self._headers) + temp_headers = parse_env_headers(self._headers, liberal=True) self._headers = tuple(temp_headers.items()) elif isinstance(self._headers, dict): self._headers = tuple(self._headers.items()) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py index 902ac5f242..b337aedfb0 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py @@ -86,7 +86,9 @@ def __init__( OTEL_EXPORTER_OTLP_LOGS_HEADERS, environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""), ) - self._headers = headers or parse_env_headers(headers_string) + self._headers = headers or parse_env_headers( + headers_string, liberal=True + ) self._timeout = timeout or int( environ.get( OTEL_EXPORTER_OTLP_LOGS_TIMEOUT, diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index 57e030bd54..ea59d1a208 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -117,7 +117,9 @@ def __init__( OTEL_EXPORTER_OTLP_METRICS_HEADERS, environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""), ) - self._headers = headers or parse_env_headers(headers_string) + self._headers = headers or parse_env_headers( + headers_string, liberal=True + ) self._timeout = timeout or int( environ.get( OTEL_EXPORTER_OTLP_METRICS_TIMEOUT, diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index ea21cc664b..b94e54ef4a 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -84,7 +84,9 @@ def __init__( OTEL_EXPORTER_OTLP_TRACES_HEADERS, environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""), ) - self._headers = headers or parse_env_headers(headers_string) + self._headers = headers or parse_env_headers( + headers_string, liberal=True + ) self._timeout = timeout or int( environ.get( OTEL_EXPORTER_OTLP_TRACES_TIMEOUT, diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py index 674785056a..cb8f170b1c 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py @@ -244,7 +244,8 @@ def test_headers_parse_from_env(self): ( "Header format invalid! Header values in environment " "variables must be URL encoded per the OpenTelemetry " - "Protocol Exporter specification: missingValue" + "Protocol Exporter specification or a comma separated " + "list of name=value occurrences: missingValue" ), ) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py index 69874664c7..61b46dda48 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py @@ -197,7 +197,8 @@ def test_headers_parse_from_env(self): ( "Header format invalid! Header values in environment " "variables must be URL encoded per the OpenTelemetry " - "Protocol Exporter specification: missingValue" + "Protocol Exporter specification or a comma separated " + "list of name=value occurrences: missingValue" ), ) diff --git a/opentelemetry-api/src/opentelemetry/util/re.py b/opentelemetry-api/src/opentelemetry/util/re.py index 5f19521d04..bee1c61562 100644 --- a/opentelemetry-api/src/opentelemetry/util/re.py +++ b/opentelemetry-api/src/opentelemetry/util/re.py @@ -36,10 +36,23 @@ _KEY_VALUE_FORMAT = rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}{_VALUE_FORMAT}{_OWS}" _HEADER_PATTERN = compile(_KEY_VALUE_FORMAT) +_LIBERAL_HEADER_PATTERN = compile( + rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}[\w ]*{_OWS}" +) _DELIMITER_PATTERN = compile(r"[ \t]*,[ \t]*") _BAGGAGE_PROPERTY_FORMAT = rf"{_KEY_VALUE_FORMAT}|{_OWS}{_KEY_FORMAT}{_OWS}" +_INVALID_HEADER_ERROR_MESSAGE_STRICT_TEMPLATE = ( + "Header format invalid! Header values in environment variables must be " + "URL encoded per the OpenTelemetry Protocol Exporter specification: %s" +) + +_INVALID_HEADER_ERROR_MESSAGE_LIBERAL_TEMPLATE = ( + "Header format invalid! Header values in environment variables must be " + "URL encoded per the OpenTelemetry Protocol Exporter specification or " + "a comma separated list of name=value occurrences: %s" +) # pylint: disable=invalid-name @@ -49,30 +62,51 @@ def parse_headers(s: str) -> Mapping[str, str]: return parse_env_headers(s) -def parse_env_headers(s: str) -> Mapping[str, str]: +def parse_env_headers(s: str, liberal: bool = False) -> Mapping[str, str]: """ Parse ``s``, which is a ``str`` instance containing HTTP headers encoded for use in ENV variables per the W3C Baggage HTTP header format at https://www.w3.org/TR/baggage/#baggage-http-header-format, except that additional semi-colon delimited metadata is not supported. + If ``liberal`` is True we try to parse ``s`` anyway to be more compatible + with other languages SDKs that accept non URL-encoded headers by default. """ headers: Dict[str, str] = {} headers_list: List[str] = split(_DELIMITER_PATTERN, s) for header in headers_list: if not header: # empty string continue - match = _HEADER_PATTERN.fullmatch(header.strip()) - if not match: + header_match = _HEADER_PATTERN.fullmatch(header.strip()) + if not header_match and not liberal: _logger.warning( - "Header format invalid! Header values in environment variables must be " - "URL encoded per the OpenTelemetry Protocol Exporter specification: %s", - header, + _INVALID_HEADER_ERROR_MESSAGE_STRICT_TEMPLATE, header ) continue - # value may contain any number of `=` - name, value = match.string.split("=", 1) - name = unquote(name).strip().lower() - value = unquote(value).strip() - headers[name] = value + + if header_match: + match_string: str = header_match.string + # value may contain any number of `=` + name, value = match_string.split("=", 1) + name = unquote(name).strip().lower() + value = unquote(value).strip() + headers[name] = value + else: + # this is not url-encoded and does not match the spec but we decided to be + # liberal in what we accept to match other languages SDKs behaviour + liberal_header_match = _LIBERAL_HEADER_PATTERN.fullmatch( + header.strip() + ) + if not liberal_header_match: + _logger.warning( + _INVALID_HEADER_ERROR_MESSAGE_LIBERAL_TEMPLATE, header + ) + continue + + liberal_match_string: str = liberal_header_match.string + # value may contain any number of `=` + name, value = liberal_match_string.split("=", 1) + name = name.strip().lower() + value = value.strip() + headers[name] = value return headers diff --git a/opentelemetry-api/tests/util/test_re.py b/opentelemetry-api/tests/util/test_re.py index ea86f3e700..b3b4df46c5 100644 --- a/opentelemetry-api/tests/util/test_re.py +++ b/opentelemetry-api/tests/util/test_re.py @@ -20,8 +20,9 @@ class TestParseHeaders(unittest.TestCase): - def test_parse_env_headers(self): - inp = [ + @staticmethod + def _common_test_cases(): + return [ # invalid header name ("=value", [], True), ("}key=value", [], True), @@ -59,18 +60,54 @@ def test_parse_env_headers(self): True, ), ] + + def test_parse_env_headers(self): + inp = self._common_test_cases() + [ + # invalid header value + ("key=value othervalue", [], True), + ] for case_ in inp: headers, expected, warn = case_ - if warn: - with self.assertLogs(level="WARNING") as cm: + with self.subTest(headers=headers): + if warn: + with self.assertLogs(level="WARNING") as cm: + self.assertEqual( + parse_env_headers(headers), dict(expected) + ) + self.assertTrue( + "Header format invalid! Header values in environment " + "variables must be URL encoded per the OpenTelemetry " + "Protocol Exporter specification:" + in cm.records[0].message, + ) + else: self.assertEqual( parse_env_headers(headers), dict(expected) ) - self.assertTrue( - "Header format invalid! Header values in environment " - "variables must be URL encoded per the OpenTelemetry " - "Protocol Exporter specification:" - in cm.records[0].message, + + def test_parse_env_headers_liberal(self): + inp = self._common_test_cases() + [ + # valid header value + ("key=value othervalue", [("key", "value othervalue")], False), + ] + for case_ in inp: + headers, expected, warn = case_ + with self.subTest(headers=headers): + if warn: + with self.assertLogs(level="WARNING") as cm: + self.assertEqual( + parse_env_headers(headers, liberal=True), + dict(expected), + ) + self.assertTrue( + "Header format invalid! Header values in environment " + "variables must be URL encoded per the OpenTelemetry " + "Protocol Exporter specification or a comma separated " + "list of name=value occurrences:" + in cm.records[0].message, + ) + else: + self.assertEqual( + parse_env_headers(headers, liberal=True), + dict(expected), ) - else: - self.assertEqual(parse_env_headers(headers), dict(expected))