diff --git a/botocore/utils.py b/botocore/utils.py
index a0d2d90245..9775e699a3 100644
--- a/botocore/utils.py
+++ b/botocore/utils.py
@@ -12,8 +12,8 @@
 # language governing permissions and limitations under the License.
 import base64
 import binascii
-import cgi
 import datetime
+import email.message
 import functools
 import hashlib
 import io
@@ -3010,10 +3010,12 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'):
     if not content_type:
         return None
 
-    content_type, params = cgi.parse_header(content_type)
+    message = email.message.Message()
+    message['content-type'] = content_type
+    charset = message.get_param("charset")
 
-    if 'charset' in params:
-        return params['charset'].strip("'\"")
+    if charset is not None:
+        return charset
 
     if 'text' in content_type:
         return default
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 26b5ef2fa5..f55d847d19 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -68,6 +68,7 @@
     determine_content_length,
     ensure_boolean,
     fix_s3_host,
+    get_encoding_from_headers,
     get_service_module_name,
     has_header,
     instance_cache,
@@ -3372,3 +3373,18 @@ def seek(self, *args, **kwargs):
 )
 def test_is_s3_accelerate_url(url, expected):
     assert is_s3_accelerate_url(url) == expected
+
+
+@pytest.mark.parametrize(
+    'headers, default, expected',
+    (
+        ({}, 'ISO-8859-1', None),
+        ({'Content-Type': 'text/html; charset=utf-8'}, 'default', 'utf-8'),
+        ({'Content-Type': 'text/html; charset="utf-8"'}, 'default', 'utf-8'),
+        ({'Content-Type': 'text/html'}, 'ascii', 'ascii'),
+        ({'Content-Type': 'application/json'}, 'ISO-8859-1', None),
+    ),
+)
+def test_get_encoding_from_headers(headers, default, expected):
+    charset = get_encoding_from_headers(HeadersDict(headers), default=default)
+    assert charset == expected