From fde78148bc95f6933909e3f19bbabe906731e4dd Mon Sep 17 00:00:00 2001
From: Terry Cain
Date: Wed, 10 Aug 2022 22:48:41 +0100
Subject: [PATCH] Adds support for checksums in streamed request trailers

---
 CHANGES.rst                 |  5 +++
 aiobotocore/client.py       |  2 +-
 aiobotocore/httpchecksum.py | 90 +++++++++++++++++++++++++++++++++++++
 aiobotocore/httpsession.py  |  2 +
 tests/test_basic_s3.py      | 15 +++++++
 tests/test_patches.py       | 13 +++++-
 6 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 5650559b..785fb2a4 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,5 +1,10 @@
 Changes
 -------
+2.4.0 Unreleased
+^^^^^^^^^^^^^^^^
+
+* fix S3 uploads with newer checksum algorithms
+
 2.3.4 (2022-06-23)
 ^^^^^^^^^^^^^^^^^^
 * fix select_object_content
diff --git a/aiobotocore/client.py b/aiobotocore/client.py
index b184b407..28695ec6 100644
--- a/aiobotocore/client.py
+++ b/aiobotocore/client.py
@@ -6,7 +6,6 @@
     PaginatorDocstring,
     S3ArnParamHandler,
     S3EndpointSetter,
-    apply_request_checksum,
     logger,
     resolve_checksum_context,
 )
@@ -23,6 +22,7 @@
 from .paginate import AioPaginator
 from .retries import adaptive, standard
 from .utils import AioS3RegionRedirector
+from .httpchecksum import apply_request_checksum
 
 
 history_recorder = get_global_history_recorder()
diff --git a/aiobotocore/httpchecksum.py b/aiobotocore/httpchecksum.py
index d3b31647..00af7da7 100644
--- a/aiobotocore/httpchecksum.py
+++ b/aiobotocore/httpchecksum.py
@@ -1,12 +1,51 @@
+import inspect
+import io
+
 from botocore.httpchecksum import (
     _CHECKSUM_CLS,
     FlexibleChecksumError,
     _handle_streaming_response,
     base64,
     logger,
+    determine_content_length,
+    conditionally_calculate_md5,
+    _apply_request_header_checksum,
+    AwsChunkedWrapper
 )
+
+
+class AioAwsChunkedWrapper(AwsChunkedWrapper):
+    async def _make_chunk(self):
+        # NOTE: Chunk size is not deterministic as read could return less. This
+        # means we cannot know the content length of the encoded aws-chunked
+        # stream ahead of time without ensuring a consistent chunk size
+
+        raw_chunk = self._raw.read(self._chunk_size)
+        if inspect.isawaitable(raw_chunk):
+            raw_chunk = await raw_chunk
+
+        hex_len = hex(len(raw_chunk))[2:].encode("ascii")
+        self._complete = not raw_chunk
+
+        if self._checksum:
+            self._checksum.update(raw_chunk)
+
+        if self._checksum and self._complete:
+            name = self._checksum_name.encode("ascii")
+            checksum = self._checksum.b64digest().encode("ascii")
+            return b"0\r\n%s:%s\r\n\r\n" % (name, checksum)
+
+        return b"%s\r\n%s\r\n" % (hex_len, raw_chunk)
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        while not self._complete:
+            return await self._make_chunk()
+        raise StopAsyncIteration()
 
 
 async def handle_checksum_body(
     http_response, response, context, operation_model
 ):
@@ -67,3 +106,54 @@ async def _handle_bytes_response(http_response, response, algorithm):
         )
         raise FlexibleChecksumError(error_msg=error_msg)
     return body
+
+
+def apply_request_checksum(request):
+    checksum_context = request.get("context", {}).get("checksum", {})
+    algorithm = checksum_context.get("request_algorithm")
+
+    if not algorithm:
+        return
+
+    if algorithm == "conditional-md5":
+        # Special case to handle the http checksum required trait
+        conditionally_calculate_md5(request)
+    elif algorithm["in"] == "header":
+        _apply_request_header_checksum(request)
+    elif algorithm["in"] == "trailer":
+        _apply_request_trailer_checksum(request)
+    else:
+        raise FlexibleChecksumError(
+            error_msg="Unknown checksum variant: %s" % algorithm["in"]
+        )
+
+
+def _apply_request_trailer_checksum(request):
+    checksum_context = request.get("context", {}).get("checksum", {})
+    algorithm = checksum_context.get("request_algorithm")
+    location_name = algorithm["name"]
+    checksum_cls = _CHECKSUM_CLS.get(algorithm["algorithm"])
+
+    headers = request["headers"]
+    body = request["body"]
+
+    if location_name in headers:
+        # If the header is already set by the customer, skip calculation
+        return
+
+    # aiohttp complains if Transfer-Encoding is set; httpsession.py strips it again before sending
+    headers["Transfer-Encoding"] = "chunked"
+    headers["Content-Encoding"] = "aws-chunked"
+    headers["X-Amz-Trailer"] = location_name
+
+    content_length = determine_content_length(body)
+    if content_length is not None:
+        # Send the decoded content length if we can determine it. Some
+        # services such as S3 may require the decoded content length
+        headers["X-Amz-Decoded-Content-Length"] = str(content_length)
+
+    if isinstance(body, (bytes, bytearray)):
+        body = io.BytesIO(body)
+
+    request["body"] = AioAwsChunkedWrapper(
+        body, checksum_cls=checksum_cls, checksum_name=location_name,
+    )
diff --git a/aiobotocore/httpsession.py b/aiobotocore/httpsession.py
index aa18f1c5..72baa5b0 100644
--- a/aiobotocore/httpsession.py
+++ b/aiobotocore/httpsession.py
@@ -186,6 +186,8 @@ async def send(self, request):
             # https://github.com/boto/botocore/issues/1255
             headers['Accept-Encoding'] = 'identity'
 
+            headers.pop('Transfer-Encoding', '')  # aiohttp complains if Transfer-Encoding header is set
+
             headers_ = MultiDict(
                 (z[0], _text(z[1], encoding='utf-8')) for z in headers.items()
             )
diff --git a/tests/test_basic_s3.py b/tests/test_basic_s3.py
index 1377b591..219cec7e 100644
--- a/tests/test_basic_s3.py
+++ b/tests/test_basic_s3.py
@@ -1,4 +1,6 @@
 import asyncio
+import base64
+import hashlib
 from collections import defaultdict
 
 import aioitertools
@@ -633,3 +635,16 @@ async def test_head_object_keys(s3_client, create_object, bucket_name):
         'ContentLength',
         'VersionId',
     }
+
+
+@pytest.mark.xfail(reason="SHA256 Checksums on streaming objects are only sent in trailers unsigned over HTTPS")
+@pytest.mark.moto
+@pytest.mark.asyncio
+async def test_put_object_sha256(s3_client, bucket_name):
+    data = b'test1234'
+    digest = hashlib.sha256(data).digest()
+
+    resp = await s3_client.put_object(Bucket=bucket_name, Key='foobarbaz', Body=data, ChecksumAlgorithm='SHA256')
+    sha256_trailer_checksum = base64.b64decode(resp['ChecksumSHA256'])
+
+    assert digest == sha256_trailer_checksum
diff --git a/tests/test_patches.py b/tests/test_patches.py
index 603cda91..86453d55 100644
--- a/tests/test_patches.py
+++ b/tests/test_patches.py
@@ -53,7 +53,13 @@
     parse_get_bucket_location,
 )
 from botocore.hooks import EventAliaser, HierarchicalEmitter
-from botocore.httpchecksum import _handle_bytes_response, handle_checksum_body
+from botocore.httpchecksum import (
+    _handle_bytes_response,
+    _apply_request_trailer_checksum,
+    handle_checksum_body,
+    AwsChunkedWrapper,
+    apply_request_checksum
+)
 from botocore.httpsession import URLLib3Session
 from botocore.paginate import PageIterator, ResultKeyIterator
 from botocore.parsers import (
@@ -557,6 +563,11 @@
     # httpchecksum.py
     handle_checksum_body: {'4b9aeef18d816563624c66c57126d1ffa6fe1993'},
     _handle_bytes_response: {'76f4f9d1da968dc6dbc24fd9f59b4b8ee86799f4'},
+    AwsChunkedWrapper._make_chunk: {'097361692f0fd6c863a17dd695739629982ef7e4'},
+    AwsChunkedWrapper.__iter__: {'261e26d1061655555fe3dcb2689d963e43f80fb0'},
+    apply_request_checksum: {'bcc044f0655f30769994efab72b29e76d73f7e39'},
+    _apply_request_trailer_checksum: {'9ef5bdf2d28fe03530f8d58eace28b1d5a368ead'},
+
     # retryhandler.py
     retryhandler.create_retry_handler: {
         'fde9dfbc581f3d571f7bf9af1a966f0d28f6d89d'
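
Below is a minimal, illustrative sketch (not part of the patch) of the aws-chunked framing that AioAwsChunkedWrapper emits once this change is applied: each data chunk is prefixed with its hex length, and the final zero-length chunk carries the checksum trailer advertised in X-Amz-Trailer. The sample payload and the direct lookup in botocore's _CHECKSUM_CLS table are illustrative assumptions rather than part of the change.

    import asyncio
    import io

    from botocore.httpchecksum import _CHECKSUM_CLS  # maps algorithm names to checksum classes

    from aiobotocore.httpchecksum import AioAwsChunkedWrapper


    async def main():
        # Wrap a small in-memory body the same way _apply_request_trailer_checksum does.
        wrapper = AioAwsChunkedWrapper(
            io.BytesIO(b"test1234"),
            checksum_cls=_CHECKSUM_CLS["sha256"],
            checksum_name="x-amz-checksum-sha256",  # trailer name S3 uses for SHA256
        )

        # Async iteration yields the aws-chunked frames; the last one is the trailer:
        #   b"8\r\ntest1234\r\n"
        #   b"0\r\nx-amz-checksum-sha256:<base64 digest>\r\n\r\n"
        async for frame in wrapper:
            print(frame)


    asyncio.run(main())

On the request path, apply_request_checksum swaps the original body for this wrapper, and httpsession.py strips the Transfer-Encoding header again before handing the request to aiohttp.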