Adds support for checksums in streamed request trailers
terricain committed Aug 10, 2022
1 parent 9233555 commit fde7814
Showing 6 changed files with 125 additions and 2 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
@@ -1,5 +1,10 @@
Changes
-------
2.4.0 Unreleased
^^^^^^^^^^^^^^^^

* fix S3 uploads with newer checksum algorithms (adds support for checksums sent in streamed request trailers)

2.3.4 (2022-06-23)
^^^^^^^^^^^^^^^^^^
* fix select_object_content
2 changes: 1 addition & 1 deletion aiobotocore/client.py
@@ -6,7 +6,6 @@
    PaginatorDocstring,
    S3ArnParamHandler,
    S3EndpointSetter,
    apply_request_checksum,
    logger,
    resolve_checksum_context,
)
@@ -23,6 +22,7 @@
from .paginate import AioPaginator
from .retries import adaptive, standard
from .utils import AioS3RegionRedirector
from .httpchecksum import apply_request_checksum

history_recorder = get_global_history_recorder()

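With this change the client imports apply_request_checksum from the local aiobotocore.httpchecksum module (shown in the next file) instead of botocore, so requests that resolve to a trailer checksum are wrapped with the coroutine-aware AioAwsChunkedWrapper. Below is a minimal sketch of what that call does to a request dict; the dict shape and the SHA256 values are illustrative assumptions mirroring what resolve_checksum_context would populate, not copied from a real request.

from aiobotocore.httpchecksum import apply_request_checksum

# Illustrative request dict, shaped like the one the client builds after
# resolve_checksum_context() has run (values here are assumptions).
request_dict = {
    "headers": {},
    "body": b"test1234",
    "context": {
        "checksum": {
            "request_algorithm": {
                "in": "trailer",
                "algorithm": "sha256",
                "name": "x-amz-checksum-sha256",
            }
        }
    },
}

apply_request_checksum(request_dict)

# The headers now carry the aws-chunked/trailer metadata and the body has been
# replaced by an AioAwsChunkedWrapper that emits the checksum as a trailer.
print(request_dict["headers"])
print(type(request_dict["body"]))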
90 changes: 90 additions & 0 deletions aiobotocore/httpchecksum.py
@@ -1,12 +1,51 @@
import inspect
import io

from botocore.httpchecksum import (
    _CHECKSUM_CLS,
    FlexibleChecksumError,
    _handle_streaming_response,
    base64,
    logger,
    determine_content_length,
    conditionally_calculate_md5,
    _apply_request_header_checksum,
    AwsChunkedWrapper,
)


class AioAwsChunkedWrapper(AwsChunkedWrapper):
    async def _make_chunk(self):
        # NOTE: Chunk size is not deterministic as read could return less. This
        # means we cannot know the content length of the encoded aws-chunked
        # stream ahead of time without ensuring a consistent chunk size

        raw_chunk = self._raw.read(self._chunk_size)
        if inspect.isawaitable(raw_chunk):
            raw_chunk = await raw_chunk

        hex_len = hex(len(raw_chunk))[2:].encode("ascii")
        self._complete = not raw_chunk

        if self._checksum:
            self._checksum.update(raw_chunk)

        if self._checksum and self._complete:
            name = self._checksum_name.encode("ascii")
            checksum = self._checksum.b64digest().encode("ascii")
            return b"0\r\n%s:%s\r\n\r\n" % (name, checksum)

        return b"%s\r\n%s\r\n" % (hex_len, raw_chunk)

    def __aiter__(self):
        return self

    async def __anext__(self):
        while not self._complete:
            return await self._make_chunk()
        raise StopAsyncIteration()


async def handle_checksum_body(
    http_response, response, context, operation_model
):
@@ -67,3 +106,54 @@ async def _handle_bytes_response(http_response, response, algorithm):
        )
        raise FlexibleChecksumError(error_msg=error_msg)
    return body


def apply_request_checksum(request):
    checksum_context = request.get("context", {}).get("checksum", {})
    algorithm = checksum_context.get("request_algorithm")

    if not algorithm:
        return

    if algorithm == "conditional-md5":
        # Special case to handle the http checksum required trait
        conditionally_calculate_md5(request)
    elif algorithm["in"] == "header":
        _apply_request_header_checksum(request)
    elif algorithm["in"] == "trailer":
        _apply_request_trailer_checksum(request)
    else:
        raise FlexibleChecksumError(
            error_msg="Unknown checksum variant: %s" % algorithm["in"]
        )


def _apply_request_trailer_checksum(request):
    checksum_context = request.get("context", {}).get("checksum", {})
    algorithm = checksum_context.get("request_algorithm")
    location_name = algorithm["name"]
    checksum_cls = _CHECKSUM_CLS.get(algorithm["algorithm"])

    headers = request["headers"]
    body = request["body"]

    if location_name in headers:
        # If the header is already set by the customer, skip calculation
        return

    # Set Transfer-Encoding here to mirror botocore; aiohttp complains if this
    # header is set explicitly, so httpsession.py strips it again before sending
    headers["Transfer-Encoding"] = "chunked"
    headers["Content-Encoding"] = "aws-chunked"
    headers["X-Amz-Trailer"] = location_name

    content_length = determine_content_length(body)
    if content_length is not None:
        # Send the decoded content length if we can determine it. Some
        # services such as S3 may require the decoded content length
        headers["X-Amz-Decoded-Content-Length"] = str(content_length)

    if isinstance(body, (bytes, bytearray)):
        body = io.BytesIO(body)

    request["body"] = AioAwsChunkedWrapper(
        body,
        checksum_cls=checksum_cls,
        checksum_name=location_name,
    )
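For illustration, here is a minimal sketch of the framing the new AioAwsChunkedWrapper produces when iterated: hex-length-prefixed chunks followed by a zero-length chunk that carries the checksum trailer. The tiny chunk_size and the trailer name are chosen for the example only (botocore's default chunk size is 1 MB), and _CHECKSUM_CLS is reused from botocore as in the module above.

import asyncio
import io

from botocore.httpchecksum import _CHECKSUM_CLS

from aiobotocore.httpchecksum import AioAwsChunkedWrapper


async def main():
    wrapper = AioAwsChunkedWrapper(
        io.BytesIO(b"test1234"),
        checksum_cls=_CHECKSUM_CLS["sha256"],
        checksum_name="x-amz-checksum-sha256",
        chunk_size=4,  # artificially small so the framing is visible
    )
    async for chunk in wrapper:
        print(chunk)
    # Roughly:
    #   b'4\r\ntest\r\n'
    #   b'4\r\n1234\r\n'
    #   b'0\r\nx-amz-checksum-sha256:<base64 SHA256 digest>\r\n\r\n'


asyncio.run(main())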
2 changes: 2 additions & 0 deletions aiobotocore/httpsession.py
@@ -186,6 +186,8 @@ async def send(self, request):
            # https://github.com/boto/botocore/issues/1255
            headers['Accept-Encoding'] = 'identity'

            # aiohttp complains if the Transfer-Encoding header is set, so drop it here
            headers.pop('Transfer-Encoding', '')

            headers_ = MultiDict(
                (z[0], _text(z[1], encoding='utf-8')) for z in headers.items()
            )
15 changes: 15 additions & 0 deletions tests/test_basic_s3.py
@@ -1,4 +1,6 @@
import asyncio
import base64
import hashlib
from collections import defaultdict

import aioitertools
@@ -633,3 +635,16 @@ async def test_head_object_keys(s3_client, create_object, bucket_name):
        'ContentLength',
        'VersionId',
    }


@pytest.mark.xfail(
    reason="SHA256 checksums on streaming objects are only sent in "
    "unsigned trailers over HTTPS"
)
@pytest.mark.moto
@pytest.mark.asyncio
async def test_put_object_sha256(s3_client, bucket_name):
    data = b'test1234'
    digest = hashlib.sha256(data).digest()

    resp = await s3_client.put_object(
        Bucket=bucket_name, Key='foobarbaz', Body=data,
        ChecksumAlgorithm='SHA256'
    )
    sha256_trailer_checksum = base64.b64decode(resp['ChecksumSHA256'])

    assert digest == sha256_trailer_checksum
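A hedged usage sketch of the scenario this commit enables: a streamed PutObject that asks for one of the newer checksum algorithms. The bucket, key, and file names are placeholders; depending on the endpoint and signing configuration, botocore resolves the checksum either into a request header or, for unsigned streaming payloads over HTTPS, into the aws-chunked trailer path added here.

import asyncio

from aiobotocore.session import get_session


async def upload(path):
    session = get_session()
    async with session.create_client('s3') as client:
        with open(path, 'rb') as body:
            # ChecksumAlgorithm='SHA256' asks S3 to verify a SHA256 checksum;
            # for streaming bodies this is what exercises the new
            # AioAwsChunkedWrapper / trailer code path.
            await client.put_object(
                Bucket='my-bucket',  # placeholder bucket
                Key='my-key',        # placeholder key
                Body=body,
                ChecksumAlgorithm='SHA256',
            )


asyncio.run(upload('example.bin'))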
13 changes: 12 additions & 1 deletion tests/test_patches.py
@@ -53,7 +53,13 @@
parse_get_bucket_location,
)
from botocore.hooks import EventAliaser, HierarchicalEmitter
from botocore.httpchecksum import _handle_bytes_response, handle_checksum_body
from botocore.httpchecksum import (
    _handle_bytes_response,
    _apply_request_trailer_checksum,
    handle_checksum_body,
    AwsChunkedWrapper,
    apply_request_checksum,
)
from botocore.httpsession import URLLib3Session
from botocore.paginate import PageIterator, ResultKeyIterator
from botocore.parsers import (
@@ -557,6 +563,11 @@
    # httpchecksum.py
    handle_checksum_body: {'4b9aeef18d816563624c66c57126d1ffa6fe1993'},
    _handle_bytes_response: {'76f4f9d1da968dc6dbc24fd9f59b4b8ee86799f4'},
    AwsChunkedWrapper._make_chunk: {'097361692f0fd6c863a17dd695739629982ef7e4'},
    AwsChunkedWrapper.__iter__: {'261e26d1061655555fe3dcb2689d963e43f80fb0'},
    apply_request_checksum: {'bcc044f0655f30769994efab72b29e76d73f7e39'},
    _apply_request_trailer_checksum: {'9ef5bdf2d28fe03530f8d58eace28b1d5a368ead'},

    # retryhandler.py
    retryhandler.create_retry_handler: {
        'fde9dfbc581f3d571f7bf9af1a966f0d28f6d89d'
