Skip to content

Commit

Permalink
feat(storage): add opt-in raw download support (#9572)
Browse files Browse the repository at this point in the history
* deps(bigquery): pin to allow g-r-m 0.5.x

* deps(storage): pin to require g-r-m >= 0.5.0
  • Loading branch information
tseaver authored Nov 5, 2019
1 parent 4af68dc commit 5f716db
Show file tree
Hide file tree
Showing 5 changed files with 311 additions and 352 deletions.
2 changes: 1 addition & 1 deletion bigquery/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
dependencies = [
'enum34; python_version < "3.4"',
"google-cloud-core >= 1.0.3, < 2.0dev",
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.5.0dev",
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev",
"protobuf >= 3.6.0",
]
extras = {
Expand Down
73 changes: 64 additions & 9 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
from google import resumable_media
from google.resumable_media.requests import ChunkedDownload
from google.resumable_media.requests import Download
from google.resumable_media.requests import RawDownload
from google.resumable_media.requests import RawChunkedDownload
from google.resumable_media.requests import MultipartUpload
from google.resumable_media.requests import ResumableUpload

Expand Down Expand Up @@ -591,7 +593,14 @@ def _get_download_url(self):
return _add_query_parameters(base_url, name_value_pairs)

def _do_download(
self, transport, file_obj, download_url, headers, start=None, end=None
self,
transport,
file_obj,
download_url,
headers,
start=None,
end=None,
raw_download=False,
):
"""Perform a download without any error handling.
Expand All @@ -617,14 +626,30 @@ def _do_download(
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.
"""
if self.chunk_size is None:
download = Download(
if raw_download:
klass = RawDownload
else:
klass = Download

download = klass(
download_url, stream=file_obj, headers=headers, start=start, end=end
)
download.consume(transport)

else:
download = ChunkedDownload(

if raw_download:
klass = RawChunkedDownload
else:
klass = ChunkedDownload

download = klass(
download_url,
self.chunk_size,
file_obj,
Expand All @@ -636,7 +661,9 @@ def _do_download(
while not download.finished:
download.consume_next_chunk(transport)

def download_to_file(self, file_obj, client=None, start=None, end=None):
def download_to_file(
self, file_obj, client=None, start=None, end=None, raw_download=False
):
"""Download the contents of this blob into a file-like object.
.. note::
Expand Down Expand Up @@ -676,6 +703,10 @@ def download_to_file(self, file_obj, client=None, start=None, end=None):
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
download_url = self._get_download_url()
Expand All @@ -684,11 +715,15 @@ def download_to_file(self, file_obj, client=None, start=None, end=None):

transport = self._get_transport(client)
try:
self._do_download(transport, file_obj, download_url, headers, start, end)
self._do_download(
transport, file_obj, download_url, headers, start, end, raw_download
)
except resumable_media.InvalidResponse as exc:
_raise_from_invalid_response(exc)

def download_to_filename(self, filename, client=None, start=None, end=None):
def download_to_filename(
self, filename, client=None, start=None, end=None, raw_download=False
):
"""Download the contents of this blob into a named file.
If :attr:`user_project` is set on the bucket, bills the API request
Expand All @@ -708,11 +743,21 @@ def download_to_filename(self, filename, client=None, start=None, end=None):
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
try:
with open(filename, "wb") as file_obj:
self.download_to_file(file_obj, client=client, start=start, end=end)
self.download_to_file(
file_obj,
client=client,
start=start,
end=end,
raw_download=raw_download,
)
except resumable_media.DataCorruption:
# Delete the corrupt downloaded file.
os.remove(filename)
Expand All @@ -723,7 +768,7 @@ def download_to_filename(self, filename, client=None, start=None, end=None):
mtime = time.mktime(updated.timetuple())
os.utime(file_obj.name, (mtime, mtime))

def download_as_string(self, client=None, start=None, end=None):
def download_as_string(self, client=None, start=None, end=None, raw_download=False):
"""Download the contents of this blob as a bytes object.
If :attr:`user_project` is set on the bucket, bills the API request
Expand All @@ -740,12 +785,22 @@ def download_as_string(self, client=None, start=None, end=None):
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.
:rtype: bytes
:returns: The data stored in this blob.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
string_buffer = BytesIO()
self.download_to_file(string_buffer, client=client, start=start, end=end)
self.download_to_file(
string_buffer,
client=client,
start=start,
end=end,
raw_download=raw_download,
)
return string_buffer.getvalue()

def _get_content_type(self, content_type, filename=None):
Expand Down
2 changes: 1 addition & 1 deletion storage/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
dependencies = [
"google-auth >= 1.2.0",
"google-cloud-core >= 1.0.3, < 2.0dev",
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.5dev",
"google-resumable-media >= 0.5.0, < 0.6dev",
]
extras = {}

Expand Down
19 changes: 19 additions & 0 deletions storage/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

import base64
import datetime
import gzip
import hashlib
import io
import os
import re
import tempfile
Expand Down Expand Up @@ -620,6 +622,23 @@ def test_download_blob_w_uri(self):

self.assertEqual(file_contents, stored_contents)

def test_upload_gzip_encoded_download_raw(self):
payload = b"DEADBEEF" * 1000
raw_stream = io.BytesIO()
with gzip.GzipFile(fileobj=raw_stream, mode="wb") as gzip_stream:
gzip_stream.write(payload)
zipped = raw_stream.getvalue()

blob = self.bucket.blob("test_gzipped.gz")
blob.content_encoding = "gzip"
blob.upload_from_file(raw_stream, rewind=True)

expanded = blob.download_as_string()
self.assertEqual(expanded, payload)

raw = blob.download_as_string(raw_download=True)
self.assertEqual(raw, zipped)


class TestUnicode(unittest.TestCase):
@unittest.skipIf(RUNNING_IN_VPCSC, "Test is not VPCSC compatible.")
Expand Down
Loading

0 comments on commit 5f716db

Please sign in to comment.