Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phase 1 Added new method download_blob_to_file within Storage Client #7949

Merged
merged 4 commits into from
May 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions storage/google/cloud/storage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

"""Client for interacting with the Google Cloud Storage API."""

from six.moves.urllib.parse import urlsplit

from google.auth.credentials import AnonymousCredentials

Expand All @@ -24,6 +25,7 @@
from google.cloud.storage._http import Connection
from google.cloud.storage.batch import Batch
from google.cloud.storage.bucket import Bucket
from google.cloud.storage.blob import Blob


_marker = object()
Expand Down Expand Up @@ -309,6 +311,57 @@ def create_bucket(self, bucket_or_name, requester_pays=None, project=None):
bucket.create(client=self, project=project)
return bucket

def download_blob_to_file(self, blob_or_uri, file_obj, start=None, end=None):
    """Download the contents of a blob object or blob URI into a file-like object.

    Args:
        blob_or_uri (Union[ \
        :class:`~google.cloud.storage.blob.Blob`, \
         str, \
        ]):
            The blob resource to pass or URI to download, for example
            ``gs://bucket_name/path/to/blob``.
        file_obj (file):
            A file handle to which to write the blob's data.
        start (int):
            Optional. The first byte in a range to be downloaded.
        end (int):
            Optional. The last byte in a range to be downloaded.

    Raises:
        ValueError: If ``blob_or_uri`` is a URI whose scheme is not ``gs``.

    Examples:
        Download a blob using a blob resource.

        >>> from google.cloud import storage
        >>> client = storage.Client()

        >>> bucket = client.get_bucket('my-bucket-name')
        >>> blob = storage.Blob('path/to/blob', bucket)

        >>> with open('file-to-download-to', 'wb') as file_obj:
        >>>     client.download_blob_to_file(blob, file_obj)  # API request.


        Download a blob using a URI.

        >>> from google.cloud import storage
        >>> client = storage.Client()

        >>> with open('file-to-download-to', 'wb') as file_obj:
        >>>     client.download_blob_to_file(
        >>>         'gs://bucket_name/path/to/blob', file_obj)


    """
    # Duck-type on the presence of the method rather than wrapping the
    # download itself in ``try/except AttributeError``: an AttributeError
    # raised *inside* ``download_to_file`` must propagate to the caller and
    # not be mistaken for "the argument is a URI".
    if not hasattr(blob_or_uri, "download_to_file"):
        scheme, netloc, path, _query, _frag = urlsplit(blob_or_uri)
        # Only gs:// is supported; fail loudly so callers passing http(s)
        # URLs do not assume they will work.
        if scheme != "gs":
            raise ValueError("URI scheme must be gs")
        bucket = Bucket(self, name=netloc)
        # urlsplit keeps the "/" that separates the bucket from the object
        # in ``path``; drop exactly that one character so the blob name is
        # "path/to/blob" rather than "/path/to/blob".
        blob_or_uri = Blob(path[1:], bucket)

    blob_or_uri.download_to_file(file_obj, client=self, start=start, end=end)

def list_buckets(
self,
max_results=None,
Expand Down
41 changes: 41 additions & 0 deletions storage/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import json
import unittest

import mock
import pytest
import requests
from six.moves import http_client

Expand Down Expand Up @@ -516,6 +518,45 @@ def test_create_bucket_with_object_success(self):
json_sent = http.request.call_args_list[0][1]["data"]
self.assertEqual(json_expected, json.loads(json_sent))

def test_download_blob_to_file_with_blob(self):
    """A blob-like argument has its own ``download_to_file`` invoked once."""
    creds = _make_credentials()
    client = self._make_one(project="PROJECT", credentials=creds)
    fake_blob = mock.Mock()
    sink = io.BytesIO()

    client.download_blob_to_file(fake_blob, sink)

    # The client must hand itself and the default (unset) byte range through.
    expected_kwargs = {"client": client, "start": None, "end": None}
    fake_blob.download_to_file.assert_called_once_with(sink, **expected_kwargs)

def test_download_blob_to_file_with_uri(self):
    """A ``gs://`` URI argument results in one download on the constructed Blob."""
    creds = _make_credentials()
    client = self._make_one(project="PROJECT", credentials=creds)
    stub_blob = mock.Mock()
    sink = io.BytesIO()

    # Replace the Blob class seen by the client module so that whatever the
    # client constructs from the URI is our stub.
    blob_patch = mock.patch(
        "google.cloud.storage.client.Blob", return_value=stub_blob
    )
    with blob_patch:
        client.download_blob_to_file("gs://bucket_name/path/to/object", sink)

    expected_kwargs = {"client": client, "start": None, "end": None}
    stub_blob.download_to_file.assert_called_once_with(sink, **expected_kwargs)

def test_download_blob_to_file_with_invalid_uri(self):
    """A URI whose scheme is not ``gs`` is rejected with ``ValueError``."""
    creds = _make_credentials()
    client = self._make_one(project="PROJECT", credentials=creds)
    stub_blob = mock.Mock()
    sink = io.BytesIO()
    bad_uri = "http://bucket_name/path/to/object"

    blob_patch = mock.patch(
        "google.cloud.storage.client.Blob", return_value=stub_blob
    )
    expect_error = pytest.raises(ValueError, match="URI scheme must be gs")
    with blob_patch:
        with expect_error:
            client.download_blob_to_file(bad_uri, sink)

def test_list_buckets_wo_project(self):
CREDENTIALS = _make_credentials()
client = self._make_one(project=None, credentials=CREDENTIALS)
Expand Down