Skip to content

Commit

Permalink
New blob_properties transport parameter for GCS (#632)
Browse files Browse the repository at this point in the history
* Pass properties to set on GCS blob.

Example:
```
smart_open.open("gs://somewhere/somefile.json.gz", transport_params={
  "blob_properties": {
    "content_type": "application/json",
    "content_encoding": "gzip"
  }
})
```

* Update CHANGELOG.md

Co-authored-by: Michael Penkov <misha.penkov@gmail.com>
Co-authored-by: Michael Penkov <m@penkov.dev>
  • Loading branch information
3 people authored Aug 18, 2021
1 parent 902d0de commit 24e0e3f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

- Work around changes to `urllib.parse.urlsplit` (PR [#633](https://github.com/RaRe-Technologies/smart_open/pull/633), [@judahrand](https://github.com/judahrand))
- Change python_requires version to fix PEP 440 issue (PR [#639](https://github.com/RaRe-Technologies/smart_open/pull/639), [@lucasvieirasilva](https://github.com/lucasvieirasilva))
- New blob_properties transport parameter for GCS (PR [#632](https://github.com/RaRe-Technologies/smart_open/pull/632), [@FHTheron](https://github.com/FHTheron))

# 5.1.0, 25 May 2021

Expand Down
9 changes: 9 additions & 0 deletions smart_open/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def open(
buffer_size=DEFAULT_BUFFER_SIZE,
min_part_size=_MIN_MIN_PART_SIZE,
client=None, # type: google.cloud.storage.Client
blob_properties=None
):
"""Open a GCS blob for reading or writing.
Expand All @@ -129,6 +130,8 @@ def open(
The minimum part size for multipart uploads. For writing only.
client: google.cloud.storage.Client, optional
The GCS client to use when working with google-cloud-storage.
blob_properties: dict, optional
Set properties on blob before writing. For writing only.
"""
if mode == constants.READ_BINARY:
Expand All @@ -145,6 +148,7 @@ def open(
blob_id,
min_part_size=min_part_size,
client=client,
blob_properties=blob_properties,
)
else:
raise NotImplementedError('GCS support for mode %r not implemented' % mode)
Expand Down Expand Up @@ -396,6 +400,7 @@ def __init__(
blob,
min_part_size=_DEFAULT_MIN_PART_SIZE,
client=None, # type: google.cloud.storage.Client
blob_properties=None,
):
if client is None:
client = google.cloud.storage.Client()
Expand All @@ -412,6 +417,10 @@ def __init__(

self._session = google.auth.transport.requests.AuthorizedSession(client._credentials)

if blob_properties:
for k, v in blob_properties.items():
setattr(self._blob, k, v)

#
# https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload#start-resumable
#
Expand Down
12 changes: 12 additions & 0 deletions smart_open/tests/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,18 @@ def test_write_04(self):

self.assertEqual(output, [])

def test_write_05(self):
    """Check that ``blob_properties`` passed to the Writer are set on its blob.

    Each key of ``blob_properties`` is expected to show up as an attribute
    of the writer's underlying blob object with the given value.
    """
    blob_properties = {
        "content_type": "random/x-test",
        "content_encoding": "coded",
    }
    writer = smart_open.gcs.Writer(
        BUCKET_NAME,
        WRITE_BLOB_NAME,
        blob_properties=blob_properties,
    )
    with writer as fout:
        # Use unittest assertions rather than bare ``assert``: they are not
        # stripped under ``python -O`` and they report both values on failure,
        # consistent with the other tests in this file.
        self.assertEqual(fout._blob.content_type, "random/x-test")
        self.assertEqual(fout._blob.content_encoding, "coded")

def test_gzip(self):
expected = u'а не спеть ли мне песню... о любви'.encode('utf-8')
with smart_open.gcs.Writer(BUCKET_NAME, WRITE_BLOB_NAME) as fout:
Expand Down

0 comments on commit 24e0e3f

Please sign in to comment.