diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 1eb64984..7e66004f 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,14 +23,14 @@ env_vars: { value: "github/python-bigquery-storage/.kokoro/release.sh" } -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google_cloud_pypi_password" + } + } } # Tokens needed to report release status back to GitHub diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index c232b2c2..cc2641d2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -80,25 +80,6 @@ We use `nox `__ to instrument our tests. .. nox: https://pypi.org/project/nox/ -Note on Editable Installs / Develop Mode -======================================== - -- As mentioned previously, using ``setuptools`` in `develop mode`_ - or a ``pip`` `editable install`_ is not possible with this - library. This is because this library uses `namespace packages`_. - For context see `Issue #2316`_ and the relevant `PyPA issue`_. - - Since ``editable`` / ``develop`` mode can't be used, packages - need to be installed directly. Hence your changes to the source - tree don't get incorporated into the **already installed** - package. - -.. _namespace packages: https://www.python.org/dev/peps/pep-0420/ -.. _Issue #2316: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2316 -.. _PyPA issue: https://github.com/pypa/packaging-problems/issues/12 -.. _develop mode: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode -.. _editable install: https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs - ***************************************** I'm getting weird errors... Can you help? ***************************************** diff --git a/README.rst b/README.rst index f8623e38..ad8a5e99 100644 --- a/README.rst +++ b/README.rst @@ -49,11 +49,14 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.5 +Python >= 3.6 -Deprecated Python Versions -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7. Python 2.7 support will be removed on January 1, 2020. +Unsupported Python Versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Python == 2.7, Python == 3.5. + +The last version of this library compatible with Python 2.7 and 3.5 is +``google-cloud-bigquery-storage==1.1.0``. Mac/Linux diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 00000000..92f130ae --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,282 @@ + + + +# 2.0.0 Migration Guide + +The 2.0 release of the `google-cloud-bigquery-storage` client is a significant +upgrade based on a [next-gen code generator](https://github.com/googleapis/gapic-generator-python), +and includes substantial interface changes. Existing code written for earlier versions +of this library will likely require updates to use this version. This document +describes the changes that have been made, and what you need to do to update your usage. + +If you experience issues or have questions, please file an +[issue](https://github.com/googleapis/python-bigquery-storage/issues). + + +## Supported Python Versions + +> **WARNING**: Breaking change + +The 2.0.0 release requires Python 3.6+. + + +## Import Path + +The library was moved into `google.cloud.bigquery` namespace. 
It is recommended
+to use this path in order to reduce the chance of future compatibility issues
+in case the library is restructured internally.
+
+**Before:**
+```py
+from google.cloud.bigquery_storage_v1 import BigQueryReadClient
+```
+
+**After:**
+```py
+from google.cloud.bigquery.storage import BigQueryReadClient
+```
+
+
+## Enum Types
+
+> **WARNING**: Breaking change
+
+Enum types have been moved. Access them through the `types` module.
+
+**Before:**
+```py
+from google.cloud.bigquery_storage_v1 import enums
+
+data_format = enums.DataFormat.ARROW
+data_format = BigQueryReadClient.enums.DataFormat.ARROW
+```
+
+**After:**
+```py
+from google.cloud.bigquery.storage import types
+
+data_format = types.DataFormat.ARROW
+```
+
+Additionally, enums can no longer be accessed through the client. The following
+code will _not_ work:
+```py
+data_format = BigQueryReadClient.enums.DataFormat.ARROW
+```
+
+
+## Clients for Beta APIs
+
+> **WARNING**: Breaking change
+
+Clients for beta APIs have been removed. The following imports will _not_ work:
+
+```py
+from google.cloud.bigquery_storage_v1beta1 import BigQueryStorageClient
+from google.cloud.bigquery_storage_v1beta2.gapic.big_query_read_client import BigQueryReadClient
+```
+
+The beta APIs are still available on the server side, but you will need to use
+the 1.x version of the library to access them.
+
+
+## Changed Default Value of the `read_rows()` Method's `metadata` Argument
+
+The `client.read_rows()` method no longer accepts `None` as a valid value
+for the optional `metadata` argument. If not given, an empty tuple is used; if
+you want to explicitly pass an "empty" value, use an empty tuple as well.
+
+**Before:**
+```py
+client.read_rows("stream_name", metadata=None)
+```
+
+**After:**
+```py
+client.read_rows("stream_name", metadata=())
+```
+
+OR
+
+```py
+client.read_rows("stream_name")
+```
+
+
+## Method Calls
+
+> **WARNING**: Breaking change
+
+Most of the client methods that send requests to the backend expect request objects.
+We provide a script that will convert most common use cases.
+
+> One exception to this is `BigQueryReadClient.read_rows()`, which is a hand-written
+wrapper around the auto-generated `read_rows()` method.
+
+* Install the library
+
+```sh
+python3 -m pip install google-cloud-bigquery-storage
+```
+
+* The script `fixup_storage_v1_keywords.py` is shipped with the library. It expects
+an input directory (with the code to convert) and an empty destination directory.
+
+```sh
+$ scripts/fixup_storage_v1_keywords.py --input-directory .samples/ --output-directory samples/
+```
+
+**Before:**
+```py
+from google.cloud import bigquery_storage_v1
+
+client = bigquery_storage_v1.BigQueryReadClient()
+
+requested_session = bigquery_storage_v1.types.ReadSession()
+requested_session.table = "projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID"
+requested_session.data_format = bigquery_storage_v1.enums.DataFormat.ARROW
+
+session = client.create_read_session(
+    "projects/parent_project",
+    requested_session,
+    max_stream_count=1,
+)
+```
+
+**After:**
+```py
+from google.cloud.bigquery import storage
+
+client = storage.BigQueryReadClient()
+
+requested_session = storage.types.ReadSession(
+    table="projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID",
+    data_format=storage.types.DataFormat.ARROW,
+)
+session = client.create_read_session(
+    request={
+        "parent": "projects/parent_project",
+        "read_session": requested_session,
+        "max_stream_count": 1,
+    },
+)
+```
+
+### More Details
+
+In `google-cloud-bigquery-storage<2.0.0`, parameters required by the API were positional
+parameters and optional parameters were keyword parameters.
+
+**Before:**
+```py
+def create_read_session(
+    self,
+    parent,
+    read_session,
+    max_stream_count=None,
+    retry=google.api_core.gapic_v1.method.DEFAULT,
+    timeout=google.api_core.gapic_v1.method.DEFAULT,
+    metadata=None,
+):
+```
+
+In the `2.0.0` release, methods that interact with the backend have a single
+positional parameter `request`. Method docstrings indicate whether a parameter is
+required or optional.
+
+Some methods have additional keyword-only parameters. The available parameters depend
+on the [`google.api.method_signature` annotation](https://github.com/googleapis/python-bigquery-storage/blob/9e1bf910e6f5010f479cf4592e25c3b3eebb456d/google/cloud/bigquery_storage_v1/proto/storage.proto#L73)
+specified by the API producer.
+
+
+**After:**
+```py
+def create_read_session(
+    self,
+    request: storage.CreateReadSessionRequest = None,
+    *,
+    parent: str = None,
+    read_session: stream.ReadSession = None,
+    max_stream_count: int = None,
+    retry: retries.Retry = gapic_v1.method.DEFAULT,
+    timeout: float = None,
+    metadata: Sequence[Tuple[str, str]] = (),
+) -> stream.ReadSession:
+```
+
+> **NOTE:** The `request` parameter and flattened keyword parameters for the API are
+> mutually exclusive. Passing both will result in an error.
+
+Both of these calls are valid:
+
+```py
+session = client.create_read_session(
+    request={
+        "parent": "projects/parent_project",
+        "read_session": requested_session,
+        "max_stream_count": 1,
+    },
+)
+```
+
+```py
+response = client.create_read_session(
+    parent="projects/parent_project",
+    read_session=requested_session,
+    max_stream_count=1,
+)
+```
+
+This call is _invalid_ because it mixes `request` with the keyword argument
+`max_stream_count`. Executing this code will result in an error:
+
+```py
+session = client.create_read_session(
+    request={
+        "parent": "projects/parent_project",
+        "read_session": requested_session,
+    },
+    max_stream_count=1,
+)
+```
+
+> **NOTE:** The `request` parameter of some methods can also contain a richer set of
+> options that are otherwise not available as explicit keyword-only parameters; these
+> _must_ be passed through `request`.
+
+
+## Removed Utility Methods
+
+> **WARNING**: Breaking change
+
+Several utility methods such as `project_path()` and `table_path()` have been removed.
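+For example, 1.x code along these lines no longer works (a sketch of the old pattern;
+the helper names come from the sentence above, and the argument order shown here is
+illustrative rather than authoritative):
+
+```py
+# 1.x only -- these path helper methods were removed from the client in 2.0.0.
+# `client` is a 1.x client instance; helper names and argument order are illustrative.
+project_path = client.project_path("my-project")
+table_path = client.table_path("my-project", "my_dataset", "my_table")
+```
+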
+These paths must now be constructed manually:
+
+```py
+project_path = f"projects/{PROJECT_ID}"
+table_path = f"projects/{PROJECT_ID}/datasets/{DATASET_ID}/tables/{TABLE_ID}"
+```
+
+The two helpers that remain are `read_session_path()` and `read_stream_path()`.
+
+
+## Removed `client_config` and `channel` Parameters
+
+The client can no longer be constructed with `channel` or `client_config` arguments;
+these deprecated parameters have been removed.
+
+If you used `client_config` to customize retry and timeout settings for a particular
+method, you now need to do so at method invocation time by passing the custom `timeout`
+and `retry` arguments, respectively.
diff --git a/docs/UPGRADING.md b/docs/UPGRADING.md
new file mode 120000
index 00000000..01097c8c
--- /dev/null
+++ b/docs/UPGRADING.md
@@ -0,0 +1 @@
+../UPGRADING.md
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 07f5ca07..3b109df5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,6 +39,7 @@
     "sphinx.ext.autosummary",
     "sphinx.ext.intersphinx",
     "sphinx.ext.coverage",
+    "sphinx.ext.doctest",
     "sphinx.ext.napoleon",
     "sphinx.ext.todo",
     "sphinx.ext.viewcode",
diff --git a/docs/gapic/v1/api.rst b/docs/gapic/v1/api.rst
deleted file mode 100644
index f2d66087..00000000
--- a/docs/gapic/v1/api.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Client for BigQuery Storage API
-===============================
-
-.. automodule:: google.cloud.bigquery_storage_v1
-    :members:
-    :inherited-members:
\ No newline at end of file
diff --git a/docs/gapic/v1/types.rst b/docs/gapic/v1/types.rst
deleted file mode 100644
index b2392e7a..00000000
--- a/docs/gapic/v1/types.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Types for BigQuery Storage API Client
-=====================================
-
-.. automodule:: google.cloud.bigquery_storage_v1.types
-    :members:
\ No newline at end of file
diff --git a/docs/gapic/v1beta1/api.rst b/docs/gapic/v1beta1/api.rst
deleted file mode 100644
index d4df9855..00000000
--- a/docs/gapic/v1beta1/api.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Client for BigQuery Storage API
-===============================
-
-.. automodule:: google.cloud.bigquery_storage_v1beta1
-    :members:
-    :inherited-members:
\ No newline at end of file
diff --git a/docs/gapic/v1beta1/reader.rst b/docs/gapic/v1beta1/reader.rst
deleted file mode 100644
index 5b6af828..00000000
--- a/docs/gapic/v1beta1/reader.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Reader for BigQuery Storage API
-===============================
-
-.. automodule:: google.cloud.bigquery_storage_v1beta1.reader
-    :members:
-    :inherited-members:
diff --git a/docs/gapic/v1beta1/types.rst b/docs/gapic/v1beta1/types.rst
deleted file mode 100644
index a36210a6..00000000
--- a/docs/gapic/v1beta1/types.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Types for BigQuery Storage API Client
-=====================================
-
-.. automodule:: google.cloud.bigquery_storage_v1beta1.types
-    :members:
\ No newline at end of file
diff --git a/docs/gapic/v1beta2/api.rst b/docs/gapic/v1beta2/api.rst
deleted file mode 100644
index d97911ba..00000000
--- a/docs/gapic/v1beta2/api.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Client for BigQuery Storage API
-===============================
-
-..
automodule:: google.cloud.bigquery_storage_v1beta2 - :members: - :inherited-members: \ No newline at end of file diff --git a/docs/gapic/v1beta2/types.rst b/docs/gapic/v1beta2/types.rst deleted file mode 100644 index 11711dca..00000000 --- a/docs/gapic/v1beta2/types.rst +++ /dev/null @@ -1,5 +0,0 @@ -Types for BigQuery Storage API Client -===================================== - -.. automodule:: google.cloud.bigquery_storage_v1beta2.types - :members: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index ee0768ff..fb3e7182 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,20 +2,6 @@ .. include:: multiprocessing.rst -API Reference -------------- -.. toctree:: - :maxdepth: 2 - - gapic/v1beta1/api - gapic/v1beta1/reader - gapic/v1beta1/types - - gapic/v1beta2/api - gapic/v1beta2/types - - gapic/v1/api - gapic/v1/types Example Usage ------------- @@ -25,3 +11,24 @@ Example Usage :dedent: 4 :start-after: [START bigquerystorage_quickstart] :end-before: [END bigquerystorage_quickstart] + + +API Reference +------------- +.. toctree:: + :maxdepth: 2 + + storage_v1/library + storage_v1/services + storage_v1/types + + +Migration Guide +--------------- + +See the guide below for instructions on migrating to the 2.x release of this library. + +.. toctree:: + :maxdepth: 2 + + UPGRADING diff --git a/docs/storage_v1/library.rst b/docs/storage_v1/library.rst new file mode 100644 index 00000000..acdd71b1 --- /dev/null +++ b/docs/storage_v1/library.rst @@ -0,0 +1,10 @@ +Bigquery Storage v1 API Library +=============================== + +.. automodule:: google.cloud.bigquery_storage_v1.client + :members: + :inherited-members: + +.. automodule:: google.cloud.bigquery_storage_v1.reader + :members: + :inherited-members: diff --git a/docs/storage_v1/services.rst b/docs/storage_v1/services.rst new file mode 100644 index 00000000..56b24588 --- /dev/null +++ b/docs/storage_v1/services.rst @@ -0,0 +1,6 @@ +Services for Google Cloud Bigquery Storage v1 API +================================================= + +.. automodule:: google.cloud.bigquery.storage_v1.services.big_query_read + :members: + :inherited-members: diff --git a/docs/storage_v1/types.rst b/docs/storage_v1/types.rst new file mode 100644 index 00000000..85f2d543 --- /dev/null +++ b/docs/storage_v1/types.rst @@ -0,0 +1,5 @@ +Types for Google Cloud Bigquery Storage v1 API +============================================== + +.. automodule:: google.cloud.bigquery.storage_v1.types + :members: diff --git a/google/cloud/bigquery/storage/__init__.py b/google/cloud/bigquery/storage/__init__.py new file mode 100644 index 00000000..24e20ba2 --- /dev/null +++ b/google/cloud/bigquery/storage/__init__.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from google.cloud.bigquery_storage_v1 import BigQueryReadClient +from google.cloud.bigquery_storage_v1 import types +from google.cloud.bigquery_storage_v1 import __version__ +from google.cloud.bigquery.storage_v1.types.arrow import ArrowRecordBatch +from google.cloud.bigquery.storage_v1.types.arrow import ArrowSchema +from google.cloud.bigquery.storage_v1.types.avro import AvroRows +from google.cloud.bigquery.storage_v1.types.avro import AvroSchema +from google.cloud.bigquery.storage_v1.types.storage import CreateReadSessionRequest +from google.cloud.bigquery.storage_v1.types.storage import ReadRowsRequest +from google.cloud.bigquery.storage_v1.types.storage import ReadRowsResponse +from google.cloud.bigquery.storage_v1.types.storage import SplitReadStreamRequest +from google.cloud.bigquery.storage_v1.types.storage import SplitReadStreamResponse +from google.cloud.bigquery.storage_v1.types.storage import StreamStats +from google.cloud.bigquery.storage_v1.types.storage import ThrottleState +from google.cloud.bigquery.storage_v1.types.stream import DataFormat +from google.cloud.bigquery.storage_v1.types.stream import ReadSession +from google.cloud.bigquery.storage_v1.types.stream import ReadStream + +__all__ = ( + "__version__", + "types", + "ArrowRecordBatch", + "ArrowSchema", + "AvroRows", + "AvroSchema", + "BigQueryReadClient", + "CreateReadSessionRequest", + "DataFormat", + "ReadRowsRequest", + "ReadRowsResponse", + "ReadSession", + "ReadStream", + "SplitReadStreamRequest", + "SplitReadStreamResponse", + "StreamStats", + "ThrottleState", +) diff --git a/google/cloud/bigquery/storage/py.typed b/google/cloud/bigquery/storage/py.typed new file mode 100644 index 00000000..e71b4749 --- /dev/null +++ b/google/cloud/bigquery/storage/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery-storage package uses inline types. diff --git a/google/cloud/bigquery/storage_v1/__init__.py b/google/cloud/bigquery/storage_v1/__init__.py new file mode 100644 index 00000000..55591c25 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/__init__.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from .services.big_query_read import BigQueryReadClient +from .types.arrow import ArrowRecordBatch +from .types.arrow import ArrowSchema +from .types.avro import AvroRows +from .types.avro import AvroSchema +from .types.storage import CreateReadSessionRequest +from .types.storage import ReadRowsRequest +from .types.storage import ReadRowsResponse +from .types.storage import SplitReadStreamRequest +from .types.storage import SplitReadStreamResponse +from .types.storage import StreamStats +from .types.storage import ThrottleState +from .types.stream import DataFormat +from .types.stream import ReadSession +from .types.stream import ReadStream + + +__all__ = ( + "ArrowRecordBatch", + "ArrowSchema", + "AvroRows", + "AvroSchema", + "CreateReadSessionRequest", + "DataFormat", + "ReadRowsRequest", + "ReadRowsResponse", + "ReadSession", + "ReadStream", + "SplitReadStreamRequest", + "SplitReadStreamResponse", + "StreamStats", + "ThrottleState", + "BigQueryReadClient", +) diff --git a/google/cloud/bigquery/storage_v1/py.typed b/google/cloud/bigquery/storage_v1/py.typed new file mode 100644 index 00000000..e71b4749 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery-storage package uses inline types. diff --git a/google/cloud/bigquery/storage_v1/services/__init__.py b/google/cloud/bigquery/storage_v1/services/__init__.py new file mode 100644 index 00000000..42ffdf2b --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/google/cloud/bigquery_storage.py b/google/cloud/bigquery/storage_v1/services/big_query_read/__init__.py similarity index 67% rename from google/cloud/bigquery_storage.py rename to google/cloud/bigquery/storage_v1/services/big_query_read/__init__.py index bd7d3eb2..2105a1a6 100644 --- a/google/cloud/bigquery_storage.py +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/__init__.py @@ -1,29 +1,24 @@ # -*- coding: utf-8 -*- -# + # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# - -from __future__ import absolute_import - -from google.cloud.bigquery_storage_v1 import BigQueryReadClient -from google.cloud.bigquery_storage_v1 import enums -from google.cloud.bigquery_storage_v1 import types - +from .client import BigQueryReadClient +from .async_client import BigQueryReadAsyncClient __all__ = ( - "enums", - "types", "BigQueryReadClient", + "BigQueryReadAsyncClient", ) diff --git a/google/cloud/bigquery/storage_v1/services/big_query_read/async_client.py b/google/cloud/bigquery/storage_v1/services/big_query_read/async_client.py new file mode 100644 index 00000000..121c24e8 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/async_client.py @@ -0,0 +1,418 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from collections import OrderedDict +import functools +import re +from typing import Dict, AsyncIterable, Sequence, Tuple, Type, Union +import pkg_resources + +import google.api_core.client_options as ClientOptions # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.cloud.bigquery.storage_v1.types import arrow +from google.cloud.bigquery.storage_v1.types import avro +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + +from .transports.base import BigQueryReadTransport, DEFAULT_CLIENT_INFO +from .transports.grpc_asyncio import BigQueryReadGrpcAsyncIOTransport +from .client import BigQueryReadClient + + +class BigQueryReadAsyncClient: + """BigQuery Read API. + The Read API can be used to read data from BigQuery. + """ + + _client: BigQueryReadClient + + DEFAULT_ENDPOINT = BigQueryReadClient.DEFAULT_ENDPOINT + DEFAULT_MTLS_ENDPOINT = BigQueryReadClient.DEFAULT_MTLS_ENDPOINT + + read_session_path = staticmethod(BigQueryReadClient.read_session_path) + parse_read_session_path = staticmethod(BigQueryReadClient.parse_read_session_path) + read_stream_path = staticmethod(BigQueryReadClient.read_stream_path) + parse_read_stream_path = staticmethod(BigQueryReadClient.parse_read_stream_path) + + from_service_account_file = BigQueryReadClient.from_service_account_file + from_service_account_json = from_service_account_file + + get_transport_class = functools.partial( + type(BigQueryReadClient).get_transport_class, type(BigQueryReadClient) + ) + + def __init__( + self, + *, + credentials: credentials.Credentials = None, + transport: Union[str, BigQueryReadTransport] = "grpc_asyncio", + client_options: ClientOptions = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the big query read client. 
+ + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, ~.BigQueryReadTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (ClientOptions): Custom options for the client. It + won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + """ + + self._client = BigQueryReadClient( + credentials=credentials, + transport=transport, + client_options=client_options, + client_info=client_info, + ) + + async def create_read_session( + self, + request: storage.CreateReadSessionRequest = None, + *, + parent: str = None, + read_session: stream.ReadSession = None, + max_stream_count: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.ReadSession: + r"""Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Args: + request (:class:`~.storage.CreateReadSessionRequest`): + The request object. Request message for + `CreateReadSession`. + parent (:class:`str`): + Required. The request project that owns the session, in + the form of ``projects/{project_id}``. + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + read_session (:class:`~.stream.ReadSession`): + Required. Session to be created. 
+ This corresponds to the ``read_session`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + max_stream_count (:class:`int`): + Max initial number of streams. If + unset or zero, the server will provide a + value of streams so as to produce + reasonable throughput. Must be non- + negative. The number of streams may be + lower than the requested number, + depending on the amount parallelism that + is reasonable for the table. Error will + be returned if the max count is greater + than the current system max limit of + 1,000. + + Streams must be read starting from + offset 0. + This corresponds to the ``max_stream_count`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + ~.stream.ReadSession: + Information about the ReadSession. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + if request is not None and any([parent, read_session, max_stream_count]): + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.CreateReadSessionRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + if read_session is not None: + request.read_session = read_session + if max_stream_count is not None: + request.max_stream_count = max_stream_count + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.create_read_session, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_session.table", request.read_session.table),) + ), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def read_rows( + self, + request: storage.ReadRowsRequest = None, + *, + read_stream: str = None, + offset: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> AsyncIterable[storage.ReadRowsResponse]: + r"""Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Args: + request (:class:`~.storage.ReadRowsRequest`): + The request object. Request message for `ReadRows`. + read_stream (:class:`str`): + Required. Stream to read rows from. 
+ This corresponds to the ``read_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + offset (:class:`int`): + The offset requested must be less + than the last row read from Read. + Requesting a larger offset is undefined. + If not specified, start reading from + offset zero. + This corresponds to the ``offset`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + AsyncIterable[~.storage.ReadRowsResponse]: + Response from calling ``ReadRows`` may include row data, + progress and throttling information. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + if request is not None and any([read_stream, offset]): + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.ReadRowsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if read_stream is not None: + request.read_stream = read_stream + if offset is not None: + request.offset = offset + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.read_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type(exceptions.ServiceUnavailable,), + ), + default_timeout=86400.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_stream", request.read_stream),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + async def split_read_stream( + self, + request: storage.SplitReadStreamRequest = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.SplitReadStreamResponse: + r"""Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Args: + request (:class:`~.storage.SplitReadStreamRequest`): + The request object. Request message for + `SplitReadStream`. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. 
+ metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + ~.storage.SplitReadStreamResponse: + Response message for ``SplitReadStream``. + """ + # Create or coerce a protobuf request object. + + request = storage.SplitReadStreamRequest(request) + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.split_read_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("BigQueryReadAsyncClient",) diff --git a/google/cloud/bigquery/storage_v1/services/big_query_read/client.py b/google/cloud/bigquery/storage_v1/services/big_query_read/client.py new file mode 100644 index 00000000..38279b78 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/client.py @@ -0,0 +1,576 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from collections import OrderedDict +from distutils import util +import os +import re +from typing import Callable, Dict, Iterable, Sequence, Tuple, Type, Union +import pkg_resources + +import google.api_core.client_options as ClientOptions # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport import mtls # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore +from google.auth.exceptions import MutualTLSChannelError # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.cloud.bigquery.storage_v1.types import arrow +from google.cloud.bigquery.storage_v1.types import avro +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + +from .transports.base import BigQueryReadTransport, DEFAULT_CLIENT_INFO +from .transports.grpc import BigQueryReadGrpcTransport +from .transports.grpc_asyncio import BigQueryReadGrpcAsyncIOTransport + + +class BigQueryReadClientMeta(type): + """Metaclass for the BigQueryRead client. + + This provides class-level methods for building and retrieving + support objects (e.g. transport) without polluting the client instance + objects. + """ + + _transport_registry = OrderedDict() # type: Dict[str, Type[BigQueryReadTransport]] + _transport_registry["grpc"] = BigQueryReadGrpcTransport + _transport_registry["grpc_asyncio"] = BigQueryReadGrpcAsyncIOTransport + + def get_transport_class(cls, label: str = None,) -> Type[BigQueryReadTransport]: + """Return an appropriate transport class. + + Args: + label: The name of the desired transport. If none is + provided, then the first transport in the registry is used. + + Returns: + The transport class to use. + """ + # If a specific transport is requested, return that one. + if label: + return cls._transport_registry[label] + + # No transport is requested; return the default (that is, the first one + # in the dictionary). + return next(iter(cls._transport_registry.values())) + + +class BigQueryReadClient(metaclass=BigQueryReadClientMeta): + """BigQuery Read API. + The Read API can be used to read data from BigQuery. + """ + + @staticmethod + def _get_default_mtls_endpoint(api_endpoint): + """Convert api endpoint to mTLS endpoint. + Convert "*.sandbox.googleapis.com" and "*.googleapis.com" to + "*.mtls.sandbox.googleapis.com" and "*.mtls.googleapis.com" respectively. + Args: + api_endpoint (Optional[str]): the api endpoint to convert. + Returns: + str: converted mTLS api endpoint. + """ + if not api_endpoint: + return api_endpoint + + mtls_endpoint_re = re.compile( + r"(?P[^.]+)(?P\.mtls)?(?P\.sandbox)?(?P\.googleapis\.com)?" 
+ ) + + m = mtls_endpoint_re.match(api_endpoint) + name, mtls, sandbox, googledomain = m.groups() + if mtls or not googledomain: + return api_endpoint + + if sandbox: + return api_endpoint.replace( + "sandbox.googleapis.com", "mtls.sandbox.googleapis.com" + ) + + return api_endpoint.replace(".googleapis.com", ".mtls.googleapis.com") + + DEFAULT_ENDPOINT = "bigquerystorage.googleapis.com" + DEFAULT_MTLS_ENDPOINT = _get_default_mtls_endpoint.__func__( # type: ignore + DEFAULT_ENDPOINT + ) + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + {@api.name}: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_file(filename) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + @staticmethod + def read_session_path(project: str, location: str, session: str,) -> str: + """Return a fully-qualified read_session string.""" + return "projects/{project}/locations/{location}/sessions/{session}".format( + project=project, location=location, session=session, + ) + + @staticmethod + def parse_read_session_path(path: str) -> Dict[str, str]: + """Parse a read_session path into its component segments.""" + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/sessions/(?P.+?)$", + path, + ) + return m.groupdict() if m else {} + + @staticmethod + def read_stream_path( + project: str, location: str, session: str, stream: str, + ) -> str: + """Return a fully-qualified read_stream string.""" + return "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}".format( + project=project, location=location, session=session, stream=stream, + ) + + @staticmethod + def parse_read_stream_path(path: str) -> Dict[str, str]: + """Parse a read_stream path into its component segments.""" + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/sessions/(?P.+?)/streams/(?P.+?)$", + path, + ) + return m.groupdict() if m else {} + + def __init__( + self, + *, + credentials: credentials.Credentials = None, + transport: Union[str, BigQueryReadTransport] = None, + client_options: ClientOptions = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the big query read client. + + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, ~.BigQueryReadTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (ClientOptions): Custom options for the client. It + won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client. 
GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + """ + if isinstance(client_options, dict): + client_options = ClientOptions.from_dict(client_options) + if client_options is None: + client_options = ClientOptions.ClientOptions() + + # Create SSL credentials for mutual TLS if needed. + use_client_cert = bool( + util.strtobool(os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false")) + ) + + ssl_credentials = None + is_mtls = False + if use_client_cert: + if client_options.client_cert_source: + import grpc # type: ignore + + cert, key = client_options.client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + is_mtls = True + else: + creds = SslCredentials() + is_mtls = creds.is_mtls + ssl_credentials = creds.ssl_credentials if is_mtls else None + + # Figure out which api endpoint to use. + if client_options.api_endpoint is not None: + api_endpoint = client_options.api_endpoint + else: + use_mtls_env = os.getenv("GOOGLE_API_USE_MTLS_ENDPOINT", "auto") + if use_mtls_env == "never": + api_endpoint = self.DEFAULT_ENDPOINT + elif use_mtls_env == "always": + api_endpoint = self.DEFAULT_MTLS_ENDPOINT + elif use_mtls_env == "auto": + api_endpoint = ( + self.DEFAULT_MTLS_ENDPOINT if is_mtls else self.DEFAULT_ENDPOINT + ) + else: + raise MutualTLSChannelError( + "Unsupported GOOGLE_API_USE_MTLS_ENDPOINT value. Accepted values: never, auto, always" + ) + + # Save or instantiate the transport. + # Ordinarily, we provide the transport, but allowing a custom transport + # instance provides an extensibility point for unusual situations. + if isinstance(transport, BigQueryReadTransport): + # transport is a BigQueryReadTransport instance. + if credentials or client_options.credentials_file: + raise ValueError( + "When providing a transport instance, " + "provide its credentials directly." + ) + if client_options.scopes: + raise ValueError( + "When providing a transport instance, " + "provide its scopes directly." 
+ ) + self._transport = transport + else: + Transport = type(self).get_transport_class(transport) + self._transport = Transport( + credentials=credentials, + credentials_file=client_options.credentials_file, + host=api_endpoint, + scopes=client_options.scopes, + ssl_channel_credentials=ssl_credentials, + quota_project_id=client_options.quota_project_id, + client_info=client_info, + ) + + def create_read_session( + self, + request: storage.CreateReadSessionRequest = None, + *, + parent: str = None, + read_session: stream.ReadSession = None, + max_stream_count: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.ReadSession: + r"""Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Args: + request (:class:`~.storage.CreateReadSessionRequest`): + The request object. Request message for + `CreateReadSession`. + parent (:class:`str`): + Required. The request project that owns the session, in + the form of ``projects/{project_id}``. + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + read_session (:class:`~.stream.ReadSession`): + Required. Session to be created. + This corresponds to the ``read_session`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + max_stream_count (:class:`int`): + Max initial number of streams. If + unset or zero, the server will provide a + value of streams so as to produce + reasonable throughput. Must be non- + negative. The number of streams may be + lower than the requested number, + depending on the amount parallelism that + is reasonable for the table. Error will + be returned if the max count is greater + than the current system max limit of + 1,000. + + Streams must be read starting from + offset 0. + This corresponds to the ``max_stream_count`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + ~.stream.ReadSession: + Information about the ReadSession. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. 
+ has_flattened_params = any([parent, read_session, max_stream_count]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.CreateReadSessionRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.CreateReadSessionRequest): + request = storage.CreateReadSessionRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + if read_session is not None: + request.read_session = read_session + if max_stream_count is not None: + request.max_stream_count = max_stream_count + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.create_read_session] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_session.table", request.read_session.table),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def read_rows( + self, + request: storage.ReadRowsRequest = None, + *, + read_stream: str = None, + offset: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Iterable[storage.ReadRowsResponse]: + r"""Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Args: + request (:class:`~.storage.ReadRowsRequest`): + The request object. Request message for `ReadRows`. + read_stream (:class:`str`): + Required. Stream to read rows from. + This corresponds to the ``read_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + offset (:class:`int`): + The offset requested must be less + than the last row read from Read. + Requesting a larger offset is undefined. + If not specified, start reading from + offset zero. + This corresponds to the ``offset`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + Iterable[~.storage.ReadRowsResponse]: + Response from calling ``ReadRows`` may include row data, + progress and throttling information. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([read_stream, offset]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." 
+ ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.ReadRowsRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.ReadRowsRequest): + request = storage.ReadRowsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if read_stream is not None: + request.read_stream = read_stream + if offset is not None: + request.offset = offset + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.read_rows] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_stream", request.read_stream),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def split_read_stream( + self, + request: storage.SplitReadStreamRequest = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.SplitReadStreamResponse: + r"""Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Args: + request (:class:`~.storage.SplitReadStreamRequest`): + The request object. Request message for + `SplitReadStream`. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + ~.storage.SplitReadStreamResponse: + Response message for ``SplitReadStream``. + """ + # Create or coerce a protobuf request object. + + # Minor optimization to avoid making a copy if the user passes + # in a storage.SplitReadStreamRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.SplitReadStreamRequest): + request = storage.SplitReadStreamRequest(request) + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.split_read_stream] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. 
+ return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("BigQueryReadClient",) diff --git a/google/cloud/bigquery/storage_v1/services/big_query_read/transports/__init__.py b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/__init__.py new file mode 100644 index 00000000..2e9fe066 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/__init__.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from collections import OrderedDict +from typing import Dict, Type + +from .base import BigQueryReadTransport +from .grpc import BigQueryReadGrpcTransport +from .grpc_asyncio import BigQueryReadGrpcAsyncIOTransport + + +# Compile a registry of transports. +_transport_registry = OrderedDict() # type: Dict[str, Type[BigQueryReadTransport]] +_transport_registry["grpc"] = BigQueryReadGrpcTransport +_transport_registry["grpc_asyncio"] = BigQueryReadGrpcAsyncIOTransport + + +__all__ = ( + "BigQueryReadTransport", + "BigQueryReadGrpcTransport", + "BigQueryReadGrpcAsyncIOTransport", +) diff --git a/google/cloud/bigquery/storage_v1/services/big_query_read/transports/base.py b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/base.py new file mode 100644 index 00000000..3b9b0e71 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/base.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
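The `DEFAULT_CLIENT_INFO` fallback above feeds the user-agent header sent with every request. A caller can supply its own `ClientInfo` instead; a sketch, assuming the public client forwards a `client_info` argument as the generated surface does, with an arbitrary user-agent value:

```py
from google.api_core.gapic_v1.client_info import ClientInfo
from google.cloud.bigquery.storage import BigQueryReadClient

# Attach an application-specific user agent to outgoing API requests.
client = BigQueryReadClient(client_info=ClientInfo(user_agent="my-app/1.0"))
```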
+# + +import abc +import typing +import pkg_resources + +from google import auth # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore + +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +class BigQueryReadTransport(abc.ABC): + """Abstract transport class for BigQueryRead.""" + + AUTH_SCOPES = ( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ) + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: typing.Optional[str] = None, + scopes: typing.Optional[typing.Sequence[str]] = AUTH_SCOPES, + quota_project_id: typing.Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + **kwargs, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scope (Optional[Sequence[str]]): A list of scopes. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + """ + # Save the hostname. Default to port 443 (HTTPS) if none is specified. + if ":" not in host: + host += ":443" + self._host = host + + # If no credentials are provided, then determine the appropriate + # defaults. + if credentials and credentials_file: + raise exceptions.DuplicateCredentialArgs( + "'credentials_file' and 'credentials' are mutually exclusive" + ) + + if credentials_file is not None: + credentials, _ = auth.load_credentials_from_file( + credentials_file, scopes=scopes, quota_project_id=quota_project_id + ) + + elif credentials is None: + credentials, _ = auth.default( + scopes=scopes, quota_project_id=quota_project_id + ) + + # Save the credentials. + self._credentials = credentials + + # Lifted into its own function so it can be stubbed out during tests. + self._prep_wrapped_messages(client_info) + + def _prep_wrapped_messages(self, client_info): + # Precompute the wrapped methods. 
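The transport constructor above resolves credentials from exactly one source; supplying both `credentials` and `credentials_file` raises `DuplicateCredentialArgs`, and supplying neither falls back to Application Default Credentials. A sketch of the two valid forms, with a hypothetical key-file path:

```py
from google.cloud.bigquery.storage_v1.services.big_query_read.transports import (
    BigQueryReadGrpcTransport,
)

# Form 1: no explicit credentials -- Application Default Credentials are used,
# and the default host gains a ":443" suffix.
transport = BigQueryReadGrpcTransport()

# Form 2: load credentials from a service-account key file instead.
transport = BigQueryReadGrpcTransport(credentials_file="/path/to/key.json")
```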
+ self._wrapped_methods = { + self.create_read_session: gapic_v1.method.wrap_method( + self.create_read_session, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + self.read_rows: gapic_v1.method.wrap_method( + self.read_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type(exceptions.ServiceUnavailable,), + ), + default_timeout=86400.0, + client_info=client_info, + ), + self.split_read_stream: gapic_v1.method.wrap_method( + self.split_read_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + } + + @property + def create_read_session( + self, + ) -> typing.Callable[ + [storage.CreateReadSessionRequest], + typing.Union[stream.ReadSession, typing.Awaitable[stream.ReadSession]], + ]: + raise NotImplementedError() + + @property + def read_rows( + self, + ) -> typing.Callable[ + [storage.ReadRowsRequest], + typing.Union[ + storage.ReadRowsResponse, typing.Awaitable[storage.ReadRowsResponse] + ], + ]: + raise NotImplementedError() + + @property + def split_read_stream( + self, + ) -> typing.Callable[ + [storage.SplitReadStreamRequest], + typing.Union[ + storage.SplitReadStreamResponse, + typing.Awaitable[storage.SplitReadStreamResponse], + ], + ]: + raise NotImplementedError() + + +__all__ = ("BigQueryReadTransport",) diff --git a/google/cloud/bigquery/storage_v1/services/big_query_read/transports/grpc.py b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/grpc.py new file mode 100644 index 00000000..17ecafe6 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/grpc.py @@ -0,0 +1,354 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import warnings +from typing import Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import grpc_helpers # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore + +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream + +from .base import BigQueryReadTransport, DEFAULT_CLIENT_INFO + + +class BigQueryReadGrpcTransport(BigQueryReadTransport): + """gRPC backend transport for BigQueryRead. + + BigQuery Read API. + The Read API can be used to read data from BigQuery. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. 
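The per-method defaults wired up in `_prep_wrapped_messages` above (retry on `ServiceUnavailable`/`DeadlineExceeded`, a 600 s timeout for the unary calls and 86 400 s for `read_rows`) can be overridden per call. A sketch, reusing `client` and `requested_session` from earlier sketches; the tighter 120 s deadline is an arbitrary illustrative choice:

```py
from google.api_core import exceptions
from google.api_core import retry as retries

custom_retry = retries.Retry(
    initial=0.1,
    maximum=30.0,
    multiplier=1.3,
    deadline=120.0,
    predicate=retries.if_exception_type(
        exceptions.ServiceUnavailable,
        exceptions.DeadlineExceeded,
    ),
)

session = client.create_read_session(
    parent="projects/your-project-id",
    read_session=requested_session,
    retry=custom_retry,
    timeout=120.0,
)
```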
+ + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _stubs: Dict[str, Callable] + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Sequence[str] = None, + channel: grpc.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + quota_project_id: Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + channel (Optional[grpc.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + if channel: + # Sanity check: Ensure that channel and credentials are not both + # provided. + credentials = False + + # If a channel was explicitly provided, set it. + self._grpc_channel = channel + elif api_mtls_endpoint: + warnings.warn( + "api_mtls_endpoint and client_cert_source are deprecated", + DeprecationWarning, + ) + + host = ( + api_mtls_endpoint + if ":" in api_mtls_endpoint + else api_mtls_endpoint + ":443" + ) + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. 
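Because `api_mtls_endpoint` and `client_cert_source` are deprecated above, the supported way to get a mutual-TLS channel is to build `grpc.ChannelCredentials` yourself and pass them as `ssl_channel_credentials`. A sketch with hypothetical certificate file names:

```py
import grpc

from google.cloud.bigquery.storage_v1.services.big_query_read.transports import (
    BigQueryReadGrpcTransport,
)

with open("client_cert.pem", "rb") as cert_file:
    cert = cert_file.read()
with open("client_key.pem", "rb") as key_file:
    key = key_file.read()

ssl_creds = grpc.ssl_channel_credentials(
    certificate_chain=cert, private_key=key
)
transport = BigQueryReadGrpcTransport(ssl_channel_credentials=ssl_creds)
```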
+ if client_cert_source: + cert, key = client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + ssl_credentials = SslCredentials().ssl_credentials + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + ) + else: + host = host if ":" in host else host + ":443" + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_channel_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + ) + + self._stubs = {} # type: Dict[str, Callable] + + # Run the base constructor. + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + @classmethod + def create_channel( + cls, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> grpc.Channel: + """Create and return a gRPC channel object. + Args: + address (Optionsl[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + grpc.Channel: A gRPC channel object. + + Raises: + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + @property + def grpc_channel(self) -> grpc.Channel: + """Create the channel designed to connect to this service. + + This property caches on the instance; repeated calls return + the same channel. + """ + # Return the channel from cache. + return self._grpc_channel + + @property + def create_read_session( + self, + ) -> Callable[[storage.CreateReadSessionRequest], stream.ReadSession]: + r"""Return a callable for the create read session method over gRPC. + + Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. 
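Per the constructor branches above, an explicitly supplied channel takes precedence and any credentials arguments are ignored. A sketch that builds the channel with the transport's own `create_channel` helper and hands it in (Application Default Credentials are assumed when creating the channel):

```py
from google.cloud.bigquery.storage_v1.services.big_query_read.transports import (
    BigQueryReadGrpcTransport,
)

channel = BigQueryReadGrpcTransport.create_channel(
    "bigquerystorage.googleapis.com:443",
    scopes=BigQueryReadGrpcTransport.AUTH_SCOPES,
)
transport = BigQueryReadGrpcTransport(channel=channel)
```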
The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Returns: + Callable[[~.CreateReadSessionRequest], + ~.ReadSession]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "create_read_session" not in self._stubs: + self._stubs["create_read_session"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1.BigQueryRead/CreateReadSession", + request_serializer=storage.CreateReadSessionRequest.serialize, + response_deserializer=stream.ReadSession.deserialize, + ) + return self._stubs["create_read_session"] + + @property + def read_rows( + self, + ) -> Callable[[storage.ReadRowsRequest], storage.ReadRowsResponse]: + r"""Return a callable for the read rows method over gRPC. + + Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Returns: + Callable[[~.ReadRowsRequest], + ~.ReadRowsResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "read_rows" not in self._stubs: + self._stubs["read_rows"] = self.grpc_channel.unary_stream( + "/google.cloud.bigquery.storage.v1.BigQueryRead/ReadRows", + request_serializer=storage.ReadRowsRequest.serialize, + response_deserializer=storage.ReadRowsResponse.deserialize, + ) + return self._stubs["read_rows"] + + @property + def split_read_stream( + self, + ) -> Callable[[storage.SplitReadStreamRequest], storage.SplitReadStreamResponse]: + r"""Return a callable for the split read stream method over gRPC. + + Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. 
Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Returns: + Callable[[~.SplitReadStreamRequest], + ~.SplitReadStreamResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "split_read_stream" not in self._stubs: + self._stubs["split_read_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1.BigQueryRead/SplitReadStream", + request_serializer=storage.SplitReadStreamRequest.serialize, + response_deserializer=storage.SplitReadStreamResponse.deserialize, + ) + return self._stubs["split_read_stream"] + + +__all__ = ("BigQueryReadGrpcTransport",) diff --git a/google/cloud/bigquery/storage_v1/services/big_query_read/transports/grpc_asyncio.py b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/grpc_asyncio.py new file mode 100644 index 00000000..ff5755c9 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/services/big_query_read/transports/grpc_asyncio.py @@ -0,0 +1,356 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import warnings +from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import gapic_v1 # type: ignore +from google.api_core import grpc_helpers_async # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore +from grpc.experimental import aio # type: ignore + +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream + +from .base import BigQueryReadTransport, DEFAULT_CLIENT_INFO +from .grpc import BigQueryReadGrpcTransport + + +class BigQueryReadGrpcAsyncIOTransport(BigQueryReadTransport): + """gRPC AsyncIO backend transport for BigQueryRead. + + BigQuery Read API. + The Read API can be used to read data from BigQuery. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _grpc_channel: aio.Channel + _stubs: Dict[str, Callable] = {} + + @classmethod + def create_channel( + cls, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> aio.Channel: + """Create and return a gRPC AsyncIO channel object. 
+ Args: + address (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + aio.Channel: A gRPC AsyncIO channel object. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers_async.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + channel: aio.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + quota_project_id=None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + channel (Optional[aio.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. 
+ Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + if channel: + # Sanity check: Ensure that channel and credentials are not both + # provided. + credentials = False + + # If a channel was explicitly provided, set it. + self._grpc_channel = channel + elif api_mtls_endpoint: + warnings.warn( + "api_mtls_endpoint and client_cert_source are deprecated", + DeprecationWarning, + ) + + host = ( + api_mtls_endpoint + if ":" in api_mtls_endpoint + else api_mtls_endpoint + ":443" + ) + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + ssl_credentials = SslCredentials().ssl_credentials + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + ) + else: + host = host if ":" in host else host + ":443" + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_channel_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + ) + + # Run the base constructor. + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + self._stubs = {} + + @property + def grpc_channel(self) -> aio.Channel: + """Create the channel designed to connect to this service. + + This property caches on the instance; repeated calls return + the same channel. + """ + # Return the channel from cache. + return self._grpc_channel + + @property + def create_read_session( + self, + ) -> Callable[[storage.CreateReadSessionRequest], Awaitable[stream.ReadSession]]: + r"""Return a callable for the create read session method over gRPC. + + Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. 
+ Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Returns: + Callable[[~.CreateReadSessionRequest], + Awaitable[~.ReadSession]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "create_read_session" not in self._stubs: + self._stubs["create_read_session"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1.BigQueryRead/CreateReadSession", + request_serializer=storage.CreateReadSessionRequest.serialize, + response_deserializer=stream.ReadSession.deserialize, + ) + return self._stubs["create_read_session"] + + @property + def read_rows( + self, + ) -> Callable[[storage.ReadRowsRequest], Awaitable[storage.ReadRowsResponse]]: + r"""Return a callable for the read rows method over gRPC. + + Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Returns: + Callable[[~.ReadRowsRequest], + Awaitable[~.ReadRowsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "read_rows" not in self._stubs: + self._stubs["read_rows"] = self.grpc_channel.unary_stream( + "/google.cloud.bigquery.storage.v1.BigQueryRead/ReadRows", + request_serializer=storage.ReadRowsRequest.serialize, + response_deserializer=storage.ReadRowsResponse.deserialize, + ) + return self._stubs["read_rows"] + + @property + def split_read_stream( + self, + ) -> Callable[ + [storage.SplitReadStreamRequest], Awaitable[storage.SplitReadStreamResponse] + ]: + r"""Return a callable for the split read stream method over gRPC. + + Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Returns: + Callable[[~.SplitReadStreamRequest], + Awaitable[~.SplitReadStreamResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
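The AsyncIO transport mirrors the synchronous one; a brief sketch of constructing it inside an event loop (this only builds the transport object and assumes Application Default Credentials, as in the synchronous sketch above):

```py
import asyncio

from google.cloud.bigquery.storage_v1.services.big_query_read.transports import (
    BigQueryReadGrpcAsyncIOTransport,
)


async def main():
    # Build an AsyncIO channel and hand it to the transport.
    channel = BigQueryReadGrpcAsyncIOTransport.create_channel(
        "bigquerystorage.googleapis.com:443",
        scopes=BigQueryReadGrpcAsyncIOTransport.AUTH_SCOPES,
    )
    transport = BigQueryReadGrpcAsyncIOTransport(channel=channel)
    # The transport's properties (create_read_session, read_rows, ...) expose
    # awaitable callables bound to the generated gRPC stubs.


asyncio.run(main())
```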
+ if "split_read_stream" not in self._stubs: + self._stubs["split_read_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1.BigQueryRead/SplitReadStream", + request_serializer=storage.SplitReadStreamRequest.serialize, + response_deserializer=storage.SplitReadStreamResponse.deserialize, + ) + return self._stubs["split_read_stream"] + + +__all__ = ("BigQueryReadGrpcAsyncIOTransport",) diff --git a/google/cloud/bigquery/storage_v1/types/__init__.py b/google/cloud/bigquery/storage_v1/types/__init__.py new file mode 100644 index 00000000..0d37362f --- /dev/null +++ b/google/cloud/bigquery/storage_v1/types/__init__.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .arrow import ( + ArrowSchema, + ArrowRecordBatch, +) +from .avro import ( + AvroSchema, + AvroRows, +) +from .stream import ( + ReadSession, + ReadStream, +) +from .storage import ( + CreateReadSessionRequest, + ReadRowsRequest, + ThrottleState, + StreamStats, + ReadRowsResponse, + SplitReadStreamRequest, + SplitReadStreamResponse, +) + + +__all__ = ( + "ArrowSchema", + "ArrowRecordBatch", + "AvroSchema", + "AvroRows", + "ReadSession", + "ReadStream", + "CreateReadSessionRequest", + "ReadRowsRequest", + "ThrottleState", + "StreamStats", + "ReadRowsResponse", + "SplitReadStreamRequest", + "SplitReadStreamResponse", +) diff --git a/google/cloud/bigquery/storage_v1/types/arrow.py b/google/cloud/bigquery/storage_v1/types/arrow.py new file mode 100644 index 00000000..e77b4576 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/types/arrow.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1", + manifest={"ArrowSchema", "ArrowRecordBatch",}, +) + + +class ArrowSchema(proto.Message): + r"""Arrow schema as specified in + https://arrow.apache.org/docs/python/api/datatypes.html and + serialized to bytes using IPC: + https://arrow.apache.org/docs/format/Columnar.html#serialization- + and-interprocess-communication-ipc + See code samples on how this message can be deserialized. + + Attributes: + serialized_schema (bytes): + IPC serialized Arrow schema. + """ + + serialized_schema = proto.Field(proto.BYTES, number=1) + + +class ArrowRecordBatch(proto.Message): + r"""Arrow RecordBatch. 
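The `ArrowSchema` docstring above points to code samples for deserialization; a sketch using `pyarrow` (an assumed, optional dependency). It reuses `client` and an ARROW-format `session` from earlier sketches and calls the hand-written `read_rows(stream_name)` shown elsewhere in this changeset; the library's `reader` module also offers higher-level helpers for this.

```py
import pyarrow as pa

# Deserialize the IPC-serialized schema carried on the ReadSession...
arrow_schema = pa.ipc.read_schema(
    pa.py_buffer(session.arrow_schema.serialized_schema)
)

# ...then decode each ArrowRecordBatch from the ReadRows responses.
record_batches = []
for response in client.read_rows(session.streams[0].name):
    record_batches.append(
        pa.ipc.read_record_batch(
            pa.py_buffer(response.arrow_record_batch.serialized_record_batch),
            arrow_schema,
        )
    )
table = pa.Table.from_batches(record_batches)
```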
+ + Attributes: + serialized_record_batch (bytes): + IPC-serialized Arrow RecordBatch. + row_count (int): + The count of rows in ``serialized_record_batch``. + """ + + serialized_record_batch = proto.Field(proto.BYTES, number=1) + + row_count = proto.Field(proto.INT64, number=2) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery/storage_v1/types/avro.py b/google/cloud/bigquery/storage_v1/types/avro.py new file mode 100644 index 00000000..a58b2a69 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/types/avro.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1", manifest={"AvroSchema", "AvroRows",}, +) + + +class AvroSchema(proto.Message): + r"""Avro schema. + + Attributes: + schema (str): + Json serialized schema, as described at + https://avro.apache.org/docs/1.8.1/spec.html. + """ + + schema = proto.Field(proto.STRING, number=1) + + +class AvroRows(proto.Message): + r"""Avro rows. + + Attributes: + serialized_binary_rows (bytes): + Binary serialized rows in a block. + row_count (int): + The count of rows in the returning block. + """ + + serialized_binary_rows = proto.Field(proto.BYTES, number=1) + + row_count = proto.Field(proto.INT64, number=2) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery/storage_v1/types/storage.py b/google/cloud/bigquery/storage_v1/types/storage.py new file mode 100644 index 00000000..57584b28 --- /dev/null +++ b/google/cloud/bigquery/storage_v1/types/storage.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.cloud.bigquery.storage_v1.types import arrow +from google.cloud.bigquery.storage_v1.types import avro +from google.cloud.bigquery.storage_v1.types import stream + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1", + manifest={ + "CreateReadSessionRequest", + "ReadRowsRequest", + "ThrottleState", + "StreamStats", + "ReadRowsResponse", + "SplitReadStreamRequest", + "SplitReadStreamResponse", + }, +) + + +class CreateReadSessionRequest(proto.Message): + r"""Request message for ``CreateReadSession``. + + Attributes: + parent (str): + Required. The request project that owns the session, in the + form of ``projects/{project_id}``. 
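A comparable sketch for the Avro messages above, using `fastavro` (an assumed, optional dependency) to decode the schemaless binary rows; `client` and an AVRO-format `session` are assumed from earlier sketches:

```py
import io
import json

import fastavro

parsed_schema = fastavro.parse_schema(json.loads(session.avro_schema.schema))

rows = []
for response in client.read_rows(session.streams[0].name):
    buffer = io.BytesIO(response.avro_rows.serialized_binary_rows)
    for _ in range(response.row_count):
        rows.append(fastavro.schemaless_reader(buffer, parsed_schema))
```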
+ read_session (~.stream.ReadSession): + Required. Session to be created. + max_stream_count (int): + Max initial number of streams. If unset or + zero, the server will provide a value of streams + so as to produce reasonable throughput. Must be + non-negative. The number of streams may be lower + than the requested number, depending on the + amount parallelism that is reasonable for the + table. Error will be returned if the max count + is greater than the current system max limit of + 1,000. + + Streams must be read starting from offset 0. + """ + + parent = proto.Field(proto.STRING, number=1) + + read_session = proto.Field(proto.MESSAGE, number=2, message=stream.ReadSession,) + + max_stream_count = proto.Field(proto.INT32, number=3) + + +class ReadRowsRequest(proto.Message): + r"""Request message for ``ReadRows``. + + Attributes: + read_stream (str): + Required. Stream to read rows from. + offset (int): + The offset requested must be less than the + last row read from Read. Requesting a larger + offset is undefined. If not specified, start + reading from offset zero. + """ + + read_stream = proto.Field(proto.STRING, number=1) + + offset = proto.Field(proto.INT64, number=2) + + +class ThrottleState(proto.Message): + r"""Information on if the current connection is being throttled. + + Attributes: + throttle_percent (int): + How much this connection is being throttled. + Zero means no throttling, 100 means fully + throttled. + """ + + throttle_percent = proto.Field(proto.INT32, number=1) + + +class StreamStats(proto.Message): + r"""Estimated stream statistics for a given Stream. + + Attributes: + progress (~.storage.StreamStats.Progress): + Represents the progress of the current + stream. + """ + + class Progress(proto.Message): + r""" + + Attributes: + at_response_start (float): + The fraction of rows assigned to the stream that have been + processed by the server so far, not including the rows in + the current response message. + + This value, along with ``at_response_end``, can be used to + interpolate the progress made as the rows in the message are + being processed using the following formula: + ``at_response_start + (at_response_end - at_response_start) * rows_processed_from_response / rows_in_response``. + + Note that if a filter is provided, the ``at_response_end`` + value of the previous response may not necessarily be equal + to the ``at_response_start`` value of the current response. + at_response_end (float): + Similar to ``at_response_start``, except that this value + includes the rows in the current response. + """ + + at_response_start = proto.Field(proto.DOUBLE, number=1) + + at_response_end = proto.Field(proto.DOUBLE, number=2) + + progress = proto.Field(proto.MESSAGE, number=2, message=Progress,) + + +class ReadRowsResponse(proto.Message): + r"""Response from calling ``ReadRows`` may include row data, progress + and throttling information. + + Attributes: + avro_rows (~.avro.AvroRows): + Serialized row data in AVRO format. + arrow_record_batch (~.arrow.ArrowRecordBatch): + Serialized row data in Arrow RecordBatch + format. + row_count (int): + Number of serialized rows in the rows block. + stats (~.storage.StreamStats): + Statistics for the stream. + throttle_state (~.storage.ThrottleState): + Throttling state. If unset, the latest + response still describes the current throttling + status. 
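The interpolation formula from the `Progress` docstring above, applied to a single response; `rows_processed` is a placeholder counter maintained by the caller for rows already consumed from this response:

```py
progress = response.stats.progress
fraction_done = progress.at_response_start + (
    progress.at_response_end - progress.at_response_start
) * rows_processed / response.row_count
print(f"roughly {fraction_done:.1%} of this stream's rows have been processed")
```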
+ """ + + avro_rows = proto.Field( + proto.MESSAGE, number=3, oneof="rows", message=avro.AvroRows, + ) + + arrow_record_batch = proto.Field( + proto.MESSAGE, number=4, oneof="rows", message=arrow.ArrowRecordBatch, + ) + + row_count = proto.Field(proto.INT64, number=6) + + stats = proto.Field(proto.MESSAGE, number=2, message=StreamStats,) + + throttle_state = proto.Field(proto.MESSAGE, number=5, message=ThrottleState,) + + +class SplitReadStreamRequest(proto.Message): + r"""Request message for ``SplitReadStream``. + + Attributes: + name (str): + Required. Name of the stream to split. + fraction (float): + A value in the range (0.0, 1.0) that + specifies the fractional point at which the + original stream should be split. The actual + split point is evaluated on pre-filtered rows, + so if a filter is provided, then there is no + guarantee that the division of the rows between + the new child streams will be proportional to + this fractional value. Additionally, because the + server-side unit for assigning data is + collections of rows, this fraction will always + map to a data storage boundary on the server + side. + """ + + name = proto.Field(proto.STRING, number=1) + + fraction = proto.Field(proto.DOUBLE, number=2) + + +class SplitReadStreamResponse(proto.Message): + r"""Response message for ``SplitReadStream``. + + Attributes: + primary_stream (~.stream.ReadStream): + Primary stream, which contains the beginning portion of + \|original_stream|. An empty value indicates that the + original stream can no longer be split. + remainder_stream (~.stream.ReadStream): + Remainder stream, which contains the tail of + \|original_stream|. An empty value indicates that the + original stream can no longer be split. + """ + + primary_stream = proto.Field(proto.MESSAGE, number=1, message=stream.ReadStream,) + + remainder_stream = proto.Field(proto.MESSAGE, number=2, message=stream.ReadStream,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery/storage_v1/types/stream.py b/google/cloud/bigquery/storage_v1/types/stream.py new file mode 100644 index 00000000..99b7afee --- /dev/null +++ b/google/cloud/bigquery/storage_v1/types/stream.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.cloud.bigquery.storage_v1.types import arrow +from google.cloud.bigquery.storage_v1.types import avro +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1", + manifest={"DataFormat", "ReadSession", "ReadStream",}, +) + + +class DataFormat(proto.Enum): + r"""Data format for input or output data.""" + DATA_FORMAT_UNSPECIFIED = 0 + AVRO = 1 + ARROW = 2 + + +class ReadSession(proto.Message): + r"""Information about the ReadSession. + + Attributes: + name (str): + Output only. 
Unique identifier for the session, in the form + ``projects/{project_id}/locations/{location}/sessions/{session_id}``. + expire_time (~.timestamp.Timestamp): + Output only. Time at which the session becomes invalid. + After this time, subsequent requests to read this Session + will return errors. The expire_time is automatically + assigned and currently cannot be specified or updated. + data_format (~.stream.DataFormat): + Immutable. Data format of the output data. + avro_schema (~.avro.AvroSchema): + Output only. Avro schema. + arrow_schema (~.arrow.ArrowSchema): + Output only. Arrow schema. + table (str): + Immutable. Table that this ReadSession is reading from, in + the form + ``projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`` + table_modifiers (~.stream.ReadSession.TableModifiers): + Optional. Any modifiers which are applied + when reading from the specified table. + read_options (~.stream.ReadSession.TableReadOptions): + Optional. Read options for this session (e.g. + column selection, filters). + streams (Sequence[~.stream.ReadStream]): + Output only. A list of streams created with the session. + + At least one stream is created with the session. In the + future, larger request_stream_count values *may* result in + this list being unpopulated, in that case, the user will + need to use a List method to get the streams instead, which + is not yet available. + """ + + class TableModifiers(proto.Message): + r"""Additional attributes when reading a table. + + Attributes: + snapshot_time (~.timestamp.Timestamp): + The snapshot time of the table. If not set, + interpreted as now. + """ + + snapshot_time = proto.Field( + proto.MESSAGE, number=1, message=timestamp.Timestamp, + ) + + class TableReadOptions(proto.Message): + r"""Options dictating how we read a table. + + Attributes: + selected_fields (Sequence[str]): + Names of the fields in the table that should be read. If + empty, all fields will be read. If the specified field is a + nested field, all the sub-fields in the field will be + selected. The output field order is unrelated to the order + of fields in selected_fields. + row_restriction (str): + SQL text filtering statement, similar to a WHERE clause in a + query. Aggregates are not supported. + + Examples: "int_field > 5" "date_field = CAST('2014-9-27' as + DATE)" "nullable_field is not NULL" "st_equals(geo_field, + st_geofromtext("POINT(2, 2)"))" "numeric_field BETWEEN 1.0 + AND 5.0". + """ + + selected_fields = proto.RepeatedField(proto.STRING, number=1) + + row_restriction = proto.Field(proto.STRING, number=2) + + name = proto.Field(proto.STRING, number=1) + + expire_time = proto.Field(proto.MESSAGE, number=2, message=timestamp.Timestamp,) + + data_format = proto.Field(proto.ENUM, number=3, enum="DataFormat",) + + avro_schema = proto.Field( + proto.MESSAGE, number=4, oneof="schema", message=avro.AvroSchema, + ) + + arrow_schema = proto.Field( + proto.MESSAGE, number=5, oneof="schema", message=arrow.ArrowSchema, + ) + + table = proto.Field(proto.STRING, number=6) + + table_modifiers = proto.Field(proto.MESSAGE, number=7, message=TableModifiers,) + + read_options = proto.Field(proto.MESSAGE, number=8, message=TableReadOptions,) + + streams = proto.RepeatedField(proto.MESSAGE, number=10, message="ReadStream",) + + +class ReadStream(proto.Message): + r"""Information about a single stream that gets data out of the storage + system. Most of the information about ``ReadStream`` instances is + aggregated, making ``ReadStream`` lightweight. 
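Tying the `ReadSession` options above together, a sketch that selects columns and applies a row filter; the table path and field names are placeholders:

```py
from google.cloud.bigquery.storage import types

requested_session = types.ReadSession(
    table="projects/your-project-id/datasets/your_dataset/tables/your_table",
    data_format=types.DataFormat.AVRO,
    read_options=types.ReadSession.TableReadOptions(
        selected_fields=["name", "number"],
        row_restriction="number > 5",
    ),
)
```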
+ + Attributes: + name (str): + Output only. Name of the stream, in the form + ``projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}``. + """ + + name = proto.Field(proto.STRING, number=1) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_storage_v1/__init__.py b/google/cloud/bigquery_storage_v1/__init__.py index c151d9e5..4cca4cfe 100644 --- a/google/cloud/bigquery_storage_v1/__init__.py +++ b/google/cloud/bigquery_storage_v1/__init__.py @@ -22,14 +22,12 @@ "google-cloud-bigquery-storage" ).version # noqa -from google.cloud.bigquery_storage_v1 import types from google.cloud.bigquery_storage_v1 import client -from google.cloud.bigquery_storage_v1.gapic import enums +from google.cloud.bigquery_storage_v1 import types class BigQueryReadClient(client.BigQueryReadClient): __doc__ = client.BigQueryReadClient.__doc__ - enums = enums __all__ = ( @@ -38,6 +36,4 @@ class BigQueryReadClient(client.BigQueryReadClient): "types", # google.cloud.bigquery_storage_v1.client "BigQueryReadClient", - # google.cloud.bigquery_storage_v1.gapic - "enums", ) diff --git a/google/cloud/bigquery_storage_v1/client.py b/google/cloud/bigquery_storage_v1/client.py index aa341f25..a910a574 100644 --- a/google/cloud/bigquery_storage_v1/client.py +++ b/google/cloud/bigquery_storage_v1/client.py @@ -23,8 +23,8 @@ import google.api_core.gapic_v1.method +from google.cloud.bigquery import storage_v1 from google.cloud.bigquery_storage_v1 import reader -from google.cloud.bigquery_storage_v1.gapic import big_query_read_client # noqa _SCOPES = ( @@ -33,7 +33,7 @@ ) -class BigQueryReadClient(big_query_read_client.BigQueryReadClient): +class BigQueryReadClient(storage_v1.BigQueryReadClient): """Client for interacting with BigQuery Storage API. The BigQuery storage API can be used to read data stored in BigQuery. @@ -45,7 +45,7 @@ def read_rows( offset=0, retry=google.api_core.gapic_v1.method.DEFAULT, timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, + metadata=(), ): """ Reads rows from the table in the format prescribed by the read @@ -60,9 +60,9 @@ def read_rows( to read data. Example: - >>> from google.cloud import bigquery_storage_v1 + >>> from google.cloud.bigquery import storage >>> - >>> client = bigquery_storage_v1.BigQueryReadClient() + >>> client = storage.BigQueryReadClient() >>> >>> # TODO: Initialize ``table``: >>> table = "projects/{}/datasets/{}/tables/{}".format( @@ -74,11 +74,13 @@ def read_rows( >>> # TODO: Initialize `parent`: >>> parent = 'projects/your-billing-project-id' >>> - >>> requested_session = bigquery_storage_v1.types.ReadSession( + >>> requested_session = storage.types.ReadSession( ... table=table, - ... data_format=bigquery_storage_v1.enums.DataFormat.AVRO, + ... data_format=storage.types.DataFormat.AVRO, + ... ) + >>> session = client.create_read_session( + ... parent=parent, read_session=requested_session ... ) - >>> session = client.create_read_session(parent, requested_session) >>> >>> stream = session.streams[0], # TODO: Also read any other streams. 
>>> read_rows_stream = client.read_rows(stream.name) diff --git a/google/cloud/bigquery_storage_v1/gapic/__init__.py b/google/cloud/bigquery_storage_v1/gapic/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1/gapic/big_query_read_client.py b/google/cloud/bigquery_storage_v1/gapic/big_query_read_client.py deleted file mode 100644 index 69335cb7..00000000 --- a/google/cloud/bigquery_storage_v1/gapic/big_query_read_client.py +++ /dev/null @@ -1,503 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Accesses the google.cloud.bigquery.storage.v1 BigQueryRead API.""" - -import pkg_resources -import warnings - -from google.oauth2 import service_account -import google.api_core.client_options -import google.api_core.gapic_v1.client_info -import google.api_core.gapic_v1.config -import google.api_core.gapic_v1.method -import google.api_core.path_template -import google.api_core.gapic_v1.routing_header -import google.api_core.grpc_helpers -import google.api_core.path_template -import grpc - -from google.cloud.bigquery_storage_v1.gapic import big_query_read_client_config -from google.cloud.bigquery_storage_v1.gapic import enums -from google.cloud.bigquery_storage_v1.gapic.transports import ( - big_query_read_grpc_transport, -) -from google.cloud.bigquery_storage_v1.proto import storage_pb2 -from google.cloud.bigquery_storage_v1.proto import storage_pb2_grpc -from google.cloud.bigquery_storage_v1.proto import stream_pb2 - - -_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( - "google-cloud-bigquery-storage", -).version - - -class BigQueryReadClient(object): - """ - BigQuery Read API. - - The Read API can be used to read data from BigQuery. - """ - - SERVICE_ADDRESS = "bigquerystorage.googleapis.com:443" - """The default address of the service.""" - - # The name of the interface for this client. This is the key used to - # find the method configuration in the client_config dictionary. - _INTERFACE_NAME = "google.cloud.bigquery.storage.v1.BigQueryRead" - - @classmethod - def from_service_account_file(cls, filename, *args, **kwargs): - """Creates an instance of this client using the provided credentials - file. - - Args: - filename (str): The path to the service account private key json - file. - args: Additional arguments to pass to the constructor. - kwargs: Additional arguments to pass to the constructor. - - Returns: - BigQueryReadClient: The constructed client. 
- """ - credentials = service_account.Credentials.from_service_account_file(filename) - kwargs["credentials"] = credentials - return cls(*args, **kwargs) - - from_service_account_json = from_service_account_file - - @classmethod - def project_path(cls, project): - """Return a fully-qualified project string.""" - return google.api_core.path_template.expand( - "projects/{project}", project=project, - ) - - @classmethod - def read_session_path(cls, project, location, session): - """Return a fully-qualified read_session string.""" - return google.api_core.path_template.expand( - "projects/{project}/locations/{location}/sessions/{session}", - project=project, - location=location, - session=session, - ) - - @classmethod - def read_stream_path(cls, project, location, session, stream): - """Return a fully-qualified read_stream string.""" - return google.api_core.path_template.expand( - "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}", - project=project, - location=location, - session=session, - stream=stream, - ) - - @classmethod - def table_path(cls, project, dataset, table): - """Return a fully-qualified table string.""" - return google.api_core.path_template.expand( - "projects/{project}/datasets/{dataset}/tables/{table}", - project=project, - dataset=dataset, - table=table, - ) - - def __init__( - self, - transport=None, - channel=None, - credentials=None, - client_config=None, - client_info=None, - client_options=None, - ): - """Constructor. - - Args: - transport (Union[~.BigQueryReadGrpcTransport, - Callable[[~.Credentials, type], ~.BigQueryReadGrpcTransport]): A transport - instance, responsible for actually making the API calls. - The default transport uses the gRPC protocol. - This argument may also be a callable which returns a - transport instance. Callables will be sent the credentials - as the first argument and the default transport class as - the second argument. - channel (grpc.Channel): DEPRECATED. A ``Channel`` instance - through which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - This argument is mutually exclusive with providing a - transport instance to ``transport``; doing so will raise - an exception. - client_config (dict): DEPRECATED. A dictionary of call options for - each method. If not specified, the default configuration is used. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing - your own client library. - client_options (Union[dict, google.api_core.client_options.ClientOptions]): - Client options used to set user options on the client. API Endpoint - should be set through client_options. - """ - # Raise deprecation warnings for things we want to go away. 
- if client_config is not None: - warnings.warn( - "The `client_config` argument is deprecated.", - PendingDeprecationWarning, - stacklevel=2, - ) - else: - client_config = big_query_read_client_config.config - - if channel: - warnings.warn( - "The `channel` argument is deprecated; use " "`transport` instead.", - PendingDeprecationWarning, - stacklevel=2, - ) - - api_endpoint = self.SERVICE_ADDRESS - if client_options: - if type(client_options) == dict: - client_options = google.api_core.client_options.from_dict( - client_options - ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - - # Instantiate the transport. - # The transport is responsible for handling serialization and - # deserialization and actually sending data to the service. - if transport: # pragma: no cover - if callable(transport): - self.transport = transport( - credentials=credentials, - default_class=big_query_read_grpc_transport.BigQueryReadGrpcTransport, - address=api_endpoint, - ) - else: - if credentials: - raise ValueError( - "Received both a transport instance and " - "credentials; these are mutually exclusive." - ) - self.transport = transport - else: - self.transport = big_query_read_grpc_transport.BigQueryReadGrpcTransport( - address=api_endpoint, channel=channel, credentials=credentials, - ) - - if client_info is None: - client_info = google.api_core.gapic_v1.client_info.ClientInfo( - gapic_version=_GAPIC_LIBRARY_VERSION, - ) - else: - client_info.gapic_version = _GAPIC_LIBRARY_VERSION - self._client_info = client_info - - # Parse out the default settings for retry and timeout for each RPC - # from the client configuration. - # (Ordinarily, these are the defaults specified in the `*_config.py` - # file next to this one.) - self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( - client_config["interfaces"][self._INTERFACE_NAME], - ) - - # Save a dictionary of cached API call functions. - # These are the actual callables which invoke the proper - # transport methods, wrapped with `wrap_method` to add retry, - # timeout, and the like. - self._inner_api_calls = {} - - # Service calls - def create_read_session( - self, - parent, - read_session, - max_stream_count=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. - - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Data is assigned to each stream such that roughly the same number of - rows can be read from each stream. Because the server-side unit for - assigning data is collections of rows, the API does not guarantee that - each stream will return the same number or rows. Additionally, the - limits are enforced based on the number of pre-filtered rows, so some - filters can lead to lopsided assignments. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. 
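As a companion to the description above, a minimal sketch of creating a read session through the 2.0 surface, mirroring the updated docstring example earlier in this diff; the project, dataset, and table identifiers are placeholders.

```py
from google.cloud.bigquery import storage
from google.cloud.bigquery.storage import types

client = storage.BigQueryReadClient()

# Placeholder identifiers; substitute your own billing project, dataset, and table.
table = "projects/my-project/datasets/my_dataset/tables/my_table"
parent = "projects/my-billing-project"

requested_session = types.ReadSession(
    table=table,
    data_format=types.DataFormat.ARROW,
)
session = client.create_read_session(
    parent=parent,
    read_session=requested_session,
    max_stream_count=1,  # ask the server for at most one read stream
)
```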
- - Example: - >>> from google.cloud import bigquery_storage_v1 - >>> - >>> client = bigquery_storage_v1.BigQueryReadClient() - >>> - >>> parent = client.project_path('[PROJECT]') - >>> - >>> # TODO: Initialize `read_session`: - >>> read_session = {} - >>> - >>> response = client.create_read_session(parent, read_session) - - Args: - parent (str): The resource has one pattern, but the API owner expects to add more - later. (This is the inverse of ORIGINALLY_SINGLE_PATTERN, and prevents - that from being necessary once there are multiple patterns.) - read_session (Union[dict, ~google.cloud.bigquery_storage_v1.types.ReadSession]): Required. Session to be created. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1.types.ReadSession` - max_stream_count (int): Max initial number of streams. If unset or zero, the server will - provide a value of streams so as to produce reasonable throughput. Must be - non-negative. The number of streams may be lower than the requested number, - depending on the amount parallelism that is reasonable for the table. Error - will be returned if the max count is greater than the current system - max limit of 1,000. - - Streams must be read starting from offset 0. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1.types.ReadSession` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. - if "create_read_session" not in self._inner_api_calls: - self._inner_api_calls[ - "create_read_session" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.create_read_session, - default_retry=self._method_configs["CreateReadSession"].retry, - default_timeout=self._method_configs["CreateReadSession"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.CreateReadSessionRequest( - parent=parent, read_session=read_session, max_stream_count=max_stream_count, - ) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("read_session.table", read_session.table)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["create_read_session"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def read_rows( - self, - read_stream, - offset=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Reads rows from the stream in the format prescribed by the ReadSession. - Each response contains one or more table rows, up to a maximum of 100 MiB - per response; read requests which attempt to read individual rows larger - than 100 MiB will fail. 
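A short sketch of consuming a stream with the hand-written `read_rows()` wrapper; it assumes a `session` created as in the previous sketch and only inspects the raw responses rather than decoding rows.

```py
from google.cloud.bigquery import storage

client = storage.BigQueryReadClient()

# `session` as returned by create_read_session in the previous sketch.
stream_name = session.streams[0].name
read_rows_stream = client.read_rows(stream_name)

for response in read_rows_stream:
    # Each ReadRowsResponse carries a block of rows plus stream statistics.
    print(response.row_count, response.stats.progress.at_response_end)
```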
- - Each request also returns a set of stream statistics reflecting the current - state of the stream. - - Example: - >>> from google.cloud import bigquery_storage_v1 - >>> - >>> client = bigquery_storage_v1.BigQueryReadClient() - >>> - >>> read_stream = client.read_stream_path('[PROJECT]', '[LOCATION]', '[SESSION]', '[STREAM]') - >>> - >>> for element in client.read_rows(read_stream): - ... # process element - ... pass - - Args: - read_stream (str): Required. Stream to read rows from. - offset (long): The offset requested must be less than the last row read from Read. - Requesting a larger offset is undefined. If not specified, start reading - from offset zero. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - Iterable[~google.cloud.bigquery_storage_v1.types.ReadRowsResponse]. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. - if "read_rows" not in self._inner_api_calls: - self._inner_api_calls[ - "read_rows" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.read_rows, - default_retry=self._method_configs["ReadRows"].retry, - default_timeout=self._method_configs["ReadRows"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.ReadRowsRequest(read_stream=read_stream, offset=offset,) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("read_stream", read_stream)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["read_rows"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def split_read_stream( - self, - name, - fraction=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - An indicator of the behavior of a given field (for example, that a - field is required in requests, or given as output but ignored as input). - This **does not** change the behavior in protocol buffers itself; it - only denotes the behavior and may affect how API tooling handles the - field. - - Note: This enum **may** receive new values in the future. - - Example: - >>> from google.cloud import bigquery_storage_v1 - >>> - >>> client = bigquery_storage_v1.BigQueryReadClient() - >>> - >>> name = client.read_stream_path('[PROJECT]', '[LOCATION]', '[SESSION]', '[STREAM]') - >>> - >>> response = client.split_read_stream(name) - - Args: - name (str): Required. Name of the stream to split. - fraction (float): A value in the range (0.0, 1.0) that specifies the fractional point at - which the original stream should be split. 
The actual split point is - evaluated on pre-filtered rows, so if a filter is provided, then there is - no guarantee that the division of the rows between the new child streams - will be proportional to this fractional value. Additionally, because the - server-side unit for assigning data is collections of rows, this fraction - will always map to a data storage boundary on the server side. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. - if "split_read_stream" not in self._inner_api_calls: - self._inner_api_calls[ - "split_read_stream" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.split_read_stream, - default_retry=self._method_configs["SplitReadStream"].retry, - default_timeout=self._method_configs["SplitReadStream"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.SplitReadStreamRequest(name=name, fraction=fraction,) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("name", name)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["split_read_stream"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) diff --git a/google/cloud/bigquery_storage_v1/gapic/big_query_read_client_config.py b/google/cloud/bigquery_storage_v1/gapic/big_query_read_client_config.py deleted file mode 100644 index 4a68352a..00000000 --- a/google/cloud/bigquery_storage_v1/gapic/big_query_read_client_config.py +++ /dev/null @@ -1,67 +0,0 @@ -config = { - "interfaces": { - "google.cloud.bigquery.storage.v1.BigQueryRead": { - "retry_codes": { - "retry_policy_1_codes": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "no_retry_codes": [], - "retry_policy_3_codes": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "retry_policy_2_codes": ["UNAVAILABLE"], - }, - "retry_params": { - "retry_policy_1_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 600000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 600000, - "total_timeout_millis": 600000, - }, - "retry_policy_2_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 86400000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 86400000, - "total_timeout_millis": 86400000, - }, - "retry_policy_3_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 600000, - 
"rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 600000, - "total_timeout_millis": 600000, - }, - "no_retry_params": { - "initial_retry_delay_millis": 0, - "retry_delay_multiplier": 0.0, - "max_retry_delay_millis": 0, - "initial_rpc_timeout_millis": 0, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 0, - "total_timeout_millis": 0, - }, - }, - "methods": { - "CreateReadSession": { - "timeout_millis": 120000, - "retry_codes_name": "retry_policy_1_codes", - "retry_params_name": "retry_policy_1_params", - }, - "ReadRows": { - "timeout_millis": 21600000, - "retry_codes_name": "retry_policy_2_codes", - "retry_params_name": "retry_policy_2_params", - }, - "SplitReadStream": { - "timeout_millis": 120000, - "retry_codes_name": "retry_policy_3_codes", - "retry_params_name": "retry_policy_3_params", - }, - }, - } - } -} diff --git a/google/cloud/bigquery_storage_v1/gapic/enums.py b/google/cloud/bigquery_storage_v1/gapic/enums.py deleted file mode 100644 index effa16bd..00000000 --- a/google/cloud/bigquery_storage_v1/gapic/enums.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers for protocol buffer enum types.""" - -import enum - - -class DataFormat(enum.IntEnum): - """ - Data format for input or output data. - - Attributes: - DATA_FORMAT_UNSPECIFIED (int) - AVRO (int): Avro is a standard open source row based file format. - See https://avro.apache.org/ for more details. - ARROW (int): Arrow is a standard open source column-based message format. - See https://arrow.apache.org/ for more details. - """ - - DATA_FORMAT_UNSPECIFIED = 0 - AVRO = 1 - ARROW = 2 diff --git a/google/cloud/bigquery_storage_v1/gapic/transports/__init__.py b/google/cloud/bigquery_storage_v1/gapic/transports/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1/gapic/transports/big_query_read_grpc_transport.py b/google/cloud/bigquery_storage_v1/gapic/transports/big_query_read_grpc_transport.py deleted file mode 100644 index 4370f427..00000000 --- a/google/cloud/bigquery_storage_v1/gapic/transports/big_query_read_grpc_transport.py +++ /dev/null @@ -1,184 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import google.api_core.grpc_helpers - -from google.cloud.bigquery_storage_v1.proto import storage_pb2_grpc - - -class BigQueryReadGrpcTransport(object): - """gRPC transport class providing stubs for - google.cloud.bigquery.storage.v1 BigQueryRead API. - - The transport provides access to the raw gRPC stubs, - which can be used to take advantage of advanced - features of gRPC. - """ - - # The scopes needed to make gRPC calls to all of the methods defined - # in this service. - _OAUTH_SCOPES = ( - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/bigquery.readonly", - "https://www.googleapis.com/auth/cloud-platform", - ) - - def __init__( - self, - channel=None, - credentials=None, - address="bigquerystorage.googleapis.com:443", - ): - """Instantiate the transport class. - - Args: - channel (grpc.Channel): A ``Channel`` instance through - which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - address (str): The address where the service is hosted. - """ - # If both `channel` and `credentials` are specified, raise an - # exception (channels come with credentials baked in already). - if channel is not None and credentials is not None: # pragma: no cover - raise ValueError( - "The `channel` and `credentials` arguments are mutually " "exclusive.", - ) - - # Create the channel. - if channel is None: # pragma: no cover - channel = self.create_channel( - address=address, - credentials=credentials, - options={ - "grpc.max_send_message_length": -1, - "grpc.max_receive_message_length": -1, - }.items(), - ) - - self._channel = channel - - # gRPC uses objects called "stubs" that are bound to the - # channel and provide a basic method for each RPC. - self._stubs = { - "big_query_read_stub": storage_pb2_grpc.BigQueryReadStub(channel), - } - - @classmethod - def create_channel( - cls, address="bigquerystorage.googleapis.com:443", credentials=None, **kwargs - ): - """Create and return a gRPC channel object. - - Args: - address (str): The host for the channel to use. - credentials (~.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If - none are specified, the client will attempt to ascertain - the credentials from the environment. - kwargs (dict): Keyword arguments, which are passed to the - channel creation. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return google.api_core.grpc_helpers.create_channel( # pragma: no cover - address, credentials=credentials, scopes=cls._OAUTH_SCOPES, **kwargs - ) - - @property - def channel(self): - """The gRPC channel used by the transport. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return self._channel - - @property - def create_read_session(self): - """Return the gRPC stub for :meth:`BigQueryReadClient.create_read_session`. - - Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. 
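Relatedly, a sketch of supplying explicitly scoped credentials instead of relying on the defaults; it uses the read-only scope from the `_OAUTH_SCOPES` tuple above and assumes application default credentials are configured in the environment.

```py
import google.auth
from google.cloud.bigquery import storage

# Request only the read-only BigQuery scope listed above.
credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/bigquery.readonly"]
)
client = storage.BigQueryReadClient(credentials=credentials)
```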
- - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Data is assigned to each stream such that roughly the same number of - rows can be read from each stream. Because the server-side unit for - assigning data is collections of rows, the API does not guarantee that - each stream will return the same number or rows. Additionally, the - limits are enforced based on the number of pre-filtered rows, so some - filters can lead to lopsided assignments. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_read_stub"].CreateReadSession - - @property - def read_rows(self): - """Return the gRPC stub for :meth:`BigQueryReadClient.read_rows`. - - Reads rows from the stream in the format prescribed by the ReadSession. - Each response contains one or more table rows, up to a maximum of 100 MiB - per response; read requests which attempt to read individual rows larger - than 100 MiB will fail. - - Each request also returns a set of stream statistics reflecting the current - state of the stream. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_read_stub"].ReadRows - - @property - def split_read_stream(self): - """Return the gRPC stub for :meth:`BigQueryReadClient.split_read_stream`. - - An indicator of the behavior of a given field (for example, that a - field is required in requests, or given as output but ignored as input). - This **does not** change the behavior in protocol buffers itself; it - only denotes the behavior and may affect how API tooling handles the - field. - - Note: This enum **may** receive new values in the future. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_read_stub"].SplitReadStream diff --git a/google/cloud/bigquery_storage_v1/proto/__init__.py b/google/cloud/bigquery_storage_v1/proto/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1/proto/arrow_pb2.py b/google/cloud/bigquery_storage_v1/proto/arrow_pb2.py deleted file mode 100644 index 32a8feda..00000000 --- a/google/cloud/bigquery_storage_v1/proto/arrow_pb2.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/bigquery_storage_v1/proto/arrow.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1/proto/arrow.proto", - package="google.cloud.bigquery.storage.v1", - syntax="proto3", - serialized_options=b"\n$com.google.cloud.bigquery.storage.v1B\nArrowProtoP\001ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\252\002 Google.Cloud.BigQuery.Storage.V1\312\002 Google\\Cloud\\BigQuery\\Storage\\V1", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n2google/cloud/bigquery_storage_v1/proto/arrow.proto\x12 google.cloud.bigquery.storage.v1"(\n\x0b\x41rrowSchema\x12\x19\n\x11serialized_schema\x18\x01 \x01(\x0c"F\n\x10\x41rrowRecordBatch\x12\x1f\n\x17serialized_record_batch\x18\x01 \x01(\x0c\x12\x11\n\trow_count\x18\x02 \x01(\x03\x42\xc3\x01\n$com.google.cloud.bigquery.storage.v1B\nArrowProtoP\x01ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\xaa\x02 Google.Cloud.BigQuery.Storage.V1\xca\x02 Google\\Cloud\\BigQuery\\Storage\\V1b\x06proto3', -) - - -_ARROWSCHEMA = _descriptor.Descriptor( - name="ArrowSchema", - full_name="google.cloud.bigquery.storage.v1.ArrowSchema", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_schema", - full_name="google.cloud.bigquery.storage.v1.ArrowSchema.serialized_schema", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=88, - serialized_end=128, -) - - -_ARROWRECORDBATCH = _descriptor.Descriptor( - name="ArrowRecordBatch", - full_name="google.cloud.bigquery.storage.v1.ArrowRecordBatch", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_record_batch", - full_name="google.cloud.bigquery.storage.v1.ArrowRecordBatch.serialized_record_batch", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1.ArrowRecordBatch.row_count", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - 
serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=130, - serialized_end=200, -) - -DESCRIPTOR.message_types_by_name["ArrowSchema"] = _ARROWSCHEMA -DESCRIPTOR.message_types_by_name["ArrowRecordBatch"] = _ARROWRECORDBATCH -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ArrowSchema = _reflection.GeneratedProtocolMessageType( - "ArrowSchema", - (_message.Message,), - { - "DESCRIPTOR": _ARROWSCHEMA, - "__module__": "google.cloud.bigquery_storage_v1.proto.arrow_pb2", - "__doc__": """Arrow schema as specified in - https://arrow.apache.org/docs/python/api/datatypes.html and serialized - to bytes using IPC: - https://arrow.apache.org/docs/format/Columnar.html#serialization-and- - interprocess-communication-ipc See code samples on how this message - can be deserialized. - - Attributes: - serialized_schema: - IPC serialized Arrow schema. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ArrowSchema) - }, -) -_sym_db.RegisterMessage(ArrowSchema) - -ArrowRecordBatch = _reflection.GeneratedProtocolMessageType( - "ArrowRecordBatch", - (_message.Message,), - { - "DESCRIPTOR": _ARROWRECORDBATCH, - "__module__": "google.cloud.bigquery_storage_v1.proto.arrow_pb2", - "__doc__": """Arrow RecordBatch. - - Attributes: - serialized_record_batch: - IPC-serialized Arrow RecordBatch. - row_count: - The count of rows in ``serialized_record_batch``. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ArrowRecordBatch) - }, -) -_sym_db.RegisterMessage(ArrowRecordBatch) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1/proto/arrow_pb2_grpc.py b/google/cloud/bigquery_storage_v1/proto/arrow_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1/proto/arrow_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1/proto/avro_pb2.py b/google/cloud/bigquery_storage_v1/proto/avro_pb2.py deleted file mode 100644 index af6fcf49..00000000 --- a/google/cloud/bigquery_storage_v1/proto/avro_pb2.py +++ /dev/null @@ -1,168 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
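The `ArrowSchema` docstring above points to code samples for deserialization; here is a sketch of one way to do it with `pyarrow`, assuming a session created with `DataFormat.ARROW` as in the earlier sketch (the hand-written `ReadRowsStream` helpers wrap the same steps).

```py
import pyarrow as pa
from google.cloud.bigquery import storage

client = storage.BigQueryReadClient()

# `session` created with data_format=types.DataFormat.ARROW (earlier sketch);
# its arrow_schema field carries the IPC-serialized schema for every stream.
schema = pa.ipc.read_schema(pa.py_buffer(session.arrow_schema.serialized_schema))

batches = []
for response in client.read_rows(session.streams[0].name):
    # Each response carries one IPC-serialized RecordBatch, decoded against the schema.
    batch = pa.ipc.read_record_batch(
        pa.py_buffer(response.arrow_record_batch.serialized_record_batch),
        schema,
    )
    batches.append(batch)

table = pa.Table.from_batches(batches)
```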
-# source: google/cloud/bigquery_storage_v1/proto/avro.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1/proto/avro.proto", - package="google.cloud.bigquery.storage.v1", - syntax="proto3", - serialized_options=b"\n$com.google.cloud.bigquery.storage.v1B\tAvroProtoP\001ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\252\002 Google.Cloud.BigQuery.Storage.V1\312\002 Google\\Cloud\\BigQuery\\Storage\\V1", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n1google/cloud/bigquery_storage_v1/proto/avro.proto\x12 google.cloud.bigquery.storage.v1"\x1c\n\nAvroSchema\x12\x0e\n\x06schema\x18\x01 \x01(\t"=\n\x08\x41vroRows\x12\x1e\n\x16serialized_binary_rows\x18\x01 \x01(\x0c\x12\x11\n\trow_count\x18\x02 \x01(\x03\x42\xc2\x01\n$com.google.cloud.bigquery.storage.v1B\tAvroProtoP\x01ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\xaa\x02 Google.Cloud.BigQuery.Storage.V1\xca\x02 Google\\Cloud\\BigQuery\\Storage\\V1b\x06proto3', -) - - -_AVROSCHEMA = _descriptor.Descriptor( - name="AvroSchema", - full_name="google.cloud.bigquery.storage.v1.AvroSchema", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="schema", - full_name="google.cloud.bigquery.storage.v1.AvroSchema.schema", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=87, - serialized_end=115, -) - - -_AVROROWS = _descriptor.Descriptor( - name="AvroRows", - full_name="google.cloud.bigquery.storage.v1.AvroRows", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_binary_rows", - full_name="google.cloud.bigquery.storage.v1.AvroRows.serialized_binary_rows", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1.AvroRows.row_count", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - 
oneofs=[], - serialized_start=117, - serialized_end=178, -) - -DESCRIPTOR.message_types_by_name["AvroSchema"] = _AVROSCHEMA -DESCRIPTOR.message_types_by_name["AvroRows"] = _AVROROWS -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -AvroSchema = _reflection.GeneratedProtocolMessageType( - "AvroSchema", - (_message.Message,), - { - "DESCRIPTOR": _AVROSCHEMA, - "__module__": "google.cloud.bigquery_storage_v1.proto.avro_pb2", - "__doc__": """Avro schema. - - Attributes: - schema: - Json serialized schema, as described at - https://avro.apache.org/docs/1.8.1/spec.html. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.AvroSchema) - }, -) -_sym_db.RegisterMessage(AvroSchema) - -AvroRows = _reflection.GeneratedProtocolMessageType( - "AvroRows", - (_message.Message,), - { - "DESCRIPTOR": _AVROROWS, - "__module__": "google.cloud.bigquery_storage_v1.proto.avro_pb2", - "__doc__": """Avro rows. - - Attributes: - serialized_binary_rows: - Binary serialized rows in a block. - row_count: - The count of rows in the returning block. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.AvroRows) - }, -) -_sym_db.RegisterMessage(AvroRows) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1/proto/avro_pb2_grpc.py b/google/cloud/bigquery_storage_v1/proto/avro_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1/proto/avro_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1/proto/storage_pb2.py b/google/cloud/bigquery_storage_v1/proto/storage_pb2.py deleted file mode 100644 index 40f852f8..00000000 --- a/google/cloud/bigquery_storage_v1/proto/storage_pb2.py +++ /dev/null @@ -1,870 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
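For the Avro counterpart just described, a hedged sketch using `fastavro`; it assumes a session created with `DataFormat.AVRO` and decodes each block with the JSON schema carried on the session.

```py
import io
import json

import fastavro
from google.cloud.bigquery import storage

client = storage.BigQueryReadClient()

# `session` created with data_format=types.DataFormat.AVRO (earlier sketch);
# its avro_schema.schema field is the JSON-serialized Avro schema.
parsed_schema = fastavro.parse_schema(json.loads(session.avro_schema.schema))

rows = []
for response in client.read_rows(session.streams[0].name):
    block = io.BytesIO(response.avro_rows.serialized_binary_rows)
    # The block contains row_count concatenated schemaless records.
    for _ in range(response.row_count):
        rows.append(fastavro.schemaless_reader(block, parsed_schema))
```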
-# source: google/cloud/bigquery_storage_v1/proto/storage.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import resource_pb2 as google_dot_api_dot_resource__pb2 -from google.cloud.bigquery_storage_v1.proto import ( - arrow_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_arrow__pb2, -) -from google.cloud.bigquery_storage_v1.proto import ( - avro_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_avro__pb2, -) -from google.cloud.bigquery_storage_v1.proto import ( - stream_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2, -) - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1/proto/storage.proto", - package="google.cloud.bigquery.storage.v1", - syntax="proto3", - serialized_options=b"\n$com.google.cloud.bigquery.storage.v1B\014StorageProtoP\001ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\252\002 Google.Cloud.BigQuery.Storage.V1\312\002 Google\\Cloud\\BigQuery\\Storage\\V1\352AU\n\035bigquery.googleapis.com/Table\0224projects/{project}/datasets/{dataset}/tables/{table}", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n4google/cloud/bigquery_storage_v1/proto/storage.proto\x12 google.cloud.bigquery.storage.v1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\x1a\x32google/cloud/bigquery_storage_v1/proto/arrow.proto\x1a\x31google/cloud/bigquery_storage_v1/proto/avro.proto\x1a\x33google/cloud/bigquery_storage_v1/proto/stream.proto"\xc3\x01\n\x18\x43reateReadSessionRequest\x12\x43\n\x06parent\x18\x01 \x01(\tB3\xe0\x41\x02\xfa\x41-\n+cloudresourcemanager.googleapis.com/Project\x12H\n\x0cread_session\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1.ReadSessionB\x03\xe0\x41\x02\x12\x18\n\x10max_stream_count\x18\x03 \x01(\x05"i\n\x0fReadRowsRequest\x12\x46\n\x0bread_stream\x18\x01 \x01(\tB1\xe0\x41\x02\xfa\x41+\n)bigquerystorage.googleapis.com/ReadStream\x12\x0e\n\x06offset\x18\x02 \x01(\x03")\n\rThrottleState\x12\x18\n\x10throttle_percent\x18\x01 \x01(\x05"\x97\x01\n\x0bStreamStats\x12H\n\x08progress\x18\x02 \x01(\x0b\x32\x36.google.cloud.bigquery.storage.v1.StreamStats.Progress\x1a>\n\x08Progress\x12\x19\n\x11\x61t_response_start\x18\x01 \x01(\x01\x12\x17\n\x0f\x61t_response_end\x18\x02 \x01(\x01"\xc7\x02\n\x10ReadRowsResponse\x12?\n\tavro_rows\x18\x03 \x01(\x0b\x32*.google.cloud.bigquery.storage.v1.AvroRowsH\x00\x12P\n\x12\x61rrow_record_batch\x18\x04 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1.ArrowRecordBatchH\x00\x12\x11\n\trow_count\x18\x06 \x01(\x03\x12<\n\x05stats\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1.StreamStats\x12G\n\x0ethrottle_state\x18\x05 \x01(\x0b\x32/.google.cloud.bigquery.storage.v1.ThrottleStateB\x06\n\x04rows"k\n\x16SplitReadStreamRequest\x12?\n\x04name\x18\x01 \x01(\tB1\xe0\x41\x02\xfa\x41+\n)bigquerystorage.googleapis.com/ReadStream\x12\x10\n\x08\x66raction\x18\x02 
\x01(\x01"\xa7\x01\n\x17SplitReadStreamResponse\x12\x44\n\x0eprimary_stream\x18\x01 \x01(\x0b\x32,.google.cloud.bigquery.storage.v1.ReadStream\x12\x46\n\x10remainder_stream\x18\x02 \x01(\x0b\x32,.google.cloud.bigquery.storage.v1.ReadStream2\xc6\x06\n\x0c\x42igQueryRead\x12\xe9\x01\n\x11\x43reateReadSession\x12:.google.cloud.bigquery.storage.v1.CreateReadSessionRequest\x1a-.google.cloud.bigquery.storage.v1.ReadSession"i\x82\xd3\xe4\x93\x02<"7/v1/{read_session.table=projects/*/datasets/*/tables/*}:\x01*\xda\x41$parent,read_session,max_stream_count\x12\xcf\x01\n\x08ReadRows\x12\x31.google.cloud.bigquery.storage.v1.ReadRowsRequest\x1a\x32.google.cloud.bigquery.storage.v1.ReadRowsResponse"Z\x82\xd3\xe4\x93\x02?\x12=/v1/{read_stream=projects/*/locations/*/sessions/*/streams/*}\xda\x41\x12read_stream,offset0\x01\x12\xc6\x01\n\x0fSplitReadStream\x12\x38.google.cloud.bigquery.storage.v1.SplitReadStreamRequest\x1a\x39.google.cloud.bigquery.storage.v1.SplitReadStreamResponse">\x82\xd3\xe4\x93\x02\x38\x12\x36/v1/{name=projects/*/locations/*/sessions/*/streams/*}\x1a\xae\x01\xca\x41\x1e\x62igquerystorage.googleapis.com\xd2\x41\x89\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platformB\x9d\x02\n$com.google.cloud.bigquery.storage.v1B\x0cStorageProtoP\x01ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\xaa\x02 Google.Cloud.BigQuery.Storage.V1\xca\x02 Google\\Cloud\\BigQuery\\Storage\\V1\xea\x41U\n\x1d\x62igquery.googleapis.com/Table\x12\x34projects/{project}/datasets/{dataset}/tables/{table}b\x06proto3', - dependencies=[ - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_resource__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_arrow__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_avro__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2.DESCRIPTOR, - ], -) - - -_CREATEREADSESSIONREQUEST = _descriptor.Descriptor( - name="CreateReadSessionRequest", - full_name="google.cloud.bigquery.storage.v1.CreateReadSessionRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="parent", - full_name="google.cloud.bigquery.storage.v1.CreateReadSessionRequest.parent", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A-\n+cloudresourcemanager.googleapis.com/Project", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="read_session", - full_name="google.cloud.bigquery.storage.v1.CreateReadSessionRequest.read_session", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="max_stream_count", - full_name="google.cloud.bigquery.storage.v1.CreateReadSessionRequest.max_stream_count", - index=2, - number=3, - type=5, - cpp_type=1, - 
label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=362, - serialized_end=557, -) - - -_READROWSREQUEST = _descriptor.Descriptor( - name="ReadRowsRequest", - full_name="google.cloud.bigquery.storage.v1.ReadRowsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="read_stream", - full_name="google.cloud.bigquery.storage.v1.ReadRowsRequest.read_stream", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A+\n)bigquerystorage.googleapis.com/ReadStream", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="offset", - full_name="google.cloud.bigquery.storage.v1.ReadRowsRequest.offset", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=559, - serialized_end=664, -) - - -_THROTTLESTATE = _descriptor.Descriptor( - name="ThrottleState", - full_name="google.cloud.bigquery.storage.v1.ThrottleState", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="throttle_percent", - full_name="google.cloud.bigquery.storage.v1.ThrottleState.throttle_percent", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=666, - serialized_end=707, -) - - -_STREAMSTATS_PROGRESS = _descriptor.Descriptor( - name="Progress", - full_name="google.cloud.bigquery.storage.v1.StreamStats.Progress", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="at_response_start", - full_name="google.cloud.bigquery.storage.v1.StreamStats.Progress.at_response_start", - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - 
name="at_response_end", - full_name="google.cloud.bigquery.storage.v1.StreamStats.Progress.at_response_end", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=799, - serialized_end=861, -) - -_STREAMSTATS = _descriptor.Descriptor( - name="StreamStats", - full_name="google.cloud.bigquery.storage.v1.StreamStats", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="progress", - full_name="google.cloud.bigquery.storage.v1.StreamStats.progress", - index=0, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_STREAMSTATS_PROGRESS,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=710, - serialized_end=861, -) - - -_READROWSRESPONSE = _descriptor.Descriptor( - name="ReadRowsResponse", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="avro_rows", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse.avro_rows", - index=0, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_record_batch", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse.arrow_record_batch", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse.row_count", - index=2, - number=6, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="stats", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse.stats", - index=3, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="throttle_state", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse.throttle_state", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="rows", - full_name="google.cloud.bigquery.storage.v1.ReadRowsResponse.rows", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=864, - serialized_end=1191, -) - - -_SPLITREADSTREAMREQUEST = _descriptor.Descriptor( - name="SplitReadStreamRequest", - full_name="google.cloud.bigquery.storage.v1.SplitReadStreamRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1.SplitReadStreamRequest.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A+\n)bigquerystorage.googleapis.com/ReadStream", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="fraction", - full_name="google.cloud.bigquery.storage.v1.SplitReadStreamRequest.fraction", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1193, - serialized_end=1300, -) - - -_SPLITREADSTREAMRESPONSE = _descriptor.Descriptor( - name="SplitReadStreamResponse", - full_name="google.cloud.bigquery.storage.v1.SplitReadStreamResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="primary_stream", - full_name="google.cloud.bigquery.storage.v1.SplitReadStreamResponse.primary_stream", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="remainder_stream", - full_name="google.cloud.bigquery.storage.v1.SplitReadStreamResponse.remainder_stream", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1303, - serialized_end=1470, -) - -_CREATEREADSESSIONREQUEST.fields_by_name[ - "read_session" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2._READSESSION -) -_STREAMSTATS_PROGRESS.containing_type = _STREAMSTATS -_STREAMSTATS.fields_by_name["progress"].message_type = _STREAMSTATS_PROGRESS -_READROWSRESPONSE.fields_by_name[ - "avro_rows" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_avro__pb2._AVROROWS -) -_READROWSRESPONSE.fields_by_name[ - "arrow_record_batch" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_arrow__pb2._ARROWRECORDBATCH -) -_READROWSRESPONSE.fields_by_name["stats"].message_type = _STREAMSTATS -_READROWSRESPONSE.fields_by_name["throttle_state"].message_type = _THROTTLESTATE -_READROWSRESPONSE.oneofs_by_name["rows"].fields.append( - _READROWSRESPONSE.fields_by_name["avro_rows"] -) -_READROWSRESPONSE.fields_by_name[ - "avro_rows" -].containing_oneof = _READROWSRESPONSE.oneofs_by_name["rows"] -_READROWSRESPONSE.oneofs_by_name["rows"].fields.append( - _READROWSRESPONSE.fields_by_name["arrow_record_batch"] -) -_READROWSRESPONSE.fields_by_name[ - "arrow_record_batch" -].containing_oneof = _READROWSRESPONSE.oneofs_by_name["rows"] -_SPLITREADSTREAMRESPONSE.fields_by_name[ - "primary_stream" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2._READSTREAM -) -_SPLITREADSTREAMRESPONSE.fields_by_name[ - "remainder_stream" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2._READSTREAM -) -DESCRIPTOR.message_types_by_name["CreateReadSessionRequest"] = _CREATEREADSESSIONREQUEST -DESCRIPTOR.message_types_by_name["ReadRowsRequest"] = _READROWSREQUEST -DESCRIPTOR.message_types_by_name["ThrottleState"] = _THROTTLESTATE -DESCRIPTOR.message_types_by_name["StreamStats"] = _STREAMSTATS -DESCRIPTOR.message_types_by_name["ReadRowsResponse"] = _READROWSRESPONSE -DESCRIPTOR.message_types_by_name["SplitReadStreamRequest"] = _SPLITREADSTREAMREQUEST -DESCRIPTOR.message_types_by_name["SplitReadStreamResponse"] = _SPLITREADSTREAMRESPONSE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -CreateReadSessionRequest = _reflection.GeneratedProtocolMessageType( - "CreateReadSessionRequest", - (_message.Message,), - { - "DESCRIPTOR": _CREATEREADSESSIONREQUEST, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Request message for ``CreateReadSession``. - - Attributes: - parent: - Required. The request project that owns the session, in the - form of ``projects/{project_id}``. - read_session: - Required. Session to be created. - max_stream_count: - Max initial number of streams. If unset or zero, the server - will provide a value of streams so as to produce reasonable - throughput. Must be non-negative. The number of streams may be - lower than the requested number, depending on the amount - parallelism that is reasonable for the table. Error will be - returned if the max count is greater than the current system - max limit of 1,000. Streams must be read starting from offset - 0. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.CreateReadSessionRequest) - }, -) -_sym_db.RegisterMessage(CreateReadSessionRequest) - -ReadRowsRequest = _reflection.GeneratedProtocolMessageType( - "ReadRowsRequest", - (_message.Message,), - { - "DESCRIPTOR": _READROWSREQUEST, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Request message for ``ReadRows``. - - Attributes: - read_stream: - Required. Stream to read rows from. - offset: - The offset requested must be less than the last row read from - Read. Requesting a larger offset is undefined. If not - specified, start reading from offset zero. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ReadRowsRequest) - }, -) -_sym_db.RegisterMessage(ReadRowsRequest) - -ThrottleState = _reflection.GeneratedProtocolMessageType( - "ThrottleState", - (_message.Message,), - { - "DESCRIPTOR": _THROTTLESTATE, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Information on if the current connection is being throttled. - - Attributes: - throttle_percent: - How much this connection is being throttled. Zero means no - throttling, 100 means fully throttled. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ThrottleState) - }, -) -_sym_db.RegisterMessage(ThrottleState) - -StreamStats = _reflection.GeneratedProtocolMessageType( - "StreamStats", - (_message.Message,), - { - "Progress": _reflection.GeneratedProtocolMessageType( - "Progress", - (_message.Message,), - { - "DESCRIPTOR": _STREAMSTATS_PROGRESS, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Protocol buffer. - - Attributes: - at_response_start: - The fraction of rows assigned to the stream that have been - processed by the server so far, not including the rows in the - current response message. This value, along with - ``at_response_end``, can be used to interpolate the progress - made as the rows in the message are being processed using the - following formula: ``at_response_start + (at_response_end - - at_response_start) * rows_processed_from_response / - rows_in_response``. Note that if a filter is provided, the - ``at_response_end`` value of the previous response may not - necessarily be equal to the ``at_response_start`` value of the - current response. - at_response_end: - Similar to ``at_response_start``, except that this value - includes the rows in the current response. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.StreamStats.Progress) - }, - ), - "DESCRIPTOR": _STREAMSTATS, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Estimated stream statistics for a given Stream. - - Attributes: - progress: - Represents the progress of the current stream. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.StreamStats) - }, -) -_sym_db.RegisterMessage(StreamStats) -_sym_db.RegisterMessage(StreamStats.Progress) - -ReadRowsResponse = _reflection.GeneratedProtocolMessageType( - "ReadRowsResponse", - (_message.Message,), - { - "DESCRIPTOR": _READROWSRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Response from calling ``ReadRows`` may include row data, progress and - throttling information. - - Attributes: - rows: - Row data is returned in format specified during session - creation. - avro_rows: - Serialized row data in AVRO format. 
- arrow_record_batch: - Serialized row data in Arrow RecordBatch format. - row_count: - Number of serialized rows in the rows block. - stats: - Statistics for the stream. - throttle_state: - Throttling state. If unset, the latest response still - describes the current throttling status. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ReadRowsResponse) - }, -) -_sym_db.RegisterMessage(ReadRowsResponse) - -SplitReadStreamRequest = _reflection.GeneratedProtocolMessageType( - "SplitReadStreamRequest", - (_message.Message,), - { - "DESCRIPTOR": _SPLITREADSTREAMREQUEST, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Request message for ``SplitReadStream``. - - Attributes: - name: - Required. Name of the stream to split. - fraction: - A value in the range (0.0, 1.0) that specifies the fractional - point at which the original stream should be split. The actual - split point is evaluated on pre-filtered rows, so if a filter - is provided, then there is no guarantee that the division of - the rows between the new child streams will be proportional to - this fractional value. Additionally, because the server-side - unit for assigning data is collections of rows, this fraction - will always map to a data storage boundary on the server side. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.SplitReadStreamRequest) - }, -) -_sym_db.RegisterMessage(SplitReadStreamRequest) - -SplitReadStreamResponse = _reflection.GeneratedProtocolMessageType( - "SplitReadStreamResponse", - (_message.Message,), - { - "DESCRIPTOR": _SPLITREADSTREAMRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1.proto.storage_pb2", - "__doc__": """Response message for ``SplitReadStream``. - - Attributes: - primary_stream: - Primary stream, which contains the beginning portion of - \|original_stream|. An empty value indicates that the original - stream can no longer be split. - remainder_stream: - Remainder stream, which contains the tail of - \|original_stream|. An empty value indicates that the original - stream can no longer be split. 
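A hedged sketch of `SplitReadStream` against the 2.x client; the stream name is a placeholder, and `split_read_stream` takes a request object rather than flattened arguments:

```py
from google.cloud.bigquery.storage import BigQueryReadClient, types

client = BigQueryReadClient()

# Placeholder stream name from an existing read session.
request = types.SplitReadStreamRequest(
    name="projects/my-project/locations/us/sessions/my-session/streams/my-stream",
    # Request a split roughly in the middle; the actual split point always
    # lands on a server-side storage boundary.
    fraction=0.5,
)
response = client.split_read_stream(request=request)

if response.primary_stream.name:
    print("primary:", response.primary_stream.name)
    print("remainder:", response.remainder_stream.name)
else:
    print("stream can no longer be split")
```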
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.SplitReadStreamResponse) - }, -) -_sym_db.RegisterMessage(SplitReadStreamResponse) - - -DESCRIPTOR._options = None -_CREATEREADSESSIONREQUEST.fields_by_name["parent"]._options = None -_CREATEREADSESSIONREQUEST.fields_by_name["read_session"]._options = None -_READROWSREQUEST.fields_by_name["read_stream"]._options = None -_SPLITREADSTREAMREQUEST.fields_by_name["name"]._options = None - -_BIGQUERYREAD = _descriptor.ServiceDescriptor( - name="BigQueryRead", - full_name="google.cloud.bigquery.storage.v1.BigQueryRead", - file=DESCRIPTOR, - index=0, - serialized_options=b"\312A\036bigquerystorage.googleapis.com\322A\211\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform", - create_key=_descriptor._internal_create_key, - serialized_start=1473, - serialized_end=2311, - methods=[ - _descriptor.MethodDescriptor( - name="CreateReadSession", - full_name="google.cloud.bigquery.storage.v1.BigQueryRead.CreateReadSession", - index=0, - containing_service=None, - input_type=_CREATEREADSESSIONREQUEST, - output_type=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2._READSESSION, - serialized_options=b'\202\323\344\223\002<"7/v1/{read_session.table=projects/*/datasets/*/tables/*}:\001*\332A$parent,read_session,max_stream_count', - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="ReadRows", - full_name="google.cloud.bigquery.storage.v1.BigQueryRead.ReadRows", - index=1, - containing_service=None, - input_type=_READROWSREQUEST, - output_type=_READROWSRESPONSE, - serialized_options=b"\202\323\344\223\002?\022=/v1/{read_stream=projects/*/locations/*/sessions/*/streams/*}\332A\022read_stream,offset", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="SplitReadStream", - full_name="google.cloud.bigquery.storage.v1.BigQueryRead.SplitReadStream", - index=2, - containing_service=None, - input_type=_SPLITREADSTREAMREQUEST, - output_type=_SPLITREADSTREAMRESPONSE, - serialized_options=b"\202\323\344\223\0028\0226/v1/{name=projects/*/locations/*/sessions/*/streams/*}", - create_key=_descriptor._internal_create_key, - ), - ], -) -_sym_db.RegisterServiceDescriptor(_BIGQUERYREAD) - -DESCRIPTOR.services_by_name["BigQueryRead"] = _BIGQUERYREAD - -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1/proto/storage_pb2_grpc.py b/google/cloud/bigquery_storage_v1/proto/storage_pb2_grpc.py deleted file mode 100644 index 6ca48045..00000000 --- a/google/cloud/bigquery_storage_v1/proto/storage_pb2_grpc.py +++ /dev/null @@ -1,215 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -from google.cloud.bigquery_storage_v1.proto import ( - storage_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2, -) -from google.cloud.bigquery_storage_v1.proto import ( - stream_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2, -) - - -class BigQueryReadStub(object): - """BigQuery Read API. - - The Read API can be used to read data from BigQuery. - """ - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. 
- """ - self.CreateReadSession = channel.unary_unary( - "/google.cloud.bigquery.storage.v1.BigQueryRead/CreateReadSession", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2.ReadSession.FromString, - ) - self.ReadRows = channel.unary_stream( - "/google.cloud.bigquery.storage.v1.BigQueryRead/ReadRows", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.ReadRowsRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.ReadRowsResponse.FromString, - ) - self.SplitReadStream = channel.unary_unary( - "/google.cloud.bigquery.storage.v1.BigQueryRead/SplitReadStream", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.FromString, - ) - - -class BigQueryReadServicer(object): - """BigQuery Read API. - - The Read API can be used to read data from BigQuery. - """ - - def CreateReadSession(self, request, context): - """Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. - - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Data is assigned to each stream such that roughly the same number of - rows can be read from each stream. Because the server-side unit for - assigning data is collections of rows, the API does not guarantee that - each stream will return the same number or rows. Additionally, the - limits are enforced based on the number of pre-filtered rows, so some - filters can lead to lopsided assignments. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ReadRows(self, request, context): - """Reads rows from the stream in the format prescribed by the ReadSession. - Each response contains one or more table rows, up to a maximum of 100 MiB - per response; read requests which attempt to read individual rows larger - than 100 MiB will fail. - - Each request also returns a set of stream statistics reflecting the current - state of the stream. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def SplitReadStream(self, request, context): - """Splits a given `ReadStream` into two `ReadStream` objects. These - `ReadStream` objects are referred to as the primary and the residual - streams of the split. The original `ReadStream` can still be read from in - the same manner as before. Both of the returned `ReadStream` objects can - also be read from, and the rows returned by both child streams will be - the same as the rows read from the original stream. 
- - Moreover, the two child streams will be allocated back-to-back in the - original `ReadStream`. Concretely, it is guaranteed that for streams - original, primary, and residual, that original[0-j] = primary[0-j] and - original[j-n] = residual[0-m] once the streams have been read to - completion. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_BigQueryReadServicer_to_server(servicer, server): - rpc_method_handlers = { - "CreateReadSession": grpc.unary_unary_rpc_method_handler( - servicer.CreateReadSession, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2.ReadSession.SerializeToString, - ), - "ReadRows": grpc.unary_stream_rpc_method_handler( - servicer.ReadRows, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.ReadRowsRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.ReadRowsResponse.SerializeToString, - ), - "SplitReadStream": grpc.unary_unary_rpc_method_handler( - servicer.SplitReadStream, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - "google.cloud.bigquery.storage.v1.BigQueryRead", rpc_method_handlers - ) - server.add_generic_rpc_handlers((generic_handler,)) - - -# This class is part of an EXPERIMENTAL API. -class BigQueryRead(object): - """BigQuery Read API. - - The Read API can be used to read data from BigQuery. 
- """ - - @staticmethod - def CreateReadSession( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.storage.v1.BigQueryRead/CreateReadSession", - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_stream__pb2.ReadSession.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ReadRows( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_stream( - request, - target, - "/google.cloud.bigquery.storage.v1.BigQueryRead/ReadRows", - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.ReadRowsRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.ReadRowsResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def SplitReadStream( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.storage.v1.BigQueryRead/SplitReadStream", - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) diff --git a/google/cloud/bigquery_storage_v1/proto/stream_pb2.py b/google/cloud/bigquery_storage_v1/proto/stream_pb2.py deleted file mode 100644 index ee00c7b7..00000000 --- a/google/cloud/bigquery_storage_v1/proto/stream_pb2.py +++ /dev/null @@ -1,599 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/bigquery_storage_v1/proto/stream.proto -"""Generated protocol buffer code.""" -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import resource_pb2 as google_dot_api_dot_resource__pb2 -from google.cloud.bigquery_storage_v1.proto import ( - arrow_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_arrow__pb2, -) -from google.cloud.bigquery_storage_v1.proto import ( - avro_pb2 as google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_avro__pb2, -) -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1/proto/stream.proto", - package="google.cloud.bigquery.storage.v1", - syntax="proto3", - serialized_options=b"\n$com.google.cloud.bigquery.storage.v1B\013StreamProtoP\001ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\252\002 Google.Cloud.BigQuery.Storage.V1\312\002 Google\\Cloud\\BigQuery\\Storage\\V1", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n3google/cloud/bigquery_storage_v1/proto/stream.proto\x12 google.cloud.bigquery.storage.v1\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\x1a\x32google/cloud/bigquery_storage_v1/proto/arrow.proto\x1a\x31google/cloud/bigquery_storage_v1/proto/avro.proto\x1a\x1fgoogle/protobuf/timestamp.proto"\xe7\x06\n\x0bReadSession\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x34\n\x0b\x65xpire_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.TimestampB\x03\xe0\x41\x03\x12\x46\n\x0b\x64\x61ta_format\x18\x03 \x01(\x0e\x32,.google.cloud.bigquery.storage.v1.DataFormatB\x03\xe0\x41\x05\x12H\n\x0b\x61vro_schema\x18\x04 \x01(\x0b\x32,.google.cloud.bigquery.storage.v1.AvroSchemaB\x03\xe0\x41\x03H\x00\x12J\n\x0c\x61rrow_schema\x18\x05 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1.ArrowSchemaB\x03\xe0\x41\x03H\x00\x12\x34\n\x05table\x18\x06 \x01(\tB%\xe0\x41\x05\xfa\x41\x1f\n\x1d\x62igquery.googleapis.com/Table\x12Z\n\x0ftable_modifiers\x18\x07 \x01(\x0b\x32<.google.cloud.bigquery.storage.v1.ReadSession.TableModifiersB\x03\xe0\x41\x01\x12Y\n\x0cread_options\x18\x08 \x01(\x0b\x32>.google.cloud.bigquery.storage.v1.ReadSession.TableReadOptionsB\x03\xe0\x41\x01\x12\x42\n\x07streams\x18\n \x03(\x0b\x32,.google.cloud.bigquery.storage.v1.ReadStreamB\x03\xe0\x41\x03\x1a\x43\n\x0eTableModifiers\x12\x31\n\rsnapshot_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x1a\x44\n\x10TableReadOptions\x12\x17\n\x0fselected_fields\x18\x01 \x03(\t\x12\x17\n\x0frow_restriction\x18\x02 \x01(\t:k\xea\x41h\n*bigquerystorage.googleapis.com/ReadSession\x12:projects/{project}/locations/{location}/sessions/{session}B\x08\n\x06schema"\x9c\x01\n\nReadStream\x12\x11\n\x04name\x18\x01 
\x01(\tB\x03\xe0\x41\x03:{\xea\x41x\n)bigquerystorage.googleapis.com/ReadStream\x12Kprojects/{project}/locations/{location}/sessions/{session}/streams/{stream}*>\n\nDataFormat\x12\x1b\n\x17\x44\x41TA_FORMAT_UNSPECIFIED\x10\x00\x12\x08\n\x04\x41VRO\x10\x01\x12\t\n\x05\x41RROW\x10\x02\x42\xc4\x01\n$com.google.cloud.bigquery.storage.v1B\x0bStreamProtoP\x01ZGgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage\xaa\x02 Google.Cloud.BigQuery.Storage.V1\xca\x02 Google\\Cloud\\BigQuery\\Storage\\V1b\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_resource__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_arrow__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_avro__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - ], -) - -_DATAFORMAT = _descriptor.EnumDescriptor( - name="DataFormat", - full_name="google.cloud.bigquery.storage.v1.DataFormat", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DATA_FORMAT_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="AVRO", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARROW", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1318, - serialized_end=1380, -) -_sym_db.RegisterEnumDescriptor(_DATAFORMAT) - -DataFormat = enum_type_wrapper.EnumTypeWrapper(_DATAFORMAT) -DATA_FORMAT_UNSPECIFIED = 0 -AVRO = 1 -ARROW = 2 - - -_READSESSION_TABLEMODIFIERS = _descriptor.Descriptor( - name="TableModifiers", - full_name="google.cloud.bigquery.storage.v1.ReadSession.TableModifiers", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="snapshot_time", - full_name="google.cloud.bigquery.storage.v1.ReadSession.TableModifiers.snapshot_time", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=901, - serialized_end=968, -) - -_READSESSION_TABLEREADOPTIONS = _descriptor.Descriptor( - name="TableReadOptions", - full_name="google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="selected_fields", - full_name="google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions.selected_fields", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_restriction", - full_name="google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions.row_restriction", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=970, - serialized_end=1038, -) - -_READSESSION = _descriptor.Descriptor( - name="ReadSession", - full_name="google.cloud.bigquery.storage.v1.ReadSession", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1.ReadSession.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="expire_time", - full_name="google.cloud.bigquery.storage.v1.ReadSession.expire_time", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_format", - full_name="google.cloud.bigquery.storage.v1.ReadSession.data_format", - index=2, - number=3, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\005", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="avro_schema", - full_name="google.cloud.bigquery.storage.v1.ReadSession.avro_schema", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_schema", - full_name="google.cloud.bigquery.storage.v1.ReadSession.arrow_schema", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table", - full_name="google.cloud.bigquery.storage.v1.ReadSession.table", - index=5, - number=6, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=b"\340A\005\372A\037\n\035bigquery.googleapis.com/Table", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table_modifiers", - full_name="google.cloud.bigquery.storage.v1.ReadSession.table_modifiers", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="read_options", - full_name="google.cloud.bigquery.storage.v1.ReadSession.read_options", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="streams", - full_name="google.cloud.bigquery.storage.v1.ReadSession.streams", - index=8, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_READSESSION_TABLEMODIFIERS, _READSESSION_TABLEREADOPTIONS,], - enum_types=[], - serialized_options=b"\352Ah\n*bigquerystorage.googleapis.com/ReadSession\022:projects/{project}/locations/{location}/sessions/{session}", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="schema", - full_name="google.cloud.bigquery.storage.v1.ReadSession.schema", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=286, - serialized_end=1157, -) - - -_READSTREAM = _descriptor.Descriptor( - name="ReadStream", - full_name="google.cloud.bigquery.storage.v1.ReadStream", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1.ReadStream.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"\352Ax\n)bigquerystorage.googleapis.com/ReadStream\022Kprojects/{project}/locations/{location}/sessions/{session}/streams/{stream}", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1160, - serialized_end=1316, -) - -_READSESSION_TABLEMODIFIERS.fields_by_name[ - "snapshot_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_READSESSION_TABLEMODIFIERS.containing_type = _READSESSION -_READSESSION_TABLEREADOPTIONS.containing_type = _READSESSION -_READSESSION.fields_by_name[ - "expire_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP 
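A short sketch of populating `TableReadOptions` for column selection and row filtering, assuming the `google.cloud.bigquery.storage` types surface and placeholder field and table names:

```py
from google.cloud.bigquery.storage import types

# Placeholder column names and filter. row_restriction behaves like a SQL
# WHERE clause; aggregates are not supported.
read_options = types.ReadSession.TableReadOptions(
    selected_fields=["name", "number"],
    row_restriction='state = "WA" AND number > 5',
)

session = types.ReadSession(
    table="projects/my-project/datasets/my_dataset/tables/my_table",
    data_format=types.DataFormat.AVRO,
    read_options=read_options,
)
```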
-_READSESSION.fields_by_name["data_format"].enum_type = _DATAFORMAT -_READSESSION.fields_by_name[ - "avro_schema" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_avro__pb2._AVROSCHEMA -) -_READSESSION.fields_by_name[ - "arrow_schema" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1_dot_proto_dot_arrow__pb2._ARROWSCHEMA -) -_READSESSION.fields_by_name[ - "table_modifiers" -].message_type = _READSESSION_TABLEMODIFIERS -_READSESSION.fields_by_name["read_options"].message_type = _READSESSION_TABLEREADOPTIONS -_READSESSION.fields_by_name["streams"].message_type = _READSTREAM -_READSESSION.oneofs_by_name["schema"].fields.append( - _READSESSION.fields_by_name["avro_schema"] -) -_READSESSION.fields_by_name[ - "avro_schema" -].containing_oneof = _READSESSION.oneofs_by_name["schema"] -_READSESSION.oneofs_by_name["schema"].fields.append( - _READSESSION.fields_by_name["arrow_schema"] -) -_READSESSION.fields_by_name[ - "arrow_schema" -].containing_oneof = _READSESSION.oneofs_by_name["schema"] -DESCRIPTOR.message_types_by_name["ReadSession"] = _READSESSION -DESCRIPTOR.message_types_by_name["ReadStream"] = _READSTREAM -DESCRIPTOR.enum_types_by_name["DataFormat"] = _DATAFORMAT -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ReadSession = _reflection.GeneratedProtocolMessageType( - "ReadSession", - (_message.Message,), - { - "TableModifiers": _reflection.GeneratedProtocolMessageType( - "TableModifiers", - (_message.Message,), - { - "DESCRIPTOR": _READSESSION_TABLEMODIFIERS, - "__module__": "google.cloud.bigquery_storage_v1.proto.stream_pb2", - "__doc__": """Additional attributes when reading a table. - - Attributes: - snapshot_time: - The snapshot time of the table. If not set, interpreted as - now. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ReadSession.TableModifiers) - }, - ), - "TableReadOptions": _reflection.GeneratedProtocolMessageType( - "TableReadOptions", - (_message.Message,), - { - "DESCRIPTOR": _READSESSION_TABLEREADOPTIONS, - "__module__": "google.cloud.bigquery_storage_v1.proto.stream_pb2", - "__doc__": """Options dictating how we read a table. - - Attributes: - selected_fields: - Names of the fields in the table that should be read. If - empty, all fields will be read. If the specified field is a - nested field, all the sub-fields in the field will be - selected. The output field order is unrelated to the order of - fields in selected_fields. - row_restriction: - SQL text filtering statement, similar to a WHERE clause in a - query. Aggregates are not supported. Examples: “int_field > - 5” “date_field = CAST(‘2014-9-27’ as DATE)” “nullable_field is - not NULL” “st_equals(geo_field, st_geofromtext(”POINT(2, - 2)“))” “numeric_field BETWEEN 1.0 AND 5.0” - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ReadSession.TableReadOptions) - }, - ), - "DESCRIPTOR": _READSESSION, - "__module__": "google.cloud.bigquery_storage_v1.proto.stream_pb2", - "__doc__": """Information about the ReadSession. - - Attributes: - name: - Output only. Unique identifier for the session, in the form `` - projects/{project_id}/locations/{location}/sessions/{session_i - d}``. - expire_time: - Output only. Time at which the session becomes invalid. After - this time, subsequent requests to read this Session will - return errors. The expire_time is automatically assigned and - currently cannot be specified or updated. - data_format: - Immutable. Data format of the output data. - schema: - The schema for the read. 
If read_options.selected_fields is - set, the schema may be different from the table schema as it - will only contain the selected fields. - avro_schema: - Output only. Avro schema. - arrow_schema: - Output only. Arrow schema. - table: - Immutable. Table that this ReadSession is reading from, in the - form ``projects/{project_id}/datasets/{dataset_id}/tables/{tab - le_id}`` - table_modifiers: - Optional. Any modifiers which are applied when reading from - the specified table. - read_options: - Optional. Read options for this session (e.g. column - selection, filters). - streams: - Output only. A list of streams created with the session. At - least one stream is created with the session. In the future, - larger request_stream_count values *may* result in this list - being unpopulated, in that case, the user will need to use a - List method to get the streams instead, which is not yet - available. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ReadSession) - }, -) -_sym_db.RegisterMessage(ReadSession) -_sym_db.RegisterMessage(ReadSession.TableModifiers) -_sym_db.RegisterMessage(ReadSession.TableReadOptions) - -ReadStream = _reflection.GeneratedProtocolMessageType( - "ReadStream", - (_message.Message,), - { - "DESCRIPTOR": _READSTREAM, - "__module__": "google.cloud.bigquery_storage_v1.proto.stream_pb2", - "__doc__": """Information about a single stream that gets data out of the storage - system. Most of the information about ``ReadStream`` instances is - aggregated, making ``ReadStream`` lightweight. - - Attributes: - name: - Output only. Name of the stream, in the form ``projects/{proje - ct_id}/locations/{location}/sessions/{session_id}/streams/{str - eam_id}``. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1.ReadStream) - }, -) -_sym_db.RegisterMessage(ReadStream) - - -DESCRIPTOR._options = None -_READSESSION.fields_by_name["name"]._options = None -_READSESSION.fields_by_name["expire_time"]._options = None -_READSESSION.fields_by_name["data_format"]._options = None -_READSESSION.fields_by_name["avro_schema"]._options = None -_READSESSION.fields_by_name["arrow_schema"]._options = None -_READSESSION.fields_by_name["table"]._options = None -_READSESSION.fields_by_name["table_modifiers"]._options = None -_READSESSION.fields_by_name["read_options"]._options = None -_READSESSION.fields_by_name["streams"]._options = None -_READSESSION._options = None -_READSTREAM.fields_by_name["name"]._options = None -_READSTREAM._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1/proto/stream_pb2_grpc.py b/google/cloud/bigquery_storage_v1/proto/stream_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1/proto/stream_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1/reader.py b/google/cloud/bigquery_storage_v1/reader.py index 3587ff9e..89df3c4d 100644 --- a/google/cloud/bigquery_storage_v1/reader.py +++ b/google/cloud/bigquery_storage_v1/reader.py @@ -81,12 +81,12 @@ def __init__(self, wrapped, client, name, offset, read_rows_kwargs): Args: wrapped (Iterable[ \ - ~google.cloud.bigquery_storage_v1.types.ReadRowsResponse \ + ~google.cloud.bigquery.storage.types.ReadRowsResponse \ ]): The ReadRows stream to read. 
client ( \ - ~google.cloud.bigquery_storage_v1.gapic. \ - big_query_read_client.BigQueryReadClient \ + ~google.cloud.bigquery.storage_v1.services. \ + big_query_read.BigQueryReadClient \ ): A GAPIC client used to reconnect to a ReadRows stream. This must be the GAPIC client to avoid a circular dependency on @@ -153,7 +153,7 @@ def __iter__(self): def _reconnect(self): """Reconnect to the ReadRows stream using the most recent offset.""" self._wrapped = self._client.read_rows( - self._name, self._offset, **self._read_rows_kwargs + read_stream=self._name, offset=self._offset, **self._read_rows_kwargs ) def rows(self, read_session): @@ -328,7 +328,7 @@ def to_dataframe(self, dtypes=None): # pandas dataframe is about 2x faster. This is because pandas.concat is # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is # usually no-copy. - schema_type = self._read_session.WhichOneof("schema") + schema_type = self._read_session._pb.WhichOneof("schema") if schema_type == "arrow_schema": record_batch = self.to_arrow() @@ -493,7 +493,7 @@ def to_rows(self, message): @staticmethod def from_read_session(read_session): - schema_type = read_session.WhichOneof("schema") + schema_type = read_session._pb.WhichOneof("schema") if schema_type == "avro_schema": return _AvroStreamParser(read_session) elif schema_type == "arrow_schema": diff --git a/google/cloud/bigquery_storage_v1/types.py b/google/cloud/bigquery_storage_v1/types.py index 2d6ea9cd..1c3dbc9f 100644 --- a/google/cloud/bigquery_storage_v1/types.py +++ b/google/cloud/bigquery_storage_v1/types.py @@ -14,38 +14,67 @@ # See the License for the specific language governing permissions and # limitations under the License. - from __future__ import absolute_import + +import collections +import inspect import sys -from google.api_core.protobuf_helpers import get_messages +import proto -from google.cloud.bigquery_storage_v1.proto import arrow_pb2 -from google.cloud.bigquery_storage_v1.proto import avro_pb2 -from google.cloud.bigquery_storage_v1.proto import storage_pb2 -from google.cloud.bigquery_storage_v1.proto import stream_pb2 +from google.cloud.bigquery.storage_v1.types import arrow +from google.cloud.bigquery.storage_v1.types import avro +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream + +from google.protobuf import message as protobuf_message from google.protobuf import timestamp_pb2 +# The current api core helper does not find new proto messages of type proto.Message, +# thus we need our own helper. Adjusted from +# https://github.com/googleapis/python-api-core/blob/8595f620e7d8295b6a379d6fd7979af3bef717e2/google/api_core/protobuf_helpers.py#L101-L118 +def _get_protobuf_messages(module): + """Discover all protobuf Message classes in a given import module. + + Args: + module (module): A Python module; :func:`dir` will be run against this + module to find Message subclasses. + + Returns: + dict[str, proto.Message]: A dictionary with the + Message class names as keys, and the Message subclasses themselves + as values. 
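A minimal sketch of what this helper, together with the re-export loop below, provides at import time (assuming the 2.x layout):

```py
# After the re-export loop runs at import time, every proto-plus message
# and enum from the local modules is reachable on this compatibility module:
from google.cloud.bigquery_storage_v1 import types

session = types.ReadSession()          # message defined in stream.proto
data_format = types.DataFormat.ARROW   # proto.Enum values are included too
```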
+ """ + answer = collections.OrderedDict() + for name in dir(module): + candidate = getattr(module, name) + if inspect.isclass(candidate) and issubclass( + candidate, (proto.Enum, proto.Message, protobuf_message.Message) + ): + answer[name] = candidate + return answer + + _shared_modules = [ timestamp_pb2, ] _local_modules = [ - arrow_pb2, - avro_pb2, - storage_pb2, - stream_pb2, + arrow, + avro, + storage, + stream, ] names = [] for module in _shared_modules: # pragma: NO COVER - for name, message in get_messages(module).items(): + for name, message in _get_protobuf_messages(module).items(): setattr(sys.modules[__name__], name, message) names.append(name) -for module in _local_modules: - for name, message in get_messages(module).items(): +for module in _local_modules: # pragma: NO COVER + for name, message in _get_protobuf_messages(module).items(): message.__module__ = "google.cloud.bigquery_storage_v1.types" setattr(sys.modules[__name__], name, message) names.append(name) diff --git a/google/cloud/bigquery_storage_v1beta1/__init__.py b/google/cloud/bigquery_storage_v1beta1/__init__.py deleted file mode 100644 index e355a0f6..00000000 --- a/google/cloud/bigquery_storage_v1beta1/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -import pkg_resources - -__version__ = pkg_resources.get_distribution( - "google-cloud-bigquery-storage" -).version # noqa - -from google.cloud.bigquery_storage_v1beta1 import types -from google.cloud.bigquery_storage_v1beta1 import client -from google.cloud.bigquery_storage_v1beta1.gapic import enums - - -class BigQueryStorageClient(client.BigQueryStorageClient): - __doc__ = client.BigQueryStorageClient.__doc__ - enums = enums - - -__all__ = ( - # google.cloud.bigquery_storage_v1beta1 - "__version__", - "types", - # google.cloud.bigquery_storage_v1beta1.client - "BigQueryStorageClient", - # google.cloud.bigquery_storage_v1beta1.gapic - "enums", -) diff --git a/google/cloud/bigquery_storage_v1beta1/client.py b/google/cloud/bigquery_storage_v1beta1/client.py deleted file mode 100644 index 4f53a305..00000000 --- a/google/cloud/bigquery_storage_v1beta1/client.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Parent client for calling the Cloud BigQuery Storage API. - -This is the base from which all interactions with the API occur. 
-""" - -from __future__ import absolute_import - -import google.api_core.gapic_v1.method - -from google.cloud.bigquery_storage_v1beta1 import reader -from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa - - -_SCOPES = ( - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/cloud-platform", -) - - -class BigQueryStorageClient(big_query_storage_client.BigQueryStorageClient): - """Client for interacting with BigQuery Storage API. - - The BigQuery storage API can be used to read data stored in BigQuery. - """ - - def read_rows( - self, - read_position, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Reads rows from the table in the format prescribed by the read - session. Each response contains one or more table rows, up to a - maximum of 10 MiB per response; read requests which attempt to read - individual rows larger than this will fail. - - Each request also returns a set of stream statistics reflecting the - estimated total number of rows in the read stream. This number is - computed based on the total table size and the number of active - streams in the read session, and may change as other streams continue - to read data. - - Example: - >>> from google.cloud import bigquery_storage_v1beta1 - >>> - >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() - >>> - >>> # TODO: Initialize ``table_reference``: - >>> table_reference = { - ... 'project_id': 'your-data-project-id', - ... 'dataset_id': 'your_dataset_id', - ... 'table_id': 'your_table_id', - ... } - >>> - >>> # TODO: Initialize `parent`: - >>> parent = 'projects/your-billing-project-id' - >>> - >>> session = client.create_read_session(table_reference, parent) - >>> read_position = bigquery_storage_v1beta1.types.StreamPosition( - ... stream=session.streams[0], # TODO: Read the other streams. - ... ) - >>> - >>> for element in client.read_rows(read_position): - ... # process element - ... pass - - Args: - read_position (Union[ \ - dict, \ - ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ - ]): - Required. Identifier of the position in the stream to start - reading from. The offset requested must be less than the last - row read from ReadRows. Requesting a larger offset is - undefined. If a dict is provided, it must be of the same form - as the protobuf message - :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will not - be retried. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - ~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream: - An iterable of - :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse`. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. 
- """ - gapic_client = super(BigQueryStorageClient, self) - stream = gapic_client.read_rows( - read_position, retry=retry, timeout=timeout, metadata=metadata - ) - return reader.ReadRowsStream( - stream, - gapic_client, - read_position, - {"retry": retry, "timeout": timeout, "metadata": metadata}, - ) diff --git a/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py b/google/cloud/bigquery_storage_v1beta1/gapic/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py b/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py deleted file mode 100644 index f21a590d..00000000 --- a/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client.py +++ /dev/null @@ -1,690 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Accesses the google.cloud.bigquery.storage.v1beta1 BigQueryStorage API.""" - -import pkg_resources -import warnings - -from google.oauth2 import service_account -import google.api_core.client_options -import google.api_core.gapic_v1.client_info -import google.api_core.gapic_v1.config -import google.api_core.gapic_v1.method -import google.api_core.path_template -import google.api_core.gapic_v1.routing_header -import google.api_core.grpc_helpers -import google.api_core.path_template -import grpc - -from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client_config -from google.cloud.bigquery_storage_v1beta1.gapic import enums -from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( - big_query_storage_grpc_transport, -) -from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc -from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 -from google.protobuf import empty_pb2 - - -_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( - "google-cloud-bigquery-storage", -).version - - -class BigQueryStorageClient(object): - """ - BigQuery storage API. - - The BigQuery storage API can be used to read data stored in BigQuery. - """ - - SERVICE_ADDRESS = "bigquerystorage.googleapis.com:443" - """The default address of the service.""" - - # The name of the interface for this client. This is the key used to - # find the method configuration in the client_config dictionary. - _INTERFACE_NAME = "google.cloud.bigquery.storage.v1beta1.BigQueryStorage" - - @classmethod - def from_service_account_file(cls, filename, *args, **kwargs): - """Creates an instance of this client using the provided credentials - file. - - Args: - filename (str): The path to the service account private key json - file. - args: Additional arguments to pass to the constructor. - kwargs: Additional arguments to pass to the constructor. - - Returns: - BigQueryStorageClient: The constructed client. 
- """ - credentials = service_account.Credentials.from_service_account_file(filename) - kwargs["credentials"] = credentials - return cls(*args, **kwargs) - - from_service_account_json = from_service_account_file - - @classmethod - def project_path(cls, project): - """Return a fully-qualified project string.""" - return google.api_core.path_template.expand( - "projects/{project}", project=project, - ) - - @classmethod - def read_session_path(cls, project, location, session): - """Return a fully-qualified read_session string.""" - return google.api_core.path_template.expand( - "projects/{project}/locations/{location}/sessions/{session}", - project=project, - location=location, - session=session, - ) - - @classmethod - def stream_path(cls, project, location, stream): - """Return a fully-qualified stream string.""" - return google.api_core.path_template.expand( - "projects/{project}/locations/{location}/streams/{stream}", - project=project, - location=location, - stream=stream, - ) - - def __init__( - self, - transport=None, - channel=None, - credentials=None, - client_config=None, - client_info=None, - client_options=None, - ): - """Constructor. - - Args: - transport (Union[~.BigQueryStorageGrpcTransport, - Callable[[~.Credentials, type], ~.BigQueryStorageGrpcTransport]): A transport - instance, responsible for actually making the API calls. - The default transport uses the gRPC protocol. - This argument may also be a callable which returns a - transport instance. Callables will be sent the credentials - as the first argument and the default transport class as - the second argument. - channel (grpc.Channel): DEPRECATED. A ``Channel`` instance - through which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - This argument is mutually exclusive with providing a - transport instance to ``transport``; doing so will raise - an exception. - client_config (dict): DEPRECATED. A dictionary of call options for - each method. If not specified, the default configuration is used. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing - your own client library. - client_options (Union[dict, google.api_core.client_options.ClientOptions]): - Client options used to set user options on the client. API Endpoint - should be set through client_options. - """ - # Raise deprecation warnings for things we want to go away. - if client_config is not None: - warnings.warn( - "The `client_config` argument is deprecated.", - PendingDeprecationWarning, - stacklevel=2, - ) - else: - client_config = big_query_storage_client_config.config - - if channel: - warnings.warn( - "The `channel` argument is deprecated; use " "`transport` instead.", - PendingDeprecationWarning, - stacklevel=2, - ) - - api_endpoint = self.SERVICE_ADDRESS - if client_options: - if type(client_options) == dict: - client_options = google.api_core.client_options.from_dict( - client_options - ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - - # Instantiate the transport. 
- # The transport is responsible for handling serialization and - # deserialization and actually sending data to the service. - if transport: # pragma: no cover - if callable(transport): - self.transport = transport( - credentials=credentials, - default_class=big_query_storage_grpc_transport.BigQueryStorageGrpcTransport, - address=api_endpoint, - ) - else: - if credentials: - raise ValueError( - "Received both a transport instance and " - "credentials; these are mutually exclusive." - ) - self.transport = transport - else: - self.transport = big_query_storage_grpc_transport.BigQueryStorageGrpcTransport( - address=api_endpoint, channel=channel, credentials=credentials, - ) - - if client_info is None: - client_info = google.api_core.gapic_v1.client_info.ClientInfo( - gapic_version=_GAPIC_LIBRARY_VERSION, - ) - else: - client_info.gapic_version = _GAPIC_LIBRARY_VERSION - self._client_info = client_info - - # Parse out the default settings for retry and timeout for each RPC - # from the client configuration. - # (Ordinarily, these are the defaults specified in the `*_config.py` - # file next to this one.) - self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( - client_config["interfaces"][self._INTERFACE_NAME], - ) - - # Save a dictionary of cached API call functions. - # These are the actual callables which invoke the proper - # transport methods, wrapped with `wrap_method` to add retry, - # timeout, and the like. - self._inner_api_calls = {} - - # Service calls - def create_read_session( - self, - table_reference, - parent, - table_modifiers=None, - requested_streams=None, - read_options=None, - format_=None, - sharding_strategy=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. - - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - - Example: - >>> from google.cloud import bigquery_storage_v1beta1 - >>> - >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() - >>> - >>> # TODO: Initialize `table_reference`: - >>> table_reference = {} - >>> parent = client.project_path('[PROJECT]') - >>> - >>> response = client.create_read_session(table_reference, parent) - - Args: - table_reference (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReference]): Required. Reference to the table to read. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReference` - parent (str): A value in the range [0.0, 1.0] that represents the fraction of rows - assigned to this stream that have been processed by the server. In the - presence of read filters, the server may process more rows than it - returns, so this value reflects progress through the pre-filtering rows. - - This value is only populated for sessions created through the BALANCED - sharding strategy. 
- table_modifiers (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableModifiers]): Any modifiers to the Table (e.g. snapshot timestamp). - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableModifiers` - requested_streams (int): Initial number of streams. If unset or 0, we will - provide a value of streams so as to produce reasonable throughput. Must be - non-negative. The number of streams may be lower than the requested number, - depending on the amount parallelism that is reasonable for the table and - the maximum amount of parallelism allowed by the system. - - Streams must be read starting from offset 0. - read_options (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions]): Read options for this session (e.g. column selection, filters). - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.TableReadOptions` - format_ (~google.cloud.bigquery_storage_v1beta1.types.DataFormat): Data output format. Currently default to Avro. - sharding_strategy (~google.cloud.bigquery_storage_v1beta1.types.ShardingStrategy): The strategy to use for distributing data among multiple streams. Currently - defaults to liquid sharding. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
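For reference while reviewing this removal: a minimal sketch of the `create_read_session()` call documented in the docstring above. The project, dataset, and table names are placeholders; the `table_reference` dict mirrors the `TableReference` message, and `parent` is the billed project in `projects/{project_id}` form.

```py
from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Dict form of the TableReference message (placeholder identifiers).
table_reference = {
    "project_id": "my-project",
    "dataset_id": "my_dataset",
    "table_id": "my_table",
}

# `parent` is the project that will be billed for the read.
session = client.create_read_session(
    table_reference,
    "projects/my-project",
    requested_streams=1,
)
print(session.name, len(session.streams))
```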
- if "create_read_session" not in self._inner_api_calls: - self._inner_api_calls[ - "create_read_session" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.create_read_session, - default_retry=self._method_configs["CreateReadSession"].retry, - default_timeout=self._method_configs["CreateReadSession"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.CreateReadSessionRequest( - table_reference=table_reference, - parent=parent, - table_modifiers=table_modifiers, - requested_streams=requested_streams, - read_options=read_options, - format=format_, - sharding_strategy=sharding_strategy, - ) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [ - ("table_reference.project_id", table_reference.project_id), - ("table_reference.dataset_id", table_reference.dataset_id), - ] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["create_read_session"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def read_rows( - self, - read_position, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Reads rows from the table in the format prescribed by the read session. - Each response contains one or more table rows, up to a maximum of 10 MiB - per response; read requests which attempt to read individual rows larger - than this will fail. - - Each request also returns a set of stream statistics reflecting the - estimated total number of rows in the read stream. This number is computed - based on the total table size and the number of active streams in the read - session, and may change as other streams continue to read data. - - Example: - >>> from google.cloud import bigquery_storage_v1beta1 - >>> - >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() - >>> - >>> # TODO: Initialize `read_position`: - >>> read_position = {} - >>> - >>> for element in client.read_rows(read_position): - ... # process element - ... pass - - Args: - read_position (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition]): Required. Identifier of the position in the stream to start reading from. - The offset requested must be less than the last row read from ReadRows. - Requesting a larger offset is undefined. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - Iterable[~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse]. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
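A minimal sketch of the streaming `read_rows()` call described above; the stream name is a placeholder for a stream obtained from a previously created read session.

```py
from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Dict form of the StreamPosition message, starting at offset 0; the stream
# name is a placeholder taken from an existing read session.
read_position = {
    "stream": {"name": "projects/my-project/locations/us/streams/STREAM_ID"},
    "offset": 0,
}

for response in client.read_rows(read_position):
    # Each ReadRowsResponse carries a block of serialized rows plus stream stats.
    print(response.row_count, response.status.estimated_row_count)
```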
- if "read_rows" not in self._inner_api_calls: - self._inner_api_calls[ - "read_rows" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.read_rows, - default_retry=self._method_configs["ReadRows"].retry, - default_timeout=self._method_configs["ReadRows"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.ReadRowsRequest(read_position=read_position,) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("read_position.stream.name", read_position.stream.name)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["read_rows"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def batch_create_read_session_streams( - self, - session, - requested_streams, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Creates additional streams for a ReadSession. This API can be used to - dynamically adjust the parallelism of a batch processing task upwards by - adding additional workers. - - Example: - >>> from google.cloud import bigquery_storage_v1beta1 - >>> - >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() - >>> - >>> # TODO: Initialize `session`: - >>> session = {} - >>> - >>> # TODO: Initialize `requested_streams`: - >>> requested_streams = 0 - >>> - >>> response = client.batch_create_read_session_streams(session, requested_streams) - - Args: - session (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.ReadSession]): Required. Must be a non-expired session obtained from a call to - CreateReadSession. Only the name field needs to be set. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadSession` - requested_streams (int): Required. Number of new streams requested. Must be positive. - Number of added streams may be less than this, see CreateReadSessionRequest - for more information. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1beta1.types.BatchCreateReadSessionStreamsResponse` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
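A sketch of `batch_create_read_session_streams()` as documented above; the session name is a placeholder for a non-expired session returned by `create_read_session()`.

```py
from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Only the session name needs to be set (placeholder shown here).
session = {"name": "projects/my-project/locations/us/sessions/SESSION_ID"}

response = client.batch_create_read_session_streams(session, requested_streams=2)
for stream in response.streams:
    print(stream.name)
```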
- if "batch_create_read_session_streams" not in self._inner_api_calls: - self._inner_api_calls[ - "batch_create_read_session_streams" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.batch_create_read_session_streams, - default_retry=self._method_configs[ - "BatchCreateReadSessionStreams" - ].retry, - default_timeout=self._method_configs[ - "BatchCreateReadSessionStreams" - ].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.BatchCreateReadSessionStreamsRequest( - session=session, requested_streams=requested_streams, - ) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("session.name", session.name)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["batch_create_read_session_streams"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def finalize_stream( - self, - stream, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Triggers the graceful termination of a single stream in a ReadSession. This - API can be used to dynamically adjust the parallelism of a batch processing - task downwards without losing data. - - This API does not delete the stream -- it remains visible in the - ReadSession, and any data processed by the stream is not released to other - streams. However, no additional data will be assigned to the stream once - this call completes. Callers must continue reading data on the stream until - the end of the stream is reached so that data which has already been - assigned to the stream will be processed. - - This method will return an error if there are no other live streams - in the Session, or if SplitReadStream() has been called on the given - Stream. - - Example: - >>> from google.cloud import bigquery_storage_v1beta1 - >>> - >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() - >>> - >>> # TODO: Initialize `stream`: - >>> stream = {} - >>> - >>> client.finalize_stream(stream) - - Args: - stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Required. Stream to finalize. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
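A sketch of `finalize_stream()` per the docstring above; the stream name is a placeholder.

```py
from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Placeholder name of a stream belonging to an active read session.
stream = {"name": "projects/my-project/locations/us/streams/STREAM_ID"}

# No further data is assigned to the stream after this call; data already
# assigned must still be read to the end of the stream.
client.finalize_stream(stream)
```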
- if "finalize_stream" not in self._inner_api_calls: - self._inner_api_calls[ - "finalize_stream" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.finalize_stream, - default_retry=self._method_configs["FinalizeStream"].retry, - default_timeout=self._method_configs["FinalizeStream"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.FinalizeStreamRequest(stream=stream,) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("stream.name", stream.name)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - self._inner_api_calls["finalize_stream"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def split_read_stream( - self, - original_stream, - fraction=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - An annotation that describes a resource definition, see - ``ResourceDescriptor``. - - Example: - >>> from google.cloud import bigquery_storage_v1beta1 - >>> - >>> client = bigquery_storage_v1beta1.BigQueryStorageClient() - >>> - >>> # TODO: Initialize `original_stream`: - >>> original_stream = {} - >>> - >>> response = client.split_read_stream(original_stream) - - Args: - original_stream (Union[dict, ~google.cloud.bigquery_storage_v1beta1.types.Stream]): Required. Stream to split. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta1.types.Stream` - fraction (float): A value in the range (0.0, 1.0) that specifies the fractional point at - which the original stream should be split. The actual split point is - evaluated on pre-filtered rows, so if a filter is provided, then there is - no guarantee that the division of the rows between the new child streams - will be proportional to this fractional value. Additionally, because the - server-side unit for assigning data is collections of rows, this fraction - will always map to to a data storage boundary on the server side. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1beta1.types.SplitReadStreamResponse` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
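Note that the summary line of the docstring above ("An annotation that describes a resource definition...") is a doc-generation artifact; the actual behaviour is the stream split described for `SplitReadStream` in `storage.proto` further down. A sketch of the call, with a placeholder stream name:

```py
from google.cloud import bigquery_storage_v1beta1

client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Placeholder name of the stream to split into primary and residual streams.
original_stream = {"name": "projects/my-project/locations/us/streams/STREAM_ID"}

# Split roughly in half; the actual split point falls on a server-side
# row-collection boundary evaluated over pre-filtered rows.
response = client.split_read_stream(original_stream, fraction=0.5)
print(response)
```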
- if "split_read_stream" not in self._inner_api_calls: - self._inner_api_calls[ - "split_read_stream" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.split_read_stream, - default_retry=self._method_configs["SplitReadStream"].retry, - default_timeout=self._method_configs["SplitReadStream"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.SplitReadStreamRequest( - original_stream=original_stream, fraction=fraction, - ) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("original_stream.name", original_stream.name)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["split_read_stream"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) diff --git a/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py b/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py deleted file mode 100644 index ec15a186..00000000 --- a/google/cloud/bigquery_storage_v1beta1/gapic/big_query_storage_client_config.py +++ /dev/null @@ -1,77 +0,0 @@ -config = { - "interfaces": { - "google.cloud.bigquery.storage.v1beta1.BigQueryStorage": { - "retry_codes": { - "retry_policy_1_codes": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "no_retry_codes": [], - "retry_policy_3_codes": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "retry_policy_2_codes": ["UNAVAILABLE"], - }, - "retry_params": { - "retry_policy_1_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 600000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 600000, - "total_timeout_millis": 600000, - }, - "retry_policy_2_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 86400000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 86400000, - "total_timeout_millis": 86400000, - }, - "retry_policy_3_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 600000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 600000, - "total_timeout_millis": 600000, - }, - "no_retry_params": { - "initial_retry_delay_millis": 0, - "retry_delay_multiplier": 0.0, - "max_retry_delay_millis": 0, - "initial_rpc_timeout_millis": 0, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 0, - "total_timeout_millis": 0, - }, - }, - "methods": { - "CreateReadSession": { - "timeout_millis": 600000, - "retry_codes_name": "retry_policy_1_codes", - "retry_params_name": "retry_policy_1_params", - }, - "ReadRows": { - "timeout_millis": 21600000, - "retry_codes_name": "retry_policy_2_codes", - "retry_params_name": "retry_policy_2_params", - }, - "BatchCreateReadSessionStreams": { - "timeout_millis": 600000, - "retry_codes_name": "retry_policy_3_codes", - "retry_params_name": "retry_policy_3_params", - }, - "FinalizeStream": { - "timeout_millis": 600000, - "retry_codes_name": "retry_policy_3_codes", - "retry_params_name": "retry_policy_3_params", - }, - "SplitReadStream": { - "timeout_millis": 600000, - "retry_codes_name": "retry_policy_3_codes", - "retry_params_name": "retry_policy_3_params", - }, - }, - } - } -} diff --git 
a/google/cloud/bigquery_storage_v1beta1/gapic/enums.py b/google/cloud/bigquery_storage_v1beta1/gapic/enums.py deleted file mode 100644 index 71649e1d..00000000 --- a/google/cloud/bigquery_storage_v1beta1/gapic/enums.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers for protocol buffer enum types.""" - -import enum - - -class DataFormat(enum.IntEnum): - """ - Data format for input or output data. - - Attributes: - DATA_FORMAT_UNSPECIFIED (int): Data format is unspecified. - AVRO (int): Avro is a standard open source row based file format. - See https://avro.apache.org/ for more details. - ARROW (int) - """ - - DATA_FORMAT_UNSPECIFIED = 0 - AVRO = 1 - ARROW = 3 - - -class ShardingStrategy(enum.IntEnum): - """ - Strategy for distributing data among multiple streams in a read session. - - Attributes: - SHARDING_STRATEGY_UNSPECIFIED (int): Same as LIQUID. - LIQUID (int): Assigns data to each stream based on the client's read rate. The faster the - client reads from a stream, the more data is assigned to the stream. In - this strategy, it's possible to read all data from a single stream even if - there are other streams present. - BALANCED (int): Assigns data to each stream such that roughly the same number of rows can - be read from each stream. Because the server-side unit for assigning data - is collections of rows, the API does not guarantee that each stream will - return the same number or rows. Additionally, the limits are enforced based - on the number of pre-filtering rows, so some filters can lead to lopsided - assignments. - """ - - SHARDING_STRATEGY_UNSPECIFIED = 0 - LIQUID = 1 - BALANCED = 2 diff --git a/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py b/google/cloud/bigquery_storage_v1beta1/gapic/transports/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py b/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py deleted file mode 100644 index 809483d9..00000000 --- a/google/cloud/bigquery_storage_v1beta1/gapic/transports/big_query_storage_grpc_transport.py +++ /dev/null @@ -1,215 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
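The `DataFormat` and `ShardingStrategy` wrappers removed just above (`gapic/enums.py`) were typically passed to `create_read_session()`. A sketch, assuming the wrappers are re-exported as `bigquery_storage_v1beta1.enums` (import path shown here for illustration):

```py
from google.cloud import bigquery_storage_v1beta1
# Assumed re-export of the deleted gapic/enums.py wrappers.
from google.cloud.bigquery_storage_v1beta1 import enums

client = bigquery_storage_v1beta1.BigQueryStorageClient()
session = client.create_read_session(
    {"project_id": "my-project", "dataset_id": "my_dataset", "table_id": "my_table"},
    "projects/my-project",
    format_=enums.DataFormat.ARROW,  # Arrow output instead of the Avro default
    sharding_strategy=enums.ShardingStrategy.BALANCED,
)
```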
- - -import google.api_core.grpc_helpers - -from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc - - -class BigQueryStorageGrpcTransport(object): - """gRPC transport class providing stubs for - google.cloud.bigquery.storage.v1beta1 BigQueryStorage API. - - The transport provides access to the raw gRPC stubs, - which can be used to take advantage of advanced - features of gRPC. - """ - - # The scopes needed to make gRPC calls to all of the methods defined - # in this service. - _OAUTH_SCOPES = ( - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/bigquery.readonly", - "https://www.googleapis.com/auth/cloud-platform", - ) - - def __init__( - self, - channel=None, - credentials=None, - address="bigquerystorage.googleapis.com:443", - ): - """Instantiate the transport class. - - Args: - channel (grpc.Channel): A ``Channel`` instance through - which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - address (str): The address where the service is hosted. - """ - # If both `channel` and `credentials` are specified, raise an - # exception (channels come with credentials baked in already). - if channel is not None and credentials is not None: # pragma: no cover - raise ValueError( - "The `channel` and `credentials` arguments are mutually " "exclusive.", - ) - - # Create the channel. - if channel is None: # pragma: no cover - channel = self.create_channel( - address=address, - credentials=credentials, - options={ - "grpc.max_send_message_length": -1, - "grpc.max_receive_message_length": -1, - }.items(), - ) - - self._channel = channel - - # gRPC uses objects called "stubs" that are bound to the - # channel and provide a basic method for each RPC. - self._stubs = { - "big_query_storage_stub": storage_pb2_grpc.BigQueryStorageStub(channel), - } - - @classmethod - def create_channel( - cls, address="bigquerystorage.googleapis.com:443", credentials=None, **kwargs - ): - """Create and return a gRPC channel object. - - Args: - address (str): The host for the channel to use. - credentials (~.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If - none are specified, the client will attempt to ascertain - the credentials from the environment. - kwargs (dict): Keyword arguments, which are passed to the - channel creation. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return google.api_core.grpc_helpers.create_channel( # pragma: no cover - address, credentials=credentials, scopes=cls._OAUTH_SCOPES, **kwargs - ) - - @property - def channel(self): - """The gRPC channel used by the transport. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return self._channel - - @property - def create_read_session(self): - """Return the gRPC stub for :meth:`BigQueryStorageClient.create_read_session`. - - Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. 
- - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_storage_stub"].CreateReadSession - - @property - def read_rows(self): - """Return the gRPC stub for :meth:`BigQueryStorageClient.read_rows`. - - Reads rows from the table in the format prescribed by the read session. - Each response contains one or more table rows, up to a maximum of 10 MiB - per response; read requests which attempt to read individual rows larger - than this will fail. - - Each request also returns a set of stream statistics reflecting the - estimated total number of rows in the read stream. This number is computed - based on the total table size and the number of active streams in the read - session, and may change as other streams continue to read data. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_storage_stub"].ReadRows - - @property - def batch_create_read_session_streams(self): - """Return the gRPC stub for :meth:`BigQueryStorageClient.batch_create_read_session_streams`. - - Creates additional streams for a ReadSession. This API can be used to - dynamically adjust the parallelism of a batch processing task upwards by - adding additional workers. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_storage_stub"].BatchCreateReadSessionStreams - - @property - def finalize_stream(self): - """Return the gRPC stub for :meth:`BigQueryStorageClient.finalize_stream`. - - Triggers the graceful termination of a single stream in a ReadSession. This - API can be used to dynamically adjust the parallelism of a batch processing - task downwards without losing data. - - This API does not delete the stream -- it remains visible in the - ReadSession, and any data processed by the stream is not released to other - streams. However, no additional data will be assigned to the stream once - this call completes. Callers must continue reading data on the stream until - the end of the stream is reached so that data which has already been - assigned to the stream will be processed. - - This method will return an error if there are no other live streams - in the Session, or if SplitReadStream() has been called on the given - Stream. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_storage_stub"].FinalizeStream - - @property - def split_read_stream(self): - """Return the gRPC stub for :meth:`BigQueryStorageClient.split_read_stream`. - - An annotation that describes a resource definition, see - ``ResourceDescriptor``. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. 
- """ - return self._stubs["big_query_storage_stub"].SplitReadStream diff --git a/google/cloud/bigquery_storage_v1beta1/proto/__init__.py b/google/cloud/bigquery_storage_v1beta1/proto/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta1/proto/arrow.proto b/google/cloud/bigquery_storage_v1beta1/proto/arrow.proto deleted file mode 100644 index f70c61c7..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/arrow.proto +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta1; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage"; -option java_outer_classname = "ArrowProto"; -option java_package = "com.google.cloud.bigquery.storage.v1beta1"; - -// Arrow schema. -message ArrowSchema { - // IPC serialized Arrow schema. - bytes serialized_schema = 1; -} - -// Arrow RecordBatch. -message ArrowRecordBatch { - // IPC serialized Arrow RecordBatch. - bytes serialized_record_batch = 1; - - // The count of rows in the returning block. - int64 row_count = 2; -} diff --git a/google/cloud/bigquery_storage_v1beta1/proto/arrow_pb2.py b/google/cloud/bigquery_storage_v1beta1/proto/arrow_pb2.py deleted file mode 100644 index 369cfa2c..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/arrow_pb2.py +++ /dev/null @@ -1,167 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/bigquery_storage_v1beta1/proto/arrow.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta1/proto/arrow.proto", - package="google.cloud.bigquery.storage.v1beta1", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta1B\nArrowProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n7google/cloud/bigquery_storage_v1beta1/proto/arrow.proto\x12%google.cloud.bigquery.storage.v1beta1"(\n\x0b\x41rrowSchema\x12\x19\n\x11serialized_schema\x18\x01 \x01(\x0c"F\n\x10\x41rrowRecordBatch\x12\x1f\n\x17serialized_record_batch\x18\x01 \x01(\x0c\x12\x11\n\trow_count\x18\x02 \x01(\x03\x42\x85\x01\n)com.google.cloud.bigquery.storage.v1beta1B\nArrowProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3', -) - - -_ARROWSCHEMA = _descriptor.Descriptor( - name="ArrowSchema", - full_name="google.cloud.bigquery.storage.v1beta1.ArrowSchema", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_schema", - full_name="google.cloud.bigquery.storage.v1beta1.ArrowSchema.serialized_schema", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=98, - serialized_end=138, -) - - -_ARROWRECORDBATCH = _descriptor.Descriptor( - name="ArrowRecordBatch", - full_name="google.cloud.bigquery.storage.v1beta1.ArrowRecordBatch", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_record_batch", - full_name="google.cloud.bigquery.storage.v1beta1.ArrowRecordBatch.serialized_record_batch", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1beta1.ArrowRecordBatch.row_count", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=140, 
- serialized_end=210, -) - -DESCRIPTOR.message_types_by_name["ArrowSchema"] = _ARROWSCHEMA -DESCRIPTOR.message_types_by_name["ArrowRecordBatch"] = _ARROWRECORDBATCH -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ArrowSchema = _reflection.GeneratedProtocolMessageType( - "ArrowSchema", - (_message.Message,), - { - "DESCRIPTOR": _ARROWSCHEMA, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.arrow_pb2", - "__doc__": """Arrow schema. - - Attributes: - serialized_schema: - IPC serialized Arrow schema. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ArrowSchema) - }, -) -_sym_db.RegisterMessage(ArrowSchema) - -ArrowRecordBatch = _reflection.GeneratedProtocolMessageType( - "ArrowRecordBatch", - (_message.Message,), - { - "DESCRIPTOR": _ARROWRECORDBATCH, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.arrow_pb2", - "__doc__": """Arrow RecordBatch. - - Attributes: - serialized_record_batch: - IPC serialized Arrow RecordBatch. - row_count: - The count of rows in the returning block. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ArrowRecordBatch) - }, -) -_sym_db.RegisterMessage(ArrowRecordBatch) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta1/proto/arrow_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta1/proto/arrow_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/arrow_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta1/proto/avro.proto b/google/cloud/bigquery_storage_v1beta1/proto/avro.proto deleted file mode 100644 index 7d034a28..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/avro.proto +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta1; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage"; -option java_outer_classname = "AvroProto"; -option java_package = "com.google.cloud.bigquery.storage.v1beta1"; - -// Avro schema. -message AvroSchema { - // Json serialized schema, as described at - // https://avro.apache.org/docs/1.8.1/spec.html - string schema = 1; -} - -// Avro rows. -message AvroRows { - // Binary serialized rows in a block. - bytes serialized_binary_rows = 1; - - // The count of rows in the returning block. 
- int64 row_count = 2; -} diff --git a/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py b/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py deleted file mode 100644 index f0a81a90..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2.py +++ /dev/null @@ -1,168 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_storage_v1beta1/proto/avro.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta1/proto/avro.proto", - package="google.cloud.bigquery.storage.v1beta1", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta1B\tAvroProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n6google/cloud/bigquery_storage_v1beta1/proto/avro.proto\x12%google.cloud.bigquery.storage.v1beta1"\x1c\n\nAvroSchema\x12\x0e\n\x06schema\x18\x01 \x01(\t"=\n\x08\x41vroRows\x12\x1e\n\x16serialized_binary_rows\x18\x01 \x01(\x0c\x12\x11\n\trow_count\x18\x02 \x01(\x03\x42\x84\x01\n)com.google.cloud.bigquery.storage.v1beta1B\tAvroProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3', -) - - -_AVROSCHEMA = _descriptor.Descriptor( - name="AvroSchema", - full_name="google.cloud.bigquery.storage.v1beta1.AvroSchema", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="schema", - full_name="google.cloud.bigquery.storage.v1beta1.AvroSchema.schema", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=97, - serialized_end=125, -) - - -_AVROROWS = _descriptor.Descriptor( - name="AvroRows", - full_name="google.cloud.bigquery.storage.v1beta1.AvroRows", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_binary_rows", - full_name="google.cloud.bigquery.storage.v1beta1.AvroRows.serialized_binary_rows", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1beta1.AvroRows.row_count", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=127, - serialized_end=188, -) - -DESCRIPTOR.message_types_by_name["AvroSchema"] = _AVROSCHEMA -DESCRIPTOR.message_types_by_name["AvroRows"] = _AVROROWS -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -AvroSchema = _reflection.GeneratedProtocolMessageType( - "AvroSchema", - (_message.Message,), - { - "DESCRIPTOR": _AVROSCHEMA, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.avro_pb2", - "__doc__": """Avro schema. - - Attributes: - schema: - Json serialized schema, as described at - https://avro.apache.org/docs/1.8.1/spec.html - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.AvroSchema) - }, -) -_sym_db.RegisterMessage(AvroSchema) - -AvroRows = _reflection.GeneratedProtocolMessageType( - "AvroRows", - (_message.Message,), - { - "DESCRIPTOR": _AVROROWS, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.avro_pb2", - "__doc__": """Avro rows. - - Attributes: - serialized_binary_rows: - Binary serialized rows in a block. - row_count: - The count of rows in the returning block. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.AvroRows) - }, -) -_sym_db.RegisterMessage(AvroRows) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/avro_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta1/proto/read_options.proto b/google/cloud/bigquery_storage_v1beta1/proto/read_options.proto deleted file mode 100644 index 1ff8d8b5..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/read_options.proto +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta1; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage"; -option java_package = "com.google.cloud.bigquery.storage.v1beta1"; - -// Options dictating how we read a table. -message TableReadOptions { - // Optional. Names of the fields in the table that should be read. If empty, - // all fields will be read. If the specified field is a nested field, all the - // sub-fields in the field will be selected. The output field order is - // unrelated to the order of fields in selected_fields. - repeated string selected_fields = 1; - - // Optional. 
SQL text filtering statement, similar to a WHERE clause in - // a query. Aggregates are not supported. - // - // Examples: "int_field > 5" - // "date_field = CAST('2014-9-27' as DATE)" - // "nullable_field is not NULL" - // "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))" - // "numeric_field BETWEEN 1.0 AND 5.0" - string row_restriction = 2; -} diff --git a/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py b/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py deleted file mode 100644 index 4d3bfcaa..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_storage_v1beta1/proto/read_options.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta1/proto/read_options.proto", - package="google.cloud.bigquery.storage.v1beta1", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n>google/cloud/bigquery_storage_v1beta1/proto/read_options.proto\x12%google.cloud.bigquery.storage.v1beta1"D\n\x10TableReadOptions\x12\x17\n\x0fselected_fields\x18\x01 \x03(\t\x12\x17\n\x0frow_restriction\x18\x02 \x01(\tBy\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3', -) - - -_TABLEREADOPTIONS = _descriptor.Descriptor( - name="TableReadOptions", - full_name="google.cloud.bigquery.storage.v1beta1.TableReadOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="selected_fields", - full_name="google.cloud.bigquery.storage.v1beta1.TableReadOptions.selected_fields", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_restriction", - full_name="google.cloud.bigquery.storage.v1beta1.TableReadOptions.row_restriction", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=105, - serialized_end=173, -) - -DESCRIPTOR.message_types_by_name["TableReadOptions"] = _TABLEREADOPTIONS -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -TableReadOptions = _reflection.GeneratedProtocolMessageType( - "TableReadOptions", - (_message.Message,), - { - 
"DESCRIPTOR": _TABLEREADOPTIONS, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.read_options_pb2", - "__doc__": """Options dictating how we read a table. - - Attributes: - selected_fields: - Optional. Names of the fields in the table that should be - read. If empty, all fields will be read. If the specified - field is a nested field, all the sub-fields in the field will - be selected. The output field order is unrelated to the order - of fields in selected_fields. - row_restriction: - Optional. SQL text filtering statement, similar to a WHERE - clause in a query. Aggregates are not supported. Examples: - “int_field > 5” “date_field = CAST(‘2014-9-27’ as DATE)” - “nullable_field is not NULL” “st_equals(geo_field, - st_geofromtext(”POINT(2, 2)“))” “numeric_field BETWEEN 1.0 AND - 5.0” - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableReadOptions) - }, -) -_sym_db.RegisterMessage(TableReadOptions) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/read_options_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta1/proto/storage.proto b/google/cloud/bigquery_storage_v1beta1/proto/storage.proto deleted file mode 100644 index 81e77c73..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/storage.proto +++ /dev/null @@ -1,406 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta1; - -import "google/api/annotations.proto"; -import "google/api/client.proto"; -import "google/api/field_behavior.proto"; -import "google/api/resource.proto"; -import "google/cloud/bigquery/storage/v1beta1/arrow.proto"; -import "google/cloud/bigquery/storage/v1beta1/avro.proto"; -import "google/cloud/bigquery/storage/v1beta1/read_options.proto"; -import "google/cloud/bigquery/storage/v1beta1/table_reference.proto"; -import "google/protobuf/empty.proto"; -import "google/protobuf/timestamp.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage"; -option java_package = "com.google.cloud.bigquery.storage.v1beta1"; - -// BigQuery storage API. -// -// The BigQuery storage API can be used to read data stored in BigQuery. -service BigQueryStorage { - option (google.api.default_host) = "bigquerystorage.googleapis.com"; - option (google.api.oauth_scopes) = - "https://www.googleapis.com/auth/bigquery," - "https://www.googleapis.com/auth/bigquery.readonly," - "https://www.googleapis.com/auth/cloud-platform"; - - // Creates a new read session. 
A read session divides the contents of a - // BigQuery table into one or more streams, which can then be used to read - // data from the table. The read session also specifies properties of the - // data to be read, such as a list of columns or a push-down filter describing - // the rows to be returned. - // - // A particular row can be read by at most one stream. When the caller has - // reached the end of each stream in the session, then all the data in the - // table has been read. - // - // Read sessions automatically expire 24 hours after they are created and do - // not require manual clean-up by the caller. - rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) { - option (google.api.http) = { - post: "/v1beta1/{table_reference.project_id=projects/*}" - body: "*" - additional_bindings { - post: "/v1beta1/{table_reference.dataset_id=projects/*/datasets/*}" - body: "*" - } - }; - option (google.api.method_signature) = "table_reference,parent,requested_streams"; - } - - // Reads rows from the table in the format prescribed by the read session. - // Each response contains one or more table rows, up to a maximum of 10 MiB - // per response; read requests which attempt to read individual rows larger - // than this will fail. - // - // Each request also returns a set of stream statistics reflecting the - // estimated total number of rows in the read stream. This number is computed - // based on the total table size and the number of active streams in the read - // session, and may change as other streams continue to read data. - rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) { - option (google.api.http) = { - get: "/v1beta1/{read_position.stream.name=projects/*/streams/*}" - }; - option (google.api.method_signature) = "read_position"; - } - - // Creates additional streams for a ReadSession. This API can be used to - // dynamically adjust the parallelism of a batch processing task upwards by - // adding additional workers. - rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) { - option (google.api.http) = { - post: "/v1beta1/{session.name=projects/*/sessions/*}" - body: "*" - }; - option (google.api.method_signature) = "session,requested_streams"; - } - - // Triggers the graceful termination of a single stream in a ReadSession. This - // API can be used to dynamically adjust the parallelism of a batch processing - // task downwards without losing data. - // - // This API does not delete the stream -- it remains visible in the - // ReadSession, and any data processed by the stream is not released to other - // streams. However, no additional data will be assigned to the stream once - // this call completes. Callers must continue reading data on the stream until - // the end of the stream is reached so that data which has already been - // assigned to the stream will be processed. - // - // This method will return an error if there are no other live streams - // in the Session, or if SplitReadStream() has been called on the given - // Stream. - rpc FinalizeStream(FinalizeStreamRequest) returns (google.protobuf.Empty) { - option (google.api.http) = { - post: "/v1beta1/{stream.name=projects/*/streams/*}" - body: "*" - }; - option (google.api.method_signature) = "stream"; - } - - // Splits a given read stream into two Streams. These streams are referred to - // as the primary and the residual of the split. The original stream can still - // be read from in the same manner as before. 
Both of the returned streams can - // also be read from, and the total rows return by both child streams will be - // the same as the rows read from the original stream. - // - // Moreover, the two child streams will be allocated back to back in the - // original Stream. Concretely, it is guaranteed that for streams Original, - // Primary, and Residual, that Original[0-j] = Primary[0-j] and - // Original[j-n] = Residual[0-m] once the streams have been read to - // completion. - // - // This method is guaranteed to be idempotent. - rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) { - option (google.api.http) = { - get: "/v1beta1/{original_stream.name=projects/*/streams/*}" - }; - option (google.api.method_signature) = "original_stream"; - } -} - -// Information about a single data stream within a read session. -message Stream { - option (google.api.resource) = { - type: "bigquerystorage.googleapis.com/Stream" - pattern: "projects/{project}/locations/{location}/streams/{stream}" - }; - - // Name of the stream, in the form - // `projects/{project_id}/locations/{location}/streams/{stream_id}`. - string name = 1; -} - -// Expresses a point within a given stream using an offset position. -message StreamPosition { - // Identifier for a given Stream. - Stream stream = 1; - - // Position in the stream. - int64 offset = 2; -} - -// Information returned from a `CreateReadSession` request. -message ReadSession { - option (google.api.resource) = { - type: "bigquerystorage.googleapis.com/ReadSession" - pattern: "projects/{project}/locations/{location}/sessions/{session}" - }; - - // Unique identifier for the session, in the form - // `projects/{project_id}/locations/{location}/sessions/{session_id}`. - string name = 1; - - // Time at which the session becomes invalid. After this time, subsequent - // requests to read this Session will return errors. - google.protobuf.Timestamp expire_time = 2; - - // The schema for the read. If read_options.selected_fields is set, the - // schema may be different from the table schema as it will only contain - // the selected fields. - oneof schema { - // Avro schema. - AvroSchema avro_schema = 5; - - // Arrow schema. - ArrowSchema arrow_schema = 6; - } - - // Streams associated with this session. - repeated Stream streams = 4; - - // Table that this ReadSession is reading from. - TableReference table_reference = 7; - - // Any modifiers which are applied when reading from the specified table. - TableModifiers table_modifiers = 8; - - // The strategy to use for distributing data among the streams. - ShardingStrategy sharding_strategy = 9; -} - -// Creates a new read session, which may include additional options such as -// requested parallelism, projection filters and constraints. -message CreateReadSessionRequest { - // Required. Reference to the table to read. - TableReference table_reference = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. String of the form `projects/{project_id}` indicating the - // project this ReadSession is associated with. This is the project that will - // be billed for usage. - string parent = 6 [ - (google.api.field_behavior) = REQUIRED, - (google.api.resource_reference) = { - type: "cloudresourcemanager.googleapis.com/Project" - } - ]; - - // Any modifiers to the Table (e.g. snapshot timestamp). - TableModifiers table_modifiers = 2; - - // Initial number of streams. If unset or 0, we will - // provide a value of streams so as to produce reasonable throughput. Must be - // non-negative. 
The number of streams may be lower than the requested number, - // depending on the amount parallelism that is reasonable for the table and - // the maximum amount of parallelism allowed by the system. - // - // Streams must be read starting from offset 0. - int32 requested_streams = 3; - - // Read options for this session (e.g. column selection, filters). - TableReadOptions read_options = 4; - - // Data output format. Currently default to Avro. - DataFormat format = 5; - - // The strategy to use for distributing data among multiple streams. Currently - // defaults to liquid sharding. - ShardingStrategy sharding_strategy = 7; -} - -// Data format for input or output data. -enum DataFormat { - // Data format is unspecified. - DATA_FORMAT_UNSPECIFIED = 0; - - // Avro is a standard open source row based file format. - // See https://avro.apache.org/ for more details. - AVRO = 1; - - ARROW = 3; -} - -// Strategy for distributing data among multiple streams in a read session. -enum ShardingStrategy { - // Same as LIQUID. - SHARDING_STRATEGY_UNSPECIFIED = 0; - - // Assigns data to each stream based on the client's read rate. The faster the - // client reads from a stream, the more data is assigned to the stream. In - // this strategy, it's possible to read all data from a single stream even if - // there are other streams present. - LIQUID = 1; - - // Assigns data to each stream such that roughly the same number of rows can - // be read from each stream. Because the server-side unit for assigning data - // is collections of rows, the API does not guarantee that each stream will - // return the same number or rows. Additionally, the limits are enforced based - // on the number of pre-filtering rows, so some filters can lead to lopsided - // assignments. - BALANCED = 2; -} - -// Requesting row data via `ReadRows` must provide Stream position information. -message ReadRowsRequest { - // Required. Identifier of the position in the stream to start reading from. - // The offset requested must be less than the last row read from ReadRows. - // Requesting a larger offset is undefined. - StreamPosition read_position = 1 [(google.api.field_behavior) = REQUIRED]; -} - -// Progress information for a given Stream. -message StreamStatus { - // Number of estimated rows in the current stream. May change over time as - // different readers in the stream progress at rates which are relatively fast - // or slow. - int64 estimated_row_count = 1; - - // A value in the range [0.0, 1.0] that represents the fraction of rows - // assigned to this stream that have been processed by the server. In the - // presence of read filters, the server may process more rows than it returns, - // so this value reflects progress through the pre-filtering rows. - // - // This value is only populated for sessions created through the BALANCED - // sharding strategy. - float fraction_consumed = 2; - - // Represents the progress of the current stream. - Progress progress = 4; - - // Whether this stream can be split. For sessions that use the LIQUID sharding - // strategy, this value is always false. For BALANCED sessions, this value is - // false when enough data have been read such that no more splits are possible - // at that point or beyond. For small tables or streams that are the result of - // a chain of splits, this value may never be true. 
- bool is_splittable = 3; -} - -message Progress { - // The fraction of rows assigned to the stream that have been processed by the - // server so far, not including the rows in the current response message. - // - // This value, along with `at_response_end`, can be used to interpolate the - // progress made as the rows in the message are being processed using the - // following formula: `at_response_start + (at_response_end - - // at_response_start) * rows_processed_from_response / rows_in_response`. - // - // Note that if a filter is provided, the `at_response_end` value of the - // previous response may not necessarily be equal to the `at_response_start` - // value of the current response. - float at_response_start = 1; - - // Similar to `at_response_start`, except that this value includes the rows in - // the current response. - float at_response_end = 2; -} - -// Information on if the current connection is being throttled. -message ThrottleStatus { - // How much this connection is being throttled. - // 0 is no throttling, 100 is completely throttled. - int32 throttle_percent = 1; -} - -// Response from calling `ReadRows` may include row data, progress and -// throttling information. -message ReadRowsResponse { - // Row data is returned in format specified during session creation. - oneof rows { - // Serialized row data in AVRO format. - AvroRows avro_rows = 3; - - // Serialized row data in Arrow RecordBatch format. - ArrowRecordBatch arrow_record_batch = 4; - } - - // Number of serialized rows in the rows block. This value is recorded here, - // in addition to the row_count values in the output-specific messages in - // `rows`, so that code which needs to record progress through the stream can - // do so in an output format-independent way. - int64 row_count = 6; - - // Estimated stream statistics. - StreamStatus status = 2; - - // Throttling status. If unset, the latest response still describes - // the current throttling status. - ThrottleStatus throttle_status = 5; -} - -// Information needed to request additional streams for an established read -// session. -message BatchCreateReadSessionStreamsRequest { - // Required. Must be a non-expired session obtained from a call to - // CreateReadSession. Only the name field needs to be set. - ReadSession session = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Number of new streams requested. Must be positive. - // Number of added streams may be less than this, see CreateReadSessionRequest - // for more information. - int32 requested_streams = 2 [(google.api.field_behavior) = REQUIRED]; -} - -// The response from `BatchCreateReadSessionStreams` returns the stream -// identifiers for the newly created streams. -message BatchCreateReadSessionStreamsResponse { - // Newly added streams. - repeated Stream streams = 1; -} - -// Request information for invoking `FinalizeStream`. -message FinalizeStreamRequest { - // Required. Stream to finalize. - Stream stream = 2 [(google.api.field_behavior) = REQUIRED]; -} - -// Request information for `SplitReadStream`. -message SplitReadStreamRequest { - // Required. Stream to split. - Stream original_stream = 1 [(google.api.field_behavior) = REQUIRED]; - - // A value in the range (0.0, 1.0) that specifies the fractional point at - // which the original stream should be split. 
The actual split point is - // evaluated on pre-filtered rows, so if a filter is provided, then there is - // no guarantee that the division of the rows between the new child streams - // will be proportional to this fractional value. Additionally, because the - // server-side unit for assigning data is collections of rows, this fraction - // will always map to to a data storage boundary on the server side. - float fraction = 2; -} - -// Response from `SplitReadStream`. -message SplitReadStreamResponse { - // Primary stream, which contains the beginning portion of - // |original_stream|. An empty value indicates that the original stream can no - // longer be split. - Stream primary_stream = 1; - - // Remainder stream, which contains the tail of |original_stream|. An empty - // value indicates that the original stream can no longer be split. - Stream remainder_stream = 2; -} diff --git a/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py b/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py deleted file mode 100644 index f77296ec..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2.py +++ /dev/null @@ -1,1766 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_storage_v1beta1/proto/storage.proto -"""Generated protocol buffer code.""" -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import resource_pb2 as google_dot_api_dot_resource__pb2 -from google.cloud.bigquery_storage_v1beta1.proto import ( - arrow_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_arrow__pb2, -) -from google.cloud.bigquery_storage_v1beta1.proto import ( - avro_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_avro__pb2, -) -from google.cloud.bigquery_storage_v1beta1.proto import ( - read_options_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_read__options__pb2, -) -from google.cloud.bigquery_storage_v1beta1.proto import ( - table_reference_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_table__reference__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta1/proto/storage.proto", - package="google.cloud.bigquery.storage.v1beta1", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage", - create_key=_descriptor._internal_create_key, - 
serialized_pb=b'\n9google/cloud/bigquery_storage_v1beta1/proto/storage.proto\x12%google.cloud.bigquery.storage.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\x1a\x37google/cloud/bigquery_storage_v1beta1/proto/arrow.proto\x1a\x36google/cloud/bigquery_storage_v1beta1/proto/avro.proto\x1a>google/cloud/bigquery_storage_v1beta1/proto/read_options.proto\x1a\x41google/cloud/bigquery_storage_v1beta1/proto/table_reference.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto"|\n\x06Stream\x12\x0c\n\x04name\x18\x01 \x01(\t:d\xea\x41\x61\n%bigquerystorage.googleapis.com/Stream\x12\x38projects/{project}/locations/{location}/streams/{stream}"_\n\x0eStreamPosition\x12=\n\x06stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12\x0e\n\x06offset\x18\x02 \x01(\x03"\x8d\x05\n\x0bReadSession\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\x0b\x65xpire_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12H\n\x0b\x61vro_schema\x18\x05 \x01(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta1.AvroSchemaH\x00\x12J\n\x0c\x61rrow_schema\x18\x06 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta1.ArrowSchemaH\x00\x12>\n\x07streams\x18\x04 \x03(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12N\n\x0ftable_reference\x18\x07 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableReference\x12N\n\x0ftable_modifiers\x18\x08 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableModifiers\x12R\n\x11sharding_strategy\x18\t \x01(\x0e\x32\x37.google.cloud.bigquery.storage.v1beta1.ShardingStrategy:k\xea\x41h\n*bigquerystorage.googleapis.com/ReadSession\x12:projects/{project}/locations/{location}/sessions/{session}B\x08\n\x06schema"\x85\x04\n\x18\x43reateReadSessionRequest\x12S\n\x0ftable_reference\x18\x01 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableReferenceB\x03\xe0\x41\x02\x12\x43\n\x06parent\x18\x06 \x01(\tB3\xe0\x41\x02\xfa\x41-\n+cloudresourcemanager.googleapis.com/Project\x12N\n\x0ftable_modifiers\x18\x02 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.TableModifiers\x12\x19\n\x11requested_streams\x18\x03 \x01(\x05\x12M\n\x0cread_options\x18\x04 \x01(\x0b\x32\x37.google.cloud.bigquery.storage.v1beta1.TableReadOptions\x12\x41\n\x06\x66ormat\x18\x05 \x01(\x0e\x32\x31.google.cloud.bigquery.storage.v1beta1.DataFormat\x12R\n\x11sharding_strategy\x18\x07 \x01(\x0e\x32\x37.google.cloud.bigquery.storage.v1beta1.ShardingStrategy"d\n\x0fReadRowsRequest\x12Q\n\rread_position\x18\x01 \x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.StreamPositionB\x03\xe0\x41\x02"\xa0\x01\n\x0cStreamStatus\x12\x1b\n\x13\x65stimated_row_count\x18\x01 \x01(\x03\x12\x19\n\x11\x66raction_consumed\x18\x02 \x01(\x02\x12\x41\n\x08progress\x18\x04 \x01(\x0b\x32/.google.cloud.bigquery.storage.v1beta1.Progress\x12\x15\n\ris_splittable\x18\x03 \x01(\x08">\n\x08Progress\x12\x19\n\x11\x61t_response_start\x18\x01 \x01(\x02\x12\x17\n\x0f\x61t_response_end\x18\x02 \x01(\x02"*\n\x0eThrottleStatus\x12\x18\n\x10throttle_percent\x18\x01 \x01(\x05"\xdf\x02\n\x10ReadRowsResponse\x12\x44\n\tavro_rows\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.storage.v1beta1.AvroRowsH\x00\x12U\n\x12\x61rrow_record_batch\x18\x04 \x01(\x0b\x32\x37.google.cloud.bigquery.storage.v1beta1.ArrowRecordBatchH\x00\x12\x11\n\trow_count\x18\x06 \x01(\x03\x12\x43\n\x06status\x18\x02 \x01(\x0b\x32\x33.google.cloud.bigquery.storage.v1beta1.StreamStatus\x12N\n\x0fthrottle_status\x18\x05 
\x01(\x0b\x32\x35.google.cloud.bigquery.storage.v1beta1.ThrottleStatusB\x06\n\x04rows"\x90\x01\n$BatchCreateReadSessionStreamsRequest\x12H\n\x07session\x18\x01 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta1.ReadSessionB\x03\xe0\x41\x02\x12\x1e\n\x11requested_streams\x18\x02 \x01(\x05\x42\x03\xe0\x41\x02"g\n%BatchCreateReadSessionStreamsResponse\x12>\n\x07streams\x18\x01 \x03(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream"[\n\x15\x46inalizeStreamRequest\x12\x42\n\x06stream\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.StreamB\x03\xe0\x41\x02"w\n\x16SplitReadStreamRequest\x12K\n\x0foriginal_stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.StreamB\x03\xe0\x41\x02\x12\x10\n\x08\x66raction\x18\x02 \x01(\x02"\xa9\x01\n\x17SplitReadStreamResponse\x12\x45\n\x0eprimary_stream\x18\x01 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream\x12G\n\x10remainder_stream\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.storage.v1beta1.Stream*>\n\nDataFormat\x12\x1b\n\x17\x44\x41TA_FORMAT_UNSPECIFIED\x10\x00\x12\x08\n\x04\x41VRO\x10\x01\x12\t\n\x05\x41RROW\x10\x03*O\n\x10ShardingStrategy\x12!\n\x1dSHARDING_STRATEGY_UNSPECIFIED\x10\x00\x12\n\n\x06LIQUID\x10\x01\x12\x0c\n\x08\x42\x41LANCED\x10\x02\x32\xeb\n\n\x0f\x42igQueryStorage\x12\xb3\x02\n\x11\x43reateReadSession\x12?.google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest\x1a\x32.google.cloud.bigquery.storage.v1beta1.ReadSession"\xa8\x01\x82\xd3\xe4\x93\x02w"0/v1beta1/{table_reference.project_id=projects/*}:\x01*Z@";/v1beta1/{table_reference.dataset_id=projects/*/datasets/*}:\x01*\xda\x41(table_reference,parent,requested_streams\x12\xd0\x01\n\x08ReadRows\x12\x36.google.cloud.bigquery.storage.v1beta1.ReadRowsRequest\x1a\x37.google.cloud.bigquery.storage.v1beta1.ReadRowsResponse"Q\x82\xd3\xe4\x93\x02;\x12\x39/v1beta1/{read_position.stream.name=projects/*/streams/*}\xda\x41\rread_position0\x01\x12\x90\x02\n\x1d\x42\x61tchCreateReadSessionStreams\x12K.google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest\x1aL.google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse"T\x82\xd3\xe4\x93\x02\x32"-/v1beta1/{session.name=projects/*/sessions/*}:\x01*\xda\x41\x19session,requested_streams\x12\xa7\x01\n\x0e\x46inalizeStream\x12<.google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest\x1a\x16.google.protobuf.Empty"?\x82\xd3\xe4\x93\x02\x30"+/v1beta1/{stream.name=projects/*/streams/*}:\x01*\xda\x41\x06stream\x12\xe0\x01\n\x0fSplitReadStream\x12=.google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest\x1a>.google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse"N\x82\xd3\xe4\x93\x02\x36\x12\x34/v1beta1/{original_stream.name=projects/*/streams/*}\xda\x41\x0foriginal_stream\x1a\xae\x01\xca\x41\x1e\x62igquerystorage.googleapis.com\xd2\x41\x89\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platformBy\n)com.google.cloud.bigquery.storage.v1beta1ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3', - dependencies=[ - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_resource__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_arrow__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_avro__pb2.DESCRIPTOR, - 
google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_read__options__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_table__reference__pb2.DESCRIPTOR, - google_dot_protobuf_dot_empty__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - ], -) - -_DATAFORMAT = _descriptor.EnumDescriptor( - name="DataFormat", - full_name="google.cloud.bigquery.storage.v1beta1.DataFormat", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DATA_FORMAT_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="AVRO", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARROW", - index=2, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3285, - serialized_end=3347, -) -_sym_db.RegisterEnumDescriptor(_DATAFORMAT) - -DataFormat = enum_type_wrapper.EnumTypeWrapper(_DATAFORMAT) -_SHARDINGSTRATEGY = _descriptor.EnumDescriptor( - name="ShardingStrategy", - full_name="google.cloud.bigquery.storage.v1beta1.ShardingStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="SHARDING_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LIQUID", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BALANCED", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3349, - serialized_end=3428, -) -_sym_db.RegisterEnumDescriptor(_SHARDINGSTRATEGY) - -ShardingStrategy = enum_type_wrapper.EnumTypeWrapper(_SHARDINGSTRATEGY) -DATA_FORMAT_UNSPECIFIED = 0 -AVRO = 1 -ARROW = 3 -SHARDING_STRATEGY_UNSPECIFIED = 0 -LIQUID = 1 -BALANCED = 2 - - -_STREAM = _descriptor.Descriptor( - name="Stream", - full_name="google.cloud.bigquery.storage.v1beta1.Stream", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1beta1.Stream.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"\352Aa\n%bigquerystorage.googleapis.com/Stream\0228projects/{project}/locations/{location}/streams/{stream}", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=521, - serialized_end=645, -) - - -_STREAMPOSITION = _descriptor.Descriptor( - name="StreamPosition", - full_name="google.cloud.bigquery.storage.v1beta1.StreamPosition", - filename=None, - file=DESCRIPTOR, - 
containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="stream", - full_name="google.cloud.bigquery.storage.v1beta1.StreamPosition.stream", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="offset", - full_name="google.cloud.bigquery.storage.v1beta1.StreamPosition.offset", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=647, - serialized_end=742, -) - - -_READSESSION = _descriptor.Descriptor( - name="ReadSession", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="expire_time", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.expire_time", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="avro_schema", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.avro_schema", - index=2, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_schema", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.arrow_schema", - index=3, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="streams", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.streams", - index=4, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table_reference", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.table_reference", - index=5, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table_modifiers", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.table_modifiers", - index=6, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="sharding_strategy", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.sharding_strategy", - index=7, - number=9, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"\352Ah\n*bigquerystorage.googleapis.com/ReadSession\022:projects/{project}/locations/{location}/sessions/{session}", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="schema", - full_name="google.cloud.bigquery.storage.v1beta1.ReadSession.schema", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=745, - serialized_end=1398, -) - - -_CREATEREADSESSIONREQUEST = _descriptor.Descriptor( - name="CreateReadSessionRequest", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="table_reference", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.table_reference", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="parent", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.parent", - index=1, - number=6, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A-\n+cloudresourcemanager.googleapis.com/Project", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table_modifiers", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.table_modifiers", - index=2, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="requested_streams", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.requested_streams", - index=3, - number=3, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="read_options", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.read_options", - index=4, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="format", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.format", - index=5, - number=5, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="sharding_strategy", - full_name="google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest.sharding_strategy", - index=6, - number=7, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1401, - serialized_end=1918, -) - - -_READROWSREQUEST = _descriptor.Descriptor( - name="ReadRowsRequest", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="read_position", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsRequest.read_position", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1920, - serialized_end=2020, -) - - -_STREAMSTATUS = _descriptor.Descriptor( - name="StreamStatus", - full_name="google.cloud.bigquery.storage.v1beta1.StreamStatus", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="estimated_row_count", - 
full_name="google.cloud.bigquery.storage.v1beta1.StreamStatus.estimated_row_count", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="fraction_consumed", - full_name="google.cloud.bigquery.storage.v1beta1.StreamStatus.fraction_consumed", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="progress", - full_name="google.cloud.bigquery.storage.v1beta1.StreamStatus.progress", - index=2, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="is_splittable", - full_name="google.cloud.bigquery.storage.v1beta1.StreamStatus.is_splittable", - index=3, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2023, - serialized_end=2183, -) - - -_PROGRESS = _descriptor.Descriptor( - name="Progress", - full_name="google.cloud.bigquery.storage.v1beta1.Progress", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="at_response_start", - full_name="google.cloud.bigquery.storage.v1beta1.Progress.at_response_start", - index=0, - number=1, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="at_response_end", - full_name="google.cloud.bigquery.storage.v1beta1.Progress.at_response_end", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2185, - serialized_end=2247, -) - - -_THROTTLESTATUS = _descriptor.Descriptor( - name="ThrottleStatus", - full_name="google.cloud.bigquery.storage.v1beta1.ThrottleStatus", - filename=None, - file=DESCRIPTOR, - containing_type=None, - 
create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="throttle_percent", - full_name="google.cloud.bigquery.storage.v1beta1.ThrottleStatus.throttle_percent", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2249, - serialized_end=2291, -) - - -_READROWSRESPONSE = _descriptor.Descriptor( - name="ReadRowsResponse", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="avro_rows", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.avro_rows", - index=0, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_record_batch", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.arrow_record_batch", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.row_count", - index=2, - number=6, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="status", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.status", - index=3, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="throttle_status", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.throttle_status", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="rows", - full_name="google.cloud.bigquery.storage.v1beta1.ReadRowsResponse.rows", - index=0, - 
containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=2294, - serialized_end=2645, -) - - -_BATCHCREATEREADSESSIONSTREAMSREQUEST = _descriptor.Descriptor( - name="BatchCreateReadSessionStreamsRequest", - full_name="google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="session", - full_name="google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest.session", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="requested_streams", - full_name="google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest.requested_streams", - index=1, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2648, - serialized_end=2792, -) - - -_BATCHCREATEREADSESSIONSTREAMSRESPONSE = _descriptor.Descriptor( - name="BatchCreateReadSessionStreamsResponse", - full_name="google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="streams", - full_name="google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse.streams", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2794, - serialized_end=2897, -) - - -_FINALIZESTREAMREQUEST = _descriptor.Descriptor( - name="FinalizeStreamRequest", - full_name="google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="stream", - full_name="google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest.stream", - index=0, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - 
serialized_start=2899, - serialized_end=2990, -) - - -_SPLITREADSTREAMREQUEST = _descriptor.Descriptor( - name="SplitReadStreamRequest", - full_name="google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="original_stream", - full_name="google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest.original_stream", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="fraction", - full_name="google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest.fraction", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2992, - serialized_end=3111, -) - - -_SPLITREADSTREAMRESPONSE = _descriptor.Descriptor( - name="SplitReadStreamResponse", - full_name="google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="primary_stream", - full_name="google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse.primary_stream", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="remainder_stream", - full_name="google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse.remainder_stream", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3114, - serialized_end=3283, -) - -_STREAMPOSITION.fields_by_name["stream"].message_type = _STREAM -_READSESSION.fields_by_name[ - "expire_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_READSESSION.fields_by_name[ - "avro_schema" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_avro__pb2._AVROSCHEMA -) -_READSESSION.fields_by_name[ - "arrow_schema" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_arrow__pb2._ARROWSCHEMA -) -_READSESSION.fields_by_name["streams"].message_type = _STREAM -_READSESSION.fields_by_name[ - "table_reference" -].message_type = ( - 
google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEREFERENCE -) -_READSESSION.fields_by_name[ - "table_modifiers" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEMODIFIERS -) -_READSESSION.fields_by_name["sharding_strategy"].enum_type = _SHARDINGSTRATEGY -_READSESSION.oneofs_by_name["schema"].fields.append( - _READSESSION.fields_by_name["avro_schema"] -) -_READSESSION.fields_by_name[ - "avro_schema" -].containing_oneof = _READSESSION.oneofs_by_name["schema"] -_READSESSION.oneofs_by_name["schema"].fields.append( - _READSESSION.fields_by_name["arrow_schema"] -) -_READSESSION.fields_by_name[ - "arrow_schema" -].containing_oneof = _READSESSION.oneofs_by_name["schema"] -_CREATEREADSESSIONREQUEST.fields_by_name[ - "table_reference" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEREFERENCE -) -_CREATEREADSESSIONREQUEST.fields_by_name[ - "table_modifiers" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_table__reference__pb2._TABLEMODIFIERS -) -_CREATEREADSESSIONREQUEST.fields_by_name[ - "read_options" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_read__options__pb2._TABLEREADOPTIONS -) -_CREATEREADSESSIONREQUEST.fields_by_name["format"].enum_type = _DATAFORMAT -_CREATEREADSESSIONREQUEST.fields_by_name[ - "sharding_strategy" -].enum_type = _SHARDINGSTRATEGY -_READROWSREQUEST.fields_by_name["read_position"].message_type = _STREAMPOSITION -_STREAMSTATUS.fields_by_name["progress"].message_type = _PROGRESS -_READROWSRESPONSE.fields_by_name[ - "avro_rows" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_avro__pb2._AVROROWS -) -_READROWSRESPONSE.fields_by_name[ - "arrow_record_batch" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_arrow__pb2._ARROWRECORDBATCH -) -_READROWSRESPONSE.fields_by_name["status"].message_type = _STREAMSTATUS -_READROWSRESPONSE.fields_by_name["throttle_status"].message_type = _THROTTLESTATUS -_READROWSRESPONSE.oneofs_by_name["rows"].fields.append( - _READROWSRESPONSE.fields_by_name["avro_rows"] -) -_READROWSRESPONSE.fields_by_name[ - "avro_rows" -].containing_oneof = _READROWSRESPONSE.oneofs_by_name["rows"] -_READROWSRESPONSE.oneofs_by_name["rows"].fields.append( - _READROWSRESPONSE.fields_by_name["arrow_record_batch"] -) -_READROWSRESPONSE.fields_by_name[ - "arrow_record_batch" -].containing_oneof = _READROWSRESPONSE.oneofs_by_name["rows"] -_BATCHCREATEREADSESSIONSTREAMSREQUEST.fields_by_name[ - "session" -].message_type = _READSESSION -_BATCHCREATEREADSESSIONSTREAMSRESPONSE.fields_by_name["streams"].message_type = _STREAM -_FINALIZESTREAMREQUEST.fields_by_name["stream"].message_type = _STREAM -_SPLITREADSTREAMREQUEST.fields_by_name["original_stream"].message_type = _STREAM -_SPLITREADSTREAMRESPONSE.fields_by_name["primary_stream"].message_type = _STREAM -_SPLITREADSTREAMRESPONSE.fields_by_name["remainder_stream"].message_type = _STREAM -DESCRIPTOR.message_types_by_name["Stream"] = _STREAM -DESCRIPTOR.message_types_by_name["StreamPosition"] = _STREAMPOSITION -DESCRIPTOR.message_types_by_name["ReadSession"] = _READSESSION -DESCRIPTOR.message_types_by_name["CreateReadSessionRequest"] = _CREATEREADSESSIONREQUEST -DESCRIPTOR.message_types_by_name["ReadRowsRequest"] = _READROWSREQUEST -DESCRIPTOR.message_types_by_name["StreamStatus"] = _STREAMSTATUS 
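
The hand-maintained descriptor wiring above (registering each message and patching `message_type` references by hand) is what application code no longer sees in 2.x: the library exposes these messages as proto-plus classes through the `types` module instead of a `_pb2` descriptor module. A minimal, non-authoritative sketch of working with the v1 `ReadSession` message that way, assuming the 2.x `google.cloud.bigquery.storage` surface (the project and table IDs are placeholders):

```py
from google.cloud.bigquery.storage import types

# Construct a read session request message directly; no _pb2 module is touched.
requested_session = types.ReadSession(
    table="projects/my-project/datasets/my_dataset/tables/my_table",  # placeholder IDs
    data_format=types.DataFormat.ARROW,
)

# The raw protobuf message is still reachable when descriptor-level access is needed.
pb_message = types.ReadSession.pb(requested_session)
print(pb_message.DESCRIPTOR.full_name)  # e.g. google.cloud.bigquery.storage.v1.ReadSession
```
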
-DESCRIPTOR.message_types_by_name["Progress"] = _PROGRESS -DESCRIPTOR.message_types_by_name["ThrottleStatus"] = _THROTTLESTATUS -DESCRIPTOR.message_types_by_name["ReadRowsResponse"] = _READROWSRESPONSE -DESCRIPTOR.message_types_by_name[ - "BatchCreateReadSessionStreamsRequest" -] = _BATCHCREATEREADSESSIONSTREAMSREQUEST -DESCRIPTOR.message_types_by_name[ - "BatchCreateReadSessionStreamsResponse" -] = _BATCHCREATEREADSESSIONSTREAMSRESPONSE -DESCRIPTOR.message_types_by_name["FinalizeStreamRequest"] = _FINALIZESTREAMREQUEST -DESCRIPTOR.message_types_by_name["SplitReadStreamRequest"] = _SPLITREADSTREAMREQUEST -DESCRIPTOR.message_types_by_name["SplitReadStreamResponse"] = _SPLITREADSTREAMRESPONSE -DESCRIPTOR.enum_types_by_name["DataFormat"] = _DATAFORMAT -DESCRIPTOR.enum_types_by_name["ShardingStrategy"] = _SHARDINGSTRATEGY -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Stream = _reflection.GeneratedProtocolMessageType( - "Stream", - (_message.Message,), - { - "DESCRIPTOR": _STREAM, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Information about a single data stream within a read session. - - Attributes: - name: - Name of the stream, in the form ``projects/{project_id}/locati - ons/{location}/streams/{stream_id}``. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.Stream) - }, -) -_sym_db.RegisterMessage(Stream) - -StreamPosition = _reflection.GeneratedProtocolMessageType( - "StreamPosition", - (_message.Message,), - { - "DESCRIPTOR": _STREAMPOSITION, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Expresses a point within a given stream using an offset position. - - Attributes: - stream: - Identifier for a given Stream. - offset: - Position in the stream. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.StreamPosition) - }, -) -_sym_db.RegisterMessage(StreamPosition) - -ReadSession = _reflection.GeneratedProtocolMessageType( - "ReadSession", - (_message.Message,), - { - "DESCRIPTOR": _READSESSION, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Information returned from a ``CreateReadSession`` request. - - Attributes: - name: - Unique identifier for the session, in the form ``projects/{pro - ject_id}/locations/{location}/sessions/{session_id}``. - expire_time: - Time at which the session becomes invalid. After this time, - subsequent requests to read this Session will return errors. - schema: - The schema for the read. If read_options.selected_fields is - set, the schema may be different from the table schema as it - will only contain the selected fields. - avro_schema: - Avro schema. - arrow_schema: - Arrow schema. - streams: - Streams associated with this session. - table_reference: - Table that this ReadSession is reading from. - table_modifiers: - Any modifiers which are applied when reading from the - specified table. - sharding_strategy: - The strategy to use for distributing data among the streams. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadSession) - }, -) -_sym_db.RegisterMessage(ReadSession) - -CreateReadSessionRequest = _reflection.GeneratedProtocolMessageType( - "CreateReadSessionRequest", - (_message.Message,), - { - "DESCRIPTOR": _CREATEREADSESSIONREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Creates a new read session, which may include additional options such - as requested parallelism, projection filters and constraints. - - Attributes: - table_reference: - Required. Reference to the table to read. - parent: - Required. String of the form ``projects/{project_id}`` - indicating the project this ReadSession is associated with. - This is the project that will be billed for usage. - table_modifiers: - Any modifiers to the Table (e.g. snapshot timestamp). - requested_streams: - Initial number of streams. If unset or 0, we will provide a - value of streams so as to produce reasonable throughput. Must - be non-negative. The number of streams may be lower than the - requested number, depending on the amount parallelism that is - reasonable for the table and the maximum amount of parallelism - allowed by the system. Streams must be read starting from - offset 0. - read_options: - Read options for this session (e.g. column selection, - filters). - format: - Data output format. Currently default to Avro. - sharding_strategy: - The strategy to use for distributing data among multiple - streams. Currently defaults to liquid sharding. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.CreateReadSessionRequest) - }, -) -_sym_db.RegisterMessage(CreateReadSessionRequest) - -ReadRowsRequest = _reflection.GeneratedProtocolMessageType( - "ReadRowsRequest", - (_message.Message,), - { - "DESCRIPTOR": _READROWSREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Requesting row data via ``ReadRows`` must provide Stream position - information. - - Attributes: - read_position: - Required. Identifier of the position in the stream to start - reading from. The offset requested must be less than the last - row read from ReadRows. Requesting a larger offset is - undefined. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadRowsRequest) - }, -) -_sym_db.RegisterMessage(ReadRowsRequest) - -StreamStatus = _reflection.GeneratedProtocolMessageType( - "StreamStatus", - (_message.Message,), - { - "DESCRIPTOR": _STREAMSTATUS, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Progress information for a given Stream. - - Attributes: - estimated_row_count: - Number of estimated rows in the current stream. May change - over time as different readers in the stream progress at rates - which are relatively fast or slow. - fraction_consumed: - A value in the range [0.0, 1.0] that represents the fraction - of rows assigned to this stream that have been processed by - the server. In the presence of read filters, the server may - process more rows than it returns, so this value reflects - progress through the pre-filtering rows. This value is only - populated for sessions created through the BALANCED sharding - strategy. - progress: - Represents the progress of the current stream. - is_splittable: - Whether this stream can be split. For sessions that use the - LIQUID sharding strategy, this value is always false. 
For - BALANCED sessions, this value is false when enough data have - been read such that no more splits are possible at that point - or beyond. For small tables or streams that are the result of - a chain of splits, this value may never be true. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.StreamStatus) - }, -) -_sym_db.RegisterMessage(StreamStatus) - -Progress = _reflection.GeneratedProtocolMessageType( - "Progress", - (_message.Message,), - { - "DESCRIPTOR": _PROGRESS, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Protocol buffer. - - Attributes: - at_response_start: - The fraction of rows assigned to the stream that have been - processed by the server so far, not including the rows in the - current response message. This value, along with - ``at_response_end``, can be used to interpolate the progress - made as the rows in the message are being processed using the - following formula: ``at_response_start + (at_response_end - - at_response_start) * rows_processed_from_response / - rows_in_response``. Note that if a filter is provided, the - ``at_response_end`` value of the previous response may not - necessarily be equal to the ``at_response_start`` value of the - current response. - at_response_end: - Similar to ``at_response_start``, except that this value - includes the rows in the current response. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.Progress) - }, -) -_sym_db.RegisterMessage(Progress) - -ThrottleStatus = _reflection.GeneratedProtocolMessageType( - "ThrottleStatus", - (_message.Message,), - { - "DESCRIPTOR": _THROTTLESTATUS, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Information on if the current connection is being throttled. - - Attributes: - throttle_percent: - How much this connection is being throttled. 0 is no - throttling, 100 is completely throttled. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ThrottleStatus) - }, -) -_sym_db.RegisterMessage(ThrottleStatus) - -ReadRowsResponse = _reflection.GeneratedProtocolMessageType( - "ReadRowsResponse", - (_message.Message,), - { - "DESCRIPTOR": _READROWSRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Response from calling ``ReadRows`` may include row data, progress and - throttling information. - - Attributes: - rows: - Row data is returned in format specified during session - creation. - avro_rows: - Serialized row data in AVRO format. - arrow_record_batch: - Serialized row data in Arrow RecordBatch format. - row_count: - Number of serialized rows in the rows block. This value is - recorded here, in addition to the row_count values in the - output-specific messages in ``rows``, so that code which needs - to record progress through the stream can do so in an output - format-independent way. - status: - Estimated stream statistics. - throttle_status: - Throttling status. If unset, the latest response still - describes the current throttling status. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.ReadRowsResponse) - }, -) -_sym_db.RegisterMessage(ReadRowsResponse) - -BatchCreateReadSessionStreamsRequest = _reflection.GeneratedProtocolMessageType( - "BatchCreateReadSessionStreamsRequest", - (_message.Message,), - { - "DESCRIPTOR": _BATCHCREATEREADSESSIONSTREAMSREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Information needed to request additional streams for an established - read session. - - Attributes: - session: - Required. Must be a non-expired session obtained from a call - to CreateReadSession. Only the name field needs to be set. - requested_streams: - Required. Number of new streams requested. Must be positive. - Number of added streams may be less than this, see - CreateReadSessionRequest for more information. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsRequest) - }, -) -_sym_db.RegisterMessage(BatchCreateReadSessionStreamsRequest) - -BatchCreateReadSessionStreamsResponse = _reflection.GeneratedProtocolMessageType( - "BatchCreateReadSessionStreamsResponse", - (_message.Message,), - { - "DESCRIPTOR": _BATCHCREATEREADSESSIONSTREAMSRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """The response from ``BatchCreateReadSessionStreams`` returns the stream - identifiers for the newly created streams. - - Attributes: - streams: - Newly added streams. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.BatchCreateReadSessionStreamsResponse) - }, -) -_sym_db.RegisterMessage(BatchCreateReadSessionStreamsResponse) - -FinalizeStreamRequest = _reflection.GeneratedProtocolMessageType( - "FinalizeStreamRequest", - (_message.Message,), - { - "DESCRIPTOR": _FINALIZESTREAMREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Request information for invoking ``FinalizeStream``. - - Attributes: - stream: - Required. Stream to finalize. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.FinalizeStreamRequest) - }, -) -_sym_db.RegisterMessage(FinalizeStreamRequest) - -SplitReadStreamRequest = _reflection.GeneratedProtocolMessageType( - "SplitReadStreamRequest", - (_message.Message,), - { - "DESCRIPTOR": _SPLITREADSTREAMREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Request information for ``SplitReadStream``. - - Attributes: - original_stream: - Required. Stream to split. - fraction: - A value in the range (0.0, 1.0) that specifies the fractional - point at which the original stream should be split. The actual - split point is evaluated on pre-filtered rows, so if a filter - is provided, then there is no guarantee that the division of - the rows between the new child streams will be proportional to - this fractional value. Additionally, because the server-side - unit for assigning data is collections of rows, this fraction - will always map to to a data storage boundary on the server - side. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.SplitReadStreamRequest) - }, -) -_sym_db.RegisterMessage(SplitReadStreamRequest) - -SplitReadStreamResponse = _reflection.GeneratedProtocolMessageType( - "SplitReadStreamResponse", - (_message.Message,), - { - "DESCRIPTOR": _SPLITREADSTREAMRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.storage_pb2", - "__doc__": """Response from ``SplitReadStream``. - - Attributes: - primary_stream: - Primary stream, which contains the beginning portion of - \|original_stream|. An empty value indicates that the original - stream can no longer be split. - remainder_stream: - Remainder stream, which contains the tail of - \|original_stream|. An empty value indicates that the original - stream can no longer be split. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.SplitReadStreamResponse) - }, -) -_sym_db.RegisterMessage(SplitReadStreamResponse) - - -DESCRIPTOR._options = None -_STREAM._options = None -_READSESSION._options = None -_CREATEREADSESSIONREQUEST.fields_by_name["table_reference"]._options = None -_CREATEREADSESSIONREQUEST.fields_by_name["parent"]._options = None -_READROWSREQUEST.fields_by_name["read_position"]._options = None -_BATCHCREATEREADSESSIONSTREAMSREQUEST.fields_by_name["session"]._options = None -_BATCHCREATEREADSESSIONSTREAMSREQUEST.fields_by_name[ - "requested_streams" -]._options = None -_FINALIZESTREAMREQUEST.fields_by_name["stream"]._options = None -_SPLITREADSTREAMREQUEST.fields_by_name["original_stream"]._options = None - -_BIGQUERYSTORAGE = _descriptor.ServiceDescriptor( - name="BigQueryStorage", - full_name="google.cloud.bigquery.storage.v1beta1.BigQueryStorage", - file=DESCRIPTOR, - index=0, - serialized_options=b"\312A\036bigquerystorage.googleapis.com\322A\211\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform", - create_key=_descriptor._internal_create_key, - serialized_start=3431, - serialized_end=4818, - methods=[ - _descriptor.MethodDescriptor( - name="CreateReadSession", - full_name="google.cloud.bigquery.storage.v1beta1.BigQueryStorage.CreateReadSession", - index=0, - containing_service=None, - input_type=_CREATEREADSESSIONREQUEST, - output_type=_READSESSION, - serialized_options=b'\202\323\344\223\002w"0/v1beta1/{table_reference.project_id=projects/*}:\001*Z@";/v1beta1/{table_reference.dataset_id=projects/*/datasets/*}:\001*\332A(table_reference,parent,requested_streams', - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="ReadRows", - full_name="google.cloud.bigquery.storage.v1beta1.BigQueryStorage.ReadRows", - index=1, - containing_service=None, - input_type=_READROWSREQUEST, - output_type=_READROWSRESPONSE, - serialized_options=b"\202\323\344\223\002;\0229/v1beta1/{read_position.stream.name=projects/*/streams/*}\332A\rread_position", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="BatchCreateReadSessionStreams", - full_name="google.cloud.bigquery.storage.v1beta1.BigQueryStorage.BatchCreateReadSessionStreams", - index=2, - containing_service=None, - input_type=_BATCHCREATEREADSESSIONSTREAMSREQUEST, - output_type=_BATCHCREATEREADSESSIONSTREAMSRESPONSE, - serialized_options=b'\202\323\344\223\0022"-/v1beta1/{session.name=projects/*/sessions/*}:\001*\332A\031session,requested_streams', - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.MethodDescriptor( - name="FinalizeStream", - full_name="google.cloud.bigquery.storage.v1beta1.BigQueryStorage.FinalizeStream", - index=3, - containing_service=None, - input_type=_FINALIZESTREAMREQUEST, - output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, - serialized_options=b'\202\323\344\223\0020"+/v1beta1/{stream.name=projects/*/streams/*}:\001*\332A\006stream', - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="SplitReadStream", - full_name="google.cloud.bigquery.storage.v1beta1.BigQueryStorage.SplitReadStream", - index=4, - containing_service=None, - input_type=_SPLITREADSTREAMREQUEST, - output_type=_SPLITREADSTREAMRESPONSE, - serialized_options=b"\202\323\344\223\0026\0224/v1beta1/{original_stream.name=projects/*/streams/*}\332A\017original_stream", - create_key=_descriptor._internal_create_key, - ), - ], -) -_sym_db.RegisterServiceDescriptor(_BIGQUERYSTORAGE) - -DESCRIPTOR.services_by_name["BigQueryStorage"] = _BIGQUERYSTORAGE - -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py deleted file mode 100644 index 4ebf8d74..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/storage_pb2_grpc.py +++ /dev/null @@ -1,312 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -from google.cloud.bigquery_storage_v1beta1.proto import ( - storage_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 - - -class BigQueryStorageStub(object): - """BigQuery storage API. - - The BigQuery storage API can be used to read data stored in BigQuery. - """ - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. 
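The stub constructor documented above only needs a ``grpc.Channel``. A bare-bones sketch follows; a real application would attach authorization to the channel (for example via ``google.auth.transport.grpc``) rather than use a plain SSL channel:

```py
# Sketch: constructing the raw gRPC stub described above, without authentication.
import grpc

from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2_grpc

channel = grpc.secure_channel(
    "bigquerystorage.googleapis.com:443", grpc.ssl_channel_credentials()
)
stub = storage_pb2_grpc.BigQueryStorageStub(channel)
```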
- """ - self.CreateReadSession = channel.unary_unary( - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/CreateReadSession", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.FromString, - ) - self.ReadRows = channel.unary_stream( - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/ReadRows", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.FromString, - ) - self.BatchCreateReadSessionStreams = channel.unary_unary( - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/BatchCreateReadSessionStreams", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.FromString, - ) - self.FinalizeStream = channel.unary_unary( - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/FinalizeStream", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.SerializeToString, - response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, - ) - self.SplitReadStream = channel.unary_unary( - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/SplitReadStream", - request_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.FromString, - ) - - -class BigQueryStorageServicer(object): - """BigQuery storage API. - - The BigQuery storage API can be used to read data stored in BigQuery. - """ - - def CreateReadSession(self, request, context): - """Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. - - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ReadRows(self, request, context): - """Reads rows from the table in the format prescribed by the read session. - Each response contains one or more table rows, up to a maximum of 10 MiB - per response; read requests which attempt to read individual rows larger - than this will fail. - - Each request also returns a set of stream statistics reflecting the - estimated total number of rows in the read stream. This number is computed - based on the total table size and the number of active streams in the read - session, and may change as other streams continue to read data. 
- """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def BatchCreateReadSessionStreams(self, request, context): - """Creates additional streams for a ReadSession. This API can be used to - dynamically adjust the parallelism of a batch processing task upwards by - adding additional workers. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def FinalizeStream(self, request, context): - """Triggers the graceful termination of a single stream in a ReadSession. This - API can be used to dynamically adjust the parallelism of a batch processing - task downwards without losing data. - - This API does not delete the stream -- it remains visible in the - ReadSession, and any data processed by the stream is not released to other - streams. However, no additional data will be assigned to the stream once - this call completes. Callers must continue reading data on the stream until - the end of the stream is reached so that data which has already been - assigned to the stream will be processed. - - This method will return an error if there are no other live streams - in the Session, or if SplitReadStream() has been called on the given - Stream. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def SplitReadStream(self, request, context): - """Splits a given read stream into two Streams. These streams are referred to - as the primary and the residual of the split. The original stream can still - be read from in the same manner as before. Both of the returned streams can - also be read from, and the total rows return by both child streams will be - the same as the rows read from the original stream. - - Moreover, the two child streams will be allocated back to back in the - original Stream. Concretely, it is guaranteed that for streams Original, - Primary, and Residual, that Original[0-j] = Primary[0-j] and - Original[j-n] = Residual[0-m] once the streams have been read to - completion. - - This method is guaranteed to be idempotent. 
- """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_BigQueryStorageServicer_to_server(servicer, server): - rpc_method_handlers = { - "CreateReadSession": grpc.unary_unary_rpc_method_handler( - servicer.CreateReadSession, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.SerializeToString, - ), - "ReadRows": grpc.unary_stream_rpc_method_handler( - servicer.ReadRows, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.SerializeToString, - ), - "BatchCreateReadSessionStreams": grpc.unary_unary_rpc_method_handler( - servicer.BatchCreateReadSessionStreams, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.SerializeToString, - ), - "FinalizeStream": grpc.unary_unary_rpc_method_handler( - servicer.FinalizeStream, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.FromString, - response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, - ), - "SplitReadStream": grpc.unary_unary_rpc_method_handler( - servicer.SplitReadStream, - request_deserializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - "google.cloud.bigquery.storage.v1beta1.BigQueryStorage", rpc_method_handlers - ) - server.add_generic_rpc_handlers((generic_handler,)) - - -# This class is part of an EXPERIMENTAL API. -class BigQueryStorage(object): - """BigQuery storage API. - - The BigQuery storage API can be used to read data stored in BigQuery. 
- """ - - @staticmethod - def CreateReadSession( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/CreateReadSession", - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.CreateReadSessionRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadSession.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ReadRows( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_stream( - request, - target, - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/ReadRows", - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.ReadRowsResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def BatchCreateReadSessionStreams( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/BatchCreateReadSessionStreams", - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.BatchCreateReadSessionStreamsResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def FinalizeStream( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/FinalizeStream", - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.FinalizeStreamRequest.SerializeToString, - google_dot_protobuf_dot_empty__pb2.Empty.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def SplitReadStream( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.storage.v1beta1.BigQueryStorage/SplitReadStream", - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamRequest.SerializeToString, - google_dot_cloud_dot_bigquery__storage__v1beta1_dot_proto_dot_storage__pb2.SplitReadStreamResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) diff --git a/google/cloud/bigquery_storage_v1beta1/proto/table_reference.proto 
b/google/cloud/bigquery_storage_v1beta1/proto/table_reference.proto deleted file mode 100644 index 4269392f..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/table_reference.proto +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta1; - -import "google/api/resource.proto"; -import "google/protobuf/timestamp.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage"; -option java_outer_classname = "TableReferenceProto"; -option java_package = "com.google.cloud.bigquery.storage.v1beta1"; - -// Table reference that includes just the 3 strings needed to identify a table. -message TableReference { - // The assigned project ID of the project. - string project_id = 1; - - // The ID of the dataset in the above project. - string dataset_id = 2; - - // The ID of the table in the above dataset. - string table_id = 3; -} - -// All fields in this message optional. -message TableModifiers { - // The snapshot time of the table. If not set, interpreted as now. - google.protobuf.Timestamp snapshot_time = 1; -} diff --git a/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py b/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py deleted file mode 100644 index 0d15b17c..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
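The deleted ``table_reference.proto`` above defines ``TableReference`` and ``TableModifiers``. A small sketch of constructing them through the v1beta1 ``types`` module (identifiers are placeholders):

```py
# Sketch: building the messages defined in the deleted table_reference.proto.
from google.cloud.bigquery_storage_v1beta1 import types

table_ref = types.TableReference(
    project_id="my-project",
    dataset_id="my_dataset",
    table_id="my_table",
)

modifiers = types.TableModifiers()
modifiers.snapshot_time.GetCurrentTime()  # read the table as of "now"
```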
-# source: google/cloud/bigquery_storage_v1beta1/proto/table_reference.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import resource_pb2 as google_dot_api_dot_resource__pb2 -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta1/proto/table_reference.proto", - package="google.cloud.bigquery.storage.v1beta1", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta1B\023TableReferenceProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\nAgoogle/cloud/bigquery_storage_v1beta1/proto/table_reference.proto\x12%google.cloud.bigquery.storage.v1beta1\x1a\x19google/api/resource.proto\x1a\x1fgoogle/protobuf/timestamp.proto"J\n\x0eTableReference\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x12\x10\n\x08table_id\x18\x03 \x01(\t"C\n\x0eTableModifiers\x12\x31\n\rsnapshot_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.TimestampB\x8e\x01\n)com.google.cloud.bigquery.storage.v1beta1B\x13TableReferenceProtoZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storageb\x06proto3', - dependencies=[ - google_dot_api_dot_resource__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - ], -) - - -_TABLEREFERENCE = _descriptor.Descriptor( - name="TableReference", - full_name="google.cloud.bigquery.storage.v1beta1.TableReference", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.storage.v1beta1.TableReference.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.storage.v1beta1.TableReference.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table_id", - full_name="google.cloud.bigquery.storage.v1beta1.TableReference.table_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=168, - 
serialized_end=242, -) - - -_TABLEMODIFIERS = _descriptor.Descriptor( - name="TableModifiers", - full_name="google.cloud.bigquery.storage.v1beta1.TableModifiers", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="snapshot_time", - full_name="google.cloud.bigquery.storage.v1beta1.TableModifiers.snapshot_time", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=244, - serialized_end=311, -) - -_TABLEMODIFIERS.fields_by_name[ - "snapshot_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -DESCRIPTOR.message_types_by_name["TableReference"] = _TABLEREFERENCE -DESCRIPTOR.message_types_by_name["TableModifiers"] = _TABLEMODIFIERS -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -TableReference = _reflection.GeneratedProtocolMessageType( - "TableReference", - (_message.Message,), - { - "DESCRIPTOR": _TABLEREFERENCE, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.table_reference_pb2", - "__doc__": """Table reference that includes just the 3 strings needed to identify a - table. - - Attributes: - project_id: - The assigned project ID of the project. - dataset_id: - The ID of the dataset in the above project. - table_id: - The ID of the table in the above dataset. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableReference) - }, -) -_sym_db.RegisterMessage(TableReference) - -TableModifiers = _reflection.GeneratedProtocolMessageType( - "TableModifiers", - (_message.Message,), - { - "DESCRIPTOR": _TABLEMODIFIERS, - "__module__": "google.cloud.bigquery_storage_v1beta1.proto.table_reference_pb2", - "__doc__": """All fields in this message optional. - - Attributes: - snapshot_time: - The snapshot time of the table. If not set, interpreted as - now. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta1.TableModifiers) - }, -) -_sym_db.RegisterMessage(TableModifiers) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta1/proto/table_reference_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta1/reader.py b/google/cloud/bigquery_storage_v1beta1/reader.py deleted file mode 100644 index 60942f29..00000000 --- a/google/cloud/bigquery_storage_v1beta1/reader.py +++ /dev/null @@ -1,644 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -import collections -import json - -try: - import fastavro -except ImportError: # pragma: NO COVER - fastavro = None -import google.api_core.exceptions - -try: - import pandas -except ImportError: # pragma: NO COVER - pandas = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None -import six - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - -from google.cloud.bigquery_storage_v1beta1 import types - - -_STREAM_RESUMPTION_EXCEPTIONS = (google.api_core.exceptions.ServiceUnavailable,) - -# The Google API endpoint can unexpectedly close long-running HTTP/2 streams. -# Unfortunately, this condition is surfaced to the caller as an internal error -# by gRPC. We don't want to resume on all internal errors, so instead we look -# for error message that we know are caused by problems that are safe to -# reconnect. -_STREAM_RESUMPTION_INTERNAL_ERROR_MESSAGES = ( - # See: https://github.com/googleapis/google-cloud-python/pull/9994 - "RST_STREAM", -) - -_FASTAVRO_REQUIRED = ( - "fastavro is required to parse ReadRowResponse messages with Avro bytes." -) -_PANDAS_REQUIRED = "pandas is required to create a DataFrame" -_PYARROW_REQUIRED = ( - "pyarrow is required to parse ReadRowResponse messages with Arrow bytes." -) - - -class ReadRowsStream(object): - """A stream of results from a read rows request. - - This stream is an iterable of - :class:`~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse`. - Iterate over it to fetch all row messages. - - If the fastavro library is installed, use the - :func:`~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream.rows()` - method to parse all messages into a stream of row dictionaries. - - If the pandas and fastavro libraries are installed, use the - :func:`~google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream.to_dataframe()` - method to parse all messages into a :class:`pandas.DataFrame`. - """ - - def __init__(self, wrapped, client, read_position, read_rows_kwargs): - """Construct a ReadRowsStream. - - Args: - wrapped (Iterable[ \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ - ]): - The ReadRows stream to read. - client ( \ - ~google.cloud.bigquery_storage_v1beta1.gapic. \ - big_query_storage_client.BigQueryStorageClient \ - ): - A GAPIC client used to reconnect to a ReadRows stream. This - must be the GAPIC client to avoid a circular dependency on - this class. - read_position (Union[ \ - dict, \ - ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ - ]): - Required. Identifier of the position in the stream to start - reading from. The offset requested must be less than the last - row read from ReadRows. Requesting a larger offset is - undefined. If a dict is provided, it must be of the same form - as the protobuf message - :class:`~google.cloud.bigquery_storage_v1beta1.types.StreamPosition` - read_rows_kwargs (dict): - Keyword arguments to use when reconnecting to a ReadRows - stream. 
- - Returns: - Iterable[ \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ - ]: - A sequence of row messages. - """ - - # Make a copy of the read position so that we can update it without - # mutating the original input. - self._position = _copy_stream_position(read_position) - self._client = client - self._wrapped = wrapped - self._read_rows_kwargs = read_rows_kwargs - - def __iter__(self): - """An iterable of messages. - - Returns: - Iterable[ \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ - ]: - A sequence of row messages. - """ - - # Infinite loop to reconnect on reconnectable errors while processing - # the row stream. - while True: - try: - for message in self._wrapped: - rowcount = message.row_count - self._position.offset += rowcount - yield message - - return # Made it through the whole stream. - except google.api_core.exceptions.InternalServerError as exc: - resumable_error = any( - resumable_message in exc.message - for resumable_message in _STREAM_RESUMPTION_INTERNAL_ERROR_MESSAGES - ) - if not resumable_error: - raise - except _STREAM_RESUMPTION_EXCEPTIONS: - # Transient error, so reconnect to the stream. - pass - - self._reconnect() - - def _reconnect(self): - """Reconnect to the ReadRows stream using the most recent offset.""" - self._wrapped = self._client.read_rows( - _copy_stream_position(self._position), **self._read_rows_kwargs - ) - - def rows(self, read_session): - """Iterate over all rows in the stream. - - This method requires the fastavro library in order to parse row - messages. - - .. warning:: - DATETIME columns are not supported. They are currently parsed as - strings in the fastavro library. - - Args: - read_session ( \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ - ): - The read session associated with this read rows stream. This - contains the schema, which is required to parse the data - messages. - - Returns: - Iterable[Mapping]: - A sequence of rows, represented as dictionaries. - """ - return ReadRowsIterable(self, read_session) - - def to_arrow(self, read_session): - """Create a :class:`pyarrow.Table` of all rows in the stream. - - This method requires the pyarrow library and a stream using the Arrow - format. - - Args: - read_session ( \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ - ): - The read session associated with this read rows stream. This - contains the schema, which is required to parse the data - messages. - - Returns: - pyarrow.Table: - A table of all rows in the stream. - """ - return self.rows(read_session).to_arrow() - - def to_dataframe(self, read_session, dtypes=None): - """Create a :class:`pandas.DataFrame` of all rows in the stream. - - This method requires the pandas libary to create a data frame and the - fastavro library to parse row messages. - - .. warning:: - DATETIME columns are not supported. They are currently parsed as - strings. - - Args: - read_session ( \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadSession \ - ): - The read session associated with this read rows stream. This - contains the schema, which is required to parse the data - messages. - dtypes ( \ - Map[str, Union[str, pandas.Series.dtype]] \ - ): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. - - Returns: - pandas.DataFrame: - A data frame of all rows in the stream. 
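Taken together, the ``rows()``, ``to_arrow()`` and ``to_dataframe()`` docstrings above describe the deprecated v1beta1 read flow. A hedged end-to-end sketch (project, dataset and dtype names are placeholders):

```py
# Sketch of the deprecated v1beta1 reader flow documented above.
from google.cloud import bigquery_storage_v1beta1
from google.cloud.bigquery_storage_v1beta1 import types

client = bigquery_storage_v1beta1.BigQueryStorageClient()
table_ref = types.TableReference(
    project_id="bigquery-public-data",
    dataset_id="usa_names",
    table_id="usa_1910_current",
)
session = client.create_read_session(table_ref, "projects/my-project")

position = types.StreamPosition(stream=session.streams[0])
reader = client.read_rows(position)

df = reader.to_dataframe(session, dtypes={"number": "int32"})
```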
- """ - if pandas is None: - raise ImportError(_PANDAS_REQUIRED) - - return self.rows(read_session).to_dataframe(dtypes=dtypes) - - -class ReadRowsIterable(object): - """An iterable of rows from a read session. - - Args: - reader (google.cloud.bigquery_storage_v1beta1.reader.ReadRowsStream): - A read rows stream. - read_session (google.cloud.bigquery_storage_v1beta1.types.ReadSession): - A read session. This is required because it contains the schema - used in the stream messages. - """ - - # This class is modelled after the google.cloud.bigquery.table.RowIterator - # and aims to be API compatible where possible. - - def __init__(self, reader, read_session): - self._status = None - self._reader = reader - self._read_session = read_session - self._stream_parser = _StreamParser.from_read_session(self._read_session) - - @property - def total_rows(self): - """int: Number of estimated rows in the current stream. - - May change over time. - """ - return getattr(self._status, "estimated_row_count", None) - - @property - def pages(self): - """A generator of all pages in the stream. - - Returns: - types.GeneratorType[google.cloud.bigquery_storage_v1beta1.ReadRowsPage]: - A generator of pages. - """ - # Each page is an iterator of rows. But also has num_items, remaining, - # and to_dataframe. - for message in self._reader: - self._status = message.status - yield ReadRowsPage(self._stream_parser, message) - - def __iter__(self): - """Iterator for each row in all pages.""" - for page in self.pages: - for row in page: - yield row - - def to_arrow(self): - """Create a :class:`pyarrow.Table` of all rows in the stream. - - This method requires the pyarrow library and a stream using the Arrow - format. - - Returns: - pyarrow.Table: - A table of all rows in the stream. - """ - record_batches = [] - for page in self.pages: - record_batches.append(page.to_arrow()) - return pyarrow.Table.from_batches(record_batches) - - def to_dataframe(self, dtypes=None): - """Create a :class:`pandas.DataFrame` of all rows in the stream. - - This method requires the pandas libary to create a data frame and the - fastavro library to parse row messages. - - .. warning:: - DATETIME columns are not supported. They are currently parsed as - strings in the fastavro library. - - Args: - dtypes ( \ - Map[str, Union[str, pandas.Series.dtype]] \ - ): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. - - Returns: - pandas.DataFrame: - A data frame of all rows in the stream. - """ - if pandas is None: - raise ImportError(_PANDAS_REQUIRED) - - if dtypes is None: - dtypes = {} - - # If it's an Arrow stream, calling to_arrow, then converting to a - # pandas dataframe is about 2x faster. This is because pandas.concat is - # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is - # usually no-copy. - schema_type = self._read_session.WhichOneof("schema") - if schema_type == "arrow_schema": - record_batch = self.to_arrow() - df = record_batch.to_pandas() - for column in dtypes: - df[column] = pandas.Series(df[column], dtype=dtypes[column]) - return df - - frames = [] - for page in self.pages: - frames.append(page.to_dataframe(dtypes=dtypes)) - return pandas.concat(frames) - - -class ReadRowsPage(object): - """An iterator of rows from a read session message. - - Args: - stream_parser (google.cloud.bigquery_storage_v1beta1.reader._StreamParser): - A helper for parsing messages into rows. 
- message (google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse): - A message of data from a read rows stream. - """ - - # This class is modeled after google.api_core.page_iterator.Page and aims - # to provide API compatibility where possible. - - def __init__(self, stream_parser, message): - self._stream_parser = stream_parser - self._message = message - self._iter_rows = None - self._num_items = self._message.row_count - self._remaining = self._message.row_count - - def _parse_rows(self): - """Parse rows from the message only once.""" - if self._iter_rows is not None: - return - - rows = self._stream_parser.to_rows(self._message) - self._iter_rows = iter(rows) - - @property - def num_items(self): - """int: Total items in the page.""" - return self._num_items - - @property - def remaining(self): - """int: Remaining items in the page.""" - return self._remaining - - def __iter__(self): - """A ``ReadRowsPage`` is an iterator.""" - return self - - def next(self): - """Get the next row in the page.""" - self._parse_rows() - if self._remaining > 0: - self._remaining -= 1 - return six.next(self._iter_rows) - - # Alias needed for Python 2/3 support. - __next__ = next - - def to_arrow(self): - """Create an :class:`pyarrow.RecordBatch` of rows in the page. - - Returns: - pyarrow.RecordBatch: - Rows from the message, as an Arrow record batch. - """ - return self._stream_parser.to_arrow(self._message) - - def to_dataframe(self, dtypes=None): - """Create a :class:`pandas.DataFrame` of rows in the page. - - This method requires the pandas libary to create a data frame and the - fastavro library to parse row messages. - - .. warning:: - DATETIME columns are not supported. They are currently parsed as - strings in the fastavro library. - - Args: - dtypes ( \ - Map[str, Union[str, pandas.Series.dtype]] \ - ): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. - - Returns: - pandas.DataFrame: - A data frame of all rows in the stream. - """ - if pandas is None: - raise ImportError(_PANDAS_REQUIRED) - - return self._stream_parser.to_dataframe(self._message, dtypes=dtypes) - - -class _StreamParser(object): - def to_arrow(self, message): - raise NotImplementedError("Not implemented.") - - def to_dataframe(self, message, dtypes=None): - raise NotImplementedError("Not implemented.") - - def to_rows(self, message): - raise NotImplementedError("Not implemented.") - - @staticmethod - def from_read_session(read_session): - schema_type = read_session.WhichOneof("schema") - if schema_type == "avro_schema": - return _AvroStreamParser(read_session) - elif schema_type == "arrow_schema": - return _ArrowStreamParser(read_session) - else: - raise TypeError( - "Unsupported schema type in read_session: {0}".format(schema_type) - ) - - -class _AvroStreamParser(_StreamParser): - """Helper to parse Avro messages into useful representations.""" - - def __init__(self, read_session): - """Construct an _AvroStreamParser. - - Args: - read_session (google.cloud.bigquery_storage_v1beta1.types.ReadSession): - A read session. This is required because it contains the schema - used in the stream messages. 
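Page-wise consumption via the ``pages`` property and ``ReadRowsPage`` described above looks roughly like the following sketch (``reader`` and ``session`` as in the previous sketch):

```py
# Sketch: iterating pages instead of individual rows.
rows = reader.rows(session)
for page in rows.pages:
    print(page.num_items, "rows in page,", page.remaining, "still unread")
    frame = page.to_dataframe()  # or page.to_arrow() for Arrow-format sessions
```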
- """ - if fastavro is None: - raise ImportError(_FASTAVRO_REQUIRED) - - self._read_session = read_session - self._avro_schema_json = None - self._fastavro_schema = None - self._column_names = None - - def to_arrow(self, message): - """Create an :class:`pyarrow.RecordBatch` of rows in the page. - - Args: - message (google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse): - Protocol buffer from the read rows stream, to convert into an - Arrow record batch. - - Returns: - pyarrow.RecordBatch: - Rows from the message, as an Arrow record batch. - """ - raise NotImplementedError("to_arrow not implemented for Avro streams.") - - def to_dataframe(self, message, dtypes=None): - """Create a :class:`pandas.DataFrame` of rows in the page. - - This method requires the pandas libary to create a data frame and the - fastavro library to parse row messages. - - .. warning:: - DATETIME columns are not supported. They are currently parsed as - strings in the fastavro library. - - Args: - dtypes ( \ - Map[str, Union[str, pandas.Series.dtype]] \ - ): - Optional. A dictionary of column names pandas ``dtype``s. The - provided ``dtype`` is used when constructing the series for - the column specified. Otherwise, the default pandas behavior - is used. - - Returns: - pandas.DataFrame: - A data frame of all rows in the stream. - """ - self._parse_avro_schema() - - if dtypes is None: - dtypes = {} - - columns = collections.defaultdict(list) - for row in self.to_rows(message): - for column in row: - columns[column].append(row[column]) - for column in dtypes: - columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) - return pandas.DataFrame(columns, columns=self._column_names) - - def _parse_avro_schema(self): - """Extract and parse Avro schema from a read session.""" - if self._avro_schema_json: - return - - self._avro_schema_json = json.loads(self._read_session.avro_schema.schema) - self._column_names = tuple( - (field["name"] for field in self._avro_schema_json["fields"]) - ) - - def _parse_fastavro(self): - """Convert parsed Avro schema to fastavro format.""" - self._parse_avro_schema() - self._fastavro_schema = fastavro.parse_schema(self._avro_schema_json) - - def to_rows(self, message): - """Parse all rows in a stream message. - - Args: - message ( \ - ~google.cloud.bigquery_storage_v1beta1.types.ReadRowsResponse \ - ): - A message containing Avro bytes to parse into rows. - - Returns: - Iterable[Mapping]: - A sequence of rows, represented as dictionaries. - """ - self._parse_fastavro() - messageio = six.BytesIO(message.avro_rows.serialized_binary_rows) - while True: - # Loop in a while loop because schemaless_reader can only read - # a single record. - try: - # TODO: Parse DATETIME into datetime.datetime (no timezone), - # instead of as a string. 
- yield fastavro.schemaless_reader(messageio, self._fastavro_schema) - except StopIteration: - break # Finished with message - - -class _ArrowStreamParser(_StreamParser): - def __init__(self, read_session): - if pyarrow is None: - raise ImportError(_PYARROW_REQUIRED) - - self._read_session = read_session - self._schema = None - - def to_arrow(self, message): - return self._parse_arrow_message(message) - - def to_rows(self, message): - record_batch = self._parse_arrow_message(message) - - # Iterate through each column simultaneously, and make a dict from the - # row values - for row in zip(*record_batch.columns): - yield dict(zip(self._column_names, row)) - - def to_dataframe(self, message, dtypes=None): - record_batch = self._parse_arrow_message(message) - - if dtypes is None: - dtypes = {} - - df = record_batch.to_pandas() - - for column in dtypes: - df[column] = pandas.Series(df[column], dtype=dtypes[column]) - - return df - - def _parse_arrow_message(self, message): - self._parse_arrow_schema() - - return pyarrow.ipc.read_record_batch( - pyarrow.py_buffer(message.arrow_record_batch.serialized_record_batch), - self._schema, - ) - - def _parse_arrow_schema(self): - if self._schema: - return - - self._schema = pyarrow.ipc.read_schema( - pyarrow.py_buffer(self._read_session.arrow_schema.serialized_schema) - ) - self._column_names = [field.name for field in self._schema] - - -def _copy_stream_position(position): - """Copy a StreamPosition. - - Args: - position (Union[ \ - dict, \ - ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition \ - ]): - StreamPostion (or dictionary in StreamPosition format) to copy. - - Returns: - ~google.cloud.bigquery_storage_v1beta1.types.StreamPosition: - A copy of the input StreamPostion. - """ - if isinstance(position, types.StreamPosition): - output = types.StreamPosition() - output.CopyFrom(position) - return output - - return types.StreamPosition(**position) diff --git a/google/cloud/bigquery_storage_v1beta1/types.py b/google/cloud/bigquery_storage_v1beta1/types.py deleted file mode 100644 index 2d343762..00000000 --- a/google/cloud/bigquery_storage_v1beta1/types.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from __future__ import absolute_import -import sys - -from google.api_core.protobuf_helpers import get_messages - -from google.cloud.bigquery_storage_v1beta1.proto import arrow_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import avro_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import read_options_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 -from google.protobuf import empty_pb2 -from google.protobuf import timestamp_pb2 - - -_shared_modules = [ - empty_pb2, - timestamp_pb2, -] - -_local_modules = [ - arrow_pb2, - avro_pb2, - read_options_pb2, - storage_pb2, - table_reference_pb2, -] - -names = [] - -for module in _shared_modules: # pragma: NO COVER - for name, message in get_messages(module).items(): - setattr(sys.modules[__name__], name, message) - names.append(name) -for module in _local_modules: - for name, message in get_messages(module).items(): - message.__module__ = "google.cloud.bigquery_storage_v1beta1.types" - setattr(sys.modules[__name__], name, message) - names.append(name) - - -__all__ = tuple(sorted(names)) diff --git a/google/cloud/bigquery_storage_v1beta2/__init__.py b/google/cloud/bigquery_storage_v1beta2/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta2/gapic/__init__.py b/google/cloud/bigquery_storage_v1beta2/gapic/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta2/gapic/big_query_read_client.py b/google/cloud/bigquery_storage_v1beta2/gapic/big_query_read_client.py deleted file mode 100644 index e088719d..00000000 --- a/google/cloud/bigquery_storage_v1beta2/gapic/big_query_read_client.py +++ /dev/null @@ -1,501 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
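The deleted ``types`` module above re-exports the generated protobuf messages under a single namespace. A brief sketch of the kind of construction it enabled (field values are placeholders):

```py
from google.cloud.bigquery_storage_v1beta1 import types

read_options = types.TableReadOptions(
    selected_fields=["name", "number"],
    row_restriction="state = 'WA'",
)
position = types.StreamPosition(offset=0)
```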
- -"""Accesses the google.cloud.bigquery.storage.v1beta2 BigQueryRead API.""" - -import pkg_resources -import warnings - -from google.oauth2 import service_account -import google.api_core.client_options -import google.api_core.gapic_v1.client_info -import google.api_core.gapic_v1.config -import google.api_core.gapic_v1.method -import google.api_core.path_template -import google.api_core.gapic_v1.routing_header -import google.api_core.grpc_helpers -import google.api_core.path_template -import grpc - -from google.cloud.bigquery_storage_v1beta2.gapic import big_query_read_client_config -from google.cloud.bigquery_storage_v1beta2.gapic import enums -from google.cloud.bigquery_storage_v1beta2.gapic.transports import ( - big_query_read_grpc_transport, -) -from google.cloud.bigquery_storage_v1beta2.proto import storage_pb2 -from google.cloud.bigquery_storage_v1beta2.proto import storage_pb2_grpc -from google.cloud.bigquery_storage_v1beta2.proto import stream_pb2 - - -_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( - "google-cloud-bigquery-storage", -).version - - -class BigQueryReadClient(object): - """ - BigQuery Read API. - - The Read API can be used to read data from BigQuery. - """ - - SERVICE_ADDRESS = "bigquerystorage.googleapis.com:443" - """The default address of the service.""" - - # The name of the interface for this client. This is the key used to - # find the method configuration in the client_config dictionary. - _INTERFACE_NAME = "google.cloud.bigquery.storage.v1beta2.BigQueryRead" - - @classmethod - def from_service_account_file(cls, filename, *args, **kwargs): - """Creates an instance of this client using the provided credentials - file. - - Args: - filename (str): The path to the service account private key json - file. - args: Additional arguments to pass to the constructor. - kwargs: Additional arguments to pass to the constructor. - - Returns: - BigQueryReadClient: The constructed client. - """ - credentials = service_account.Credentials.from_service_account_file(filename) - kwargs["credentials"] = credentials - return cls(*args, **kwargs) - - from_service_account_json = from_service_account_file - - @classmethod - def project_path(cls, project): - """Return a fully-qualified project string.""" - return google.api_core.path_template.expand( - "projects/{project}", project=project, - ) - - @classmethod - def read_session_path(cls, project, location, session): - """Return a fully-qualified read_session string.""" - return google.api_core.path_template.expand( - "projects/{project}/locations/{location}/sessions/{session}", - project=project, - location=location, - session=session, - ) - - @classmethod - def read_stream_path(cls, project, location, session, stream): - """Return a fully-qualified read_stream string.""" - return google.api_core.path_template.expand( - "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}", - project=project, - location=location, - session=session, - stream=stream, - ) - - @classmethod - def table_path(cls, project, dataset, table): - """Return a fully-qualified table string.""" - return google.api_core.path_template.expand( - "projects/{project}/datasets/{dataset}/tables/{table}", - project=project, - dataset=dataset, - table=table, - ) - - def __init__( - self, - transport=None, - channel=None, - credentials=None, - client_config=None, - client_info=None, - client_options=None, - ): - """Constructor. 
- - Args: - transport (Union[~.BigQueryReadGrpcTransport, - Callable[[~.Credentials, type], ~.BigQueryReadGrpcTransport]): A transport - instance, responsible for actually making the API calls. - The default transport uses the gRPC protocol. - This argument may also be a callable which returns a - transport instance. Callables will be sent the credentials - as the first argument and the default transport class as - the second argument. - channel (grpc.Channel): DEPRECATED. A ``Channel`` instance - through which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - This argument is mutually exclusive with providing a - transport instance to ``transport``; doing so will raise - an exception. - client_config (dict): DEPRECATED. A dictionary of call options for - each method. If not specified, the default configuration is used. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing - your own client library. - client_options (Union[dict, google.api_core.client_options.ClientOptions]): - Client options used to set user options on the client. API Endpoint - should be set through client_options. - """ - # Raise deprecation warnings for things we want to go away. - if client_config is not None: - warnings.warn( - "The `client_config` argument is deprecated.", - PendingDeprecationWarning, - stacklevel=2, - ) - else: - client_config = big_query_read_client_config.config - - if channel: - warnings.warn( - "The `channel` argument is deprecated; use " "`transport` instead.", - PendingDeprecationWarning, - stacklevel=2, - ) - - api_endpoint = self.SERVICE_ADDRESS - if client_options: - if type(client_options) == dict: - client_options = google.api_core.client_options.from_dict( - client_options - ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - - # Instantiate the transport. - # The transport is responsible for handling serialization and - # deserialization and actually sending data to the service. - if transport: # pragma: no cover - if callable(transport): - self.transport = transport( - credentials=credentials, - default_class=big_query_read_grpc_transport.BigQueryReadGrpcTransport, - address=api_endpoint, - ) - else: - if credentials: - raise ValueError( - "Received both a transport instance and " - "credentials; these are mutually exclusive." - ) - self.transport = transport - else: - self.transport = big_query_read_grpc_transport.BigQueryReadGrpcTransport( - address=api_endpoint, channel=channel, credentials=credentials, - ) - - if client_info is None: - client_info = google.api_core.gapic_v1.client_info.ClientInfo( - gapic_version=_GAPIC_LIBRARY_VERSION, - ) - else: - client_info.gapic_version = _GAPIC_LIBRARY_VERSION - self._client_info = client_info - - # Parse out the default settings for retry and timeout for each RPC - # from the client configuration. - # (Ordinarily, these are the defaults specified in the `*_config.py` - # file next to this one.) 
- self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( - client_config["interfaces"][self._INTERFACE_NAME], - ) - - # Save a dictionary of cached API call functions. - # These are the actual callables which invoke the proper - # transport methods, wrapped with `wrap_method` to add retry, - # timeout, and the like. - self._inner_api_calls = {} - - # Service calls - def create_read_session( - self, - parent, - read_session, - max_stream_count=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. - - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Data is assigned to each stream such that roughly the same number of - rows can be read from each stream. Because the server-side unit for - assigning data is collections of rows, the API does not guarantee that - each stream will return the same number or rows. Additionally, the - limits are enforced based on the number of pre-filtered rows, so some - filters can lead to lopsided assignments. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - - Example: - >>> from google.cloud import bigquery_storage_v1beta2 - >>> - >>> client = bigquery_storage_v1beta2.BigQueryReadClient() - >>> - >>> parent = client.project_path('[PROJECT]') - >>> - >>> # TODO: Initialize `read_session`: - >>> read_session = {} - >>> - >>> response = client.create_read_session(parent, read_session) - - Args: - parent (str): Request message for ``ReadRows``. - read_session (Union[dict, ~google.cloud.bigquery_storage_v1beta2.types.ReadSession]): Required. Session to be created. - - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.bigquery_storage_v1beta2.types.ReadSession` - max_stream_count (int): Max initial number of streams. If unset or zero, the server will - provide a value of streams so as to produce reasonable throughput. Must be - non-negative. The number of streams may be lower than the requested number, - depending on the amount parallelism that is reasonable for the table. Error - will be returned if the max count is greater than the current system - max limit of 1,000. - - Streams must be read starting from offset 0. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1beta2.types.ReadSession` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. 
- ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. - if "create_read_session" not in self._inner_api_calls: - self._inner_api_calls[ - "create_read_session" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.create_read_session, - default_retry=self._method_configs["CreateReadSession"].retry, - default_timeout=self._method_configs["CreateReadSession"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.CreateReadSessionRequest( - parent=parent, read_session=read_session, max_stream_count=max_stream_count, - ) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("read_session.table", read_session.table)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["create_read_session"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def read_rows( - self, - read_stream, - offset=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - Reads rows from the stream in the format prescribed by the ReadSession. - Each response contains one or more table rows, up to a maximum of 100 MiB - per response; read requests which attempt to read individual rows larger - than 100 MiB will fail. - - Each request also returns a set of stream statistics reflecting the current - state of the stream. - - Example: - >>> from google.cloud import bigquery_storage_v1beta2 - >>> - >>> client = bigquery_storage_v1beta2.BigQueryReadClient() - >>> - >>> read_stream = client.read_stream_path('[PROJECT]', '[LOCATION]', '[SESSION]', '[STREAM]') - >>> - >>> for element in client.read_rows(read_stream): - ... # process element - ... pass - - Args: - read_stream (str): Required. Stream to read rows from. - offset (long): The offset requested must be less than the last row read from Read. - Requesting a larger offset is undefined. If not specified, start reading - from offset zero. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - Iterable[~google.cloud.bigquery_storage_v1beta2.types.ReadRowsResponse]. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
- if "read_rows" not in self._inner_api_calls: - self._inner_api_calls[ - "read_rows" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.read_rows, - default_retry=self._method_configs["ReadRows"].retry, - default_timeout=self._method_configs["ReadRows"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.ReadRowsRequest(read_stream=read_stream, offset=offset,) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("read_stream", read_stream)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["read_rows"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - - def split_read_stream( - self, - name, - fraction=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - An indicator of the behavior of a given field (for example, that a - field is required in requests, or given as output but ignored as input). - This **does not** change the behavior in protocol buffers itself; it - only denotes the behavior and may affect how API tooling handles the - field. - - Note: This enum **may** receive new values in the future. - - Example: - >>> from google.cloud import bigquery_storage_v1beta2 - >>> - >>> client = bigquery_storage_v1beta2.BigQueryReadClient() - >>> - >>> name = client.read_stream_path('[PROJECT]', '[LOCATION]', '[SESSION]', '[STREAM]') - >>> - >>> response = client.split_read_stream(name) - - Args: - name (str): Required. Name of the stream to split. - fraction (float): A value in the range (0.0, 1.0) that specifies the fractional point at - which the original stream should be split. The actual split point is - evaluated on pre-filtered rows, so if a filter is provided, then there is - no guarantee that the division of the rows between the new child streams - will be proportional to this fractional value. Additionally, because the - server-side unit for assigning data is collections of rows, this fraction - will always map to a data storage boundary on the server side. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.bigquery_storage_v1beta2.types.SplitReadStreamResponse` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
- if "split_read_stream" not in self._inner_api_calls: - self._inner_api_calls[ - "split_read_stream" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.split_read_stream, - default_retry=self._method_configs["SplitReadStream"].retry, - default_timeout=self._method_configs["SplitReadStream"].timeout, - client_info=self._client_info, - ) - - request = storage_pb2.SplitReadStreamRequest(name=name, fraction=fraction,) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("name", name)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) # pragma: no cover - - return self._inner_api_calls["split_read_stream"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) diff --git a/google/cloud/bigquery_storage_v1beta2/gapic/big_query_read_client_config.py b/google/cloud/bigquery_storage_v1beta2/gapic/big_query_read_client_config.py deleted file mode 100644 index 4123f5e5..00000000 --- a/google/cloud/bigquery_storage_v1beta2/gapic/big_query_read_client_config.py +++ /dev/null @@ -1,67 +0,0 @@ -config = { - "interfaces": { - "google.cloud.bigquery.storage.v1beta2.BigQueryRead": { - "retry_codes": { - "retry_policy_1_codes": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "no_retry_codes": [], - "retry_policy_3_codes": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "retry_policy_2_codes": ["UNAVAILABLE"], - }, - "retry_params": { - "retry_policy_1_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 600000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 600000, - "total_timeout_millis": 600000, - }, - "retry_policy_3_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 600000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 600000, - "total_timeout_millis": 600000, - }, - "retry_policy_2_params": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 86400000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 86400000, - "total_timeout_millis": 86400000, - }, - "no_retry_params": { - "initial_retry_delay_millis": 0, - "retry_delay_multiplier": 0.0, - "max_retry_delay_millis": 0, - "initial_rpc_timeout_millis": 0, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 0, - "total_timeout_millis": 0, - }, - }, - "methods": { - "CreateReadSession": { - "timeout_millis": 120000, - "retry_codes_name": "retry_policy_1_codes", - "retry_params_name": "retry_policy_1_params", - }, - "ReadRows": { - "timeout_millis": 21600000, - "retry_codes_name": "retry_policy_2_codes", - "retry_params_name": "retry_policy_2_params", - }, - "SplitReadStream": { - "timeout_millis": 120000, - "retry_codes_name": "retry_policy_3_codes", - "retry_params_name": "retry_policy_3_params", - }, - }, - } - } -} diff --git a/google/cloud/bigquery_storage_v1beta2/gapic/enums.py b/google/cloud/bigquery_storage_v1beta2/gapic/enums.py deleted file mode 100644 index 1e51229d..00000000 --- a/google/cloud/bigquery_storage_v1beta2/gapic/enums.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the 
License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers for protocol buffer enum types.""" - -import enum - - -class DataFormat(enum.IntEnum): - """ - Data format for input or output data. - - Attributes: - DATA_FORMAT_UNSPECIFIED (int) - AVRO (int): Avro is a standard open source row based file format. - See https://avro.apache.org/ for more details. - ARROW (int): Arrow is a standard open source column-based message format. - See https://arrow.apache.org/ for more details. - """ - - DATA_FORMAT_UNSPECIFIED = 0 - AVRO = 1 - ARROW = 2 - - -class ArrowSerializationOptions(object): - class Format(enum.IntEnum): - """ - The IPC format to use when serializing Arrow streams. - - Attributes: - FORMAT_UNSPECIFIED (int): If unspecied the IPC format as of 0.15 release will be used. - ARROW_0_14 (int): Use the legacy IPC message format as of Apache Arrow Release 0.14. - ARROW_0_15 (int): Use the message format as of Apache Arrow Release 0.15. - """ - - FORMAT_UNSPECIFIED = 0 - ARROW_0_14 = 1 - ARROW_0_15 = 2 diff --git a/google/cloud/bigquery_storage_v1beta2/gapic/transports/__init__.py b/google/cloud/bigquery_storage_v1beta2/gapic/transports/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta2/gapic/transports/big_query_read_grpc_transport.py b/google/cloud/bigquery_storage_v1beta2/gapic/transports/big_query_read_grpc_transport.py deleted file mode 100644 index 53834967..00000000 --- a/google/cloud/bigquery_storage_v1beta2/gapic/transports/big_query_read_grpc_transport.py +++ /dev/null @@ -1,184 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import google.api_core.grpc_helpers - -from google.cloud.bigquery_storage_v1beta2.proto import storage_pb2_grpc - - -class BigQueryReadGrpcTransport(object): - """gRPC transport class providing stubs for - google.cloud.bigquery.storage.v1beta2 BigQueryRead API. - - The transport provides access to the raw gRPC stubs, - which can be used to take advantage of advanced - features of gRPC. - """ - - # The scopes needed to make gRPC calls to all of the methods defined - # in this service. - _OAUTH_SCOPES = ( - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/bigquery.readonly", - "https://www.googleapis.com/auth/cloud-platform", - ) - - def __init__( - self, - channel=None, - credentials=None, - address="bigquerystorage.googleapis.com:443", - ): - """Instantiate the transport class. - - Args: - channel (grpc.Channel): A ``Channel`` instance through - which to make calls. 
This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - address (str): The address where the service is hosted. - """ - # If both `channel` and `credentials` are specified, raise an - # exception (channels come with credentials baked in already). - if channel is not None and credentials is not None: # pragma: no cover - raise ValueError( - "The `channel` and `credentials` arguments are mutually " "exclusive.", - ) - - # Create the channel. - if channel is None: # pragma: no cover - channel = self.create_channel( - address=address, - credentials=credentials, - options={ - "grpc.max_send_message_length": -1, - "grpc.max_receive_message_length": -1, - }.items(), - ) - - self._channel = channel - - # gRPC uses objects called "stubs" that are bound to the - # channel and provide a basic method for each RPC. - self._stubs = { - "big_query_read_stub": storage_pb2_grpc.BigQueryReadStub(channel), - } - - @classmethod - def create_channel( - cls, address="bigquerystorage.googleapis.com:443", credentials=None, **kwargs - ): - """Create and return a gRPC channel object. - - Args: - address (str): The host for the channel to use. - credentials (~.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If - none are specified, the client will attempt to ascertain - the credentials from the environment. - kwargs (dict): Keyword arguments, which are passed to the - channel creation. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return google.api_core.grpc_helpers.create_channel( # pragma: no cover - address, credentials=credentials, scopes=cls._OAUTH_SCOPES, **kwargs - ) - - @property - def channel(self): - """The gRPC channel used by the transport. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return self._channel - - @property - def create_read_session(self): - """Return the gRPC stub for :meth:`BigQueryReadClient.create_read_session`. - - Creates a new read session. A read session divides the contents of a - BigQuery table into one or more streams, which can then be used to read - data from the table. The read session also specifies properties of the - data to be read, such as a list of columns or a push-down filter describing - the rows to be returned. - - A particular row can be read by at most one stream. When the caller has - reached the end of each stream in the session, then all the data in the - table has been read. - - Data is assigned to each stream such that roughly the same number of - rows can be read from each stream. Because the server-side unit for - assigning data is collections of rows, the API does not guarantee that - each stream will return the same number or rows. Additionally, the - limits are enforced based on the number of pre-filtered rows, so some - filters can lead to lopsided assignments. - - Read sessions automatically expire 24 hours after they are created and do - not require manual clean-up by the caller. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. 
- """ - return self._stubs["big_query_read_stub"].CreateReadSession - - @property - def read_rows(self): - """Return the gRPC stub for :meth:`BigQueryReadClient.read_rows`. - - Reads rows from the stream in the format prescribed by the ReadSession. - Each response contains one or more table rows, up to a maximum of 100 MiB - per response; read requests which attempt to read individual rows larger - than 100 MiB will fail. - - Each request also returns a set of stream statistics reflecting the current - state of the stream. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_read_stub"].ReadRows - - @property - def split_read_stream(self): - """Return the gRPC stub for :meth:`BigQueryReadClient.split_read_stream`. - - An indicator of the behavior of a given field (for example, that a - field is required in requests, or given as output but ignored as input). - This **does not** change the behavior in protocol buffers itself; it - only denotes the behavior and may affect how API tooling handles the - field. - - Note: This enum **may** receive new values in the future. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. - """ - return self._stubs["big_query_read_stub"].SplitReadStream diff --git a/google/cloud/bigquery_storage_v1beta2/proto/__init__.py b/google/cloud/bigquery_storage_v1beta2/proto/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/bigquery_storage_v1beta2/proto/arrow.proto b/google/cloud/bigquery_storage_v1beta2/proto/arrow.proto deleted file mode 100644 index 74733db9..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/arrow.proto +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta2; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; -option java_multiple_files = true; -option java_outer_classname = "ArrowProto"; -option java_package = "com.google.cloud.bigquery.storage.v1beta2"; - -// Arrow schema as specified in -// https://arrow.apache.org/docs/python/api/datatypes.html -// and serialized to bytes using IPC: -// https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc -// -// See code samples on how this message can be deserialized. -message ArrowSchema { - // IPC serialized Arrow schema. - bytes serialized_schema = 1; -} - -// Arrow RecordBatch. -message ArrowRecordBatch { - // IPC-serialized Arrow RecordBatch. - bytes serialized_record_batch = 1; -} - -// Contains options specific to Arrow Serialization. -message ArrowSerializationOptions { - // The IPC format to use when serializing Arrow streams. - enum Format { - // If unspecied the IPC format as of 0.15 release will be used. 
- FORMAT_UNSPECIFIED = 0; - - // Use the legacy IPC message format as of Apache Arrow Release 0.14. - ARROW_0_14 = 1; - - // Use the message format as of Apache Arrow Release 0.15. - ARROW_0_15 = 2; - } - - // The Arrow IPC format to use. - Format format = 1; -} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/arrow_pb2.py b/google/cloud/bigquery_storage_v1beta2/proto/arrow_pb2.py deleted file mode 100644 index a9237ddf..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/arrow_pb2.py +++ /dev/null @@ -1,256 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_storage_v1beta2/proto/arrow.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta2/proto/arrow.proto", - package="google.cloud.bigquery.storage.v1beta2", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta2B\nArrowProtoP\001ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n7google/cloud/bigquery_storage_v1beta2/proto/arrow.proto\x12%google.cloud.bigquery.storage.v1beta2"(\n\x0b\x41rrowSchema\x12\x19\n\x11serialized_schema\x18\x01 \x01(\x0c"3\n\x10\x41rrowRecordBatch\x12\x1f\n\x17serialized_record_batch\x18\x01 \x01(\x0c"\xb6\x01\n\x19\x41rrowSerializationOptions\x12W\n\x06\x66ormat\x18\x01 \x01(\x0e\x32G.google.cloud.bigquery.storage.v1beta2.ArrowSerializationOptions.Format"@\n\x06\x46ormat\x12\x16\n\x12\x46ORMAT_UNSPECIFIED\x10\x00\x12\x0e\n\nARROW_0_14\x10\x01\x12\x0e\n\nARROW_0_15\x10\x02\x42\x87\x01\n)com.google.cloud.bigquery.storage.v1beta2B\nArrowProtoP\x01ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storageb\x06proto3', -) - - -_ARROWSERIALIZATIONOPTIONS_FORMAT = _descriptor.EnumDescriptor( - name="Format", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowSerializationOptions.Format", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="FORMAT_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARROW_0_14", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARROW_0_15", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=312, - serialized_end=376, -) -_sym_db.RegisterEnumDescriptor(_ARROWSERIALIZATIONOPTIONS_FORMAT) - - -_ARROWSCHEMA = _descriptor.Descriptor( - name="ArrowSchema", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowSchema", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_schema", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowSchema.serialized_schema", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - 
has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=98, - serialized_end=138, -) - - -_ARROWRECORDBATCH = _descriptor.Descriptor( - name="ArrowRecordBatch", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowRecordBatch", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_record_batch", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowRecordBatch.serialized_record_batch", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=140, - serialized_end=191, -) - - -_ARROWSERIALIZATIONOPTIONS = _descriptor.Descriptor( - name="ArrowSerializationOptions", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowSerializationOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="format", - full_name="google.cloud.bigquery.storage.v1beta2.ArrowSerializationOptions.format", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[_ARROWSERIALIZATIONOPTIONS_FORMAT,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=194, - serialized_end=376, -) - -_ARROWSERIALIZATIONOPTIONS.fields_by_name[ - "format" -].enum_type = _ARROWSERIALIZATIONOPTIONS_FORMAT -_ARROWSERIALIZATIONOPTIONS_FORMAT.containing_type = _ARROWSERIALIZATIONOPTIONS -DESCRIPTOR.message_types_by_name["ArrowSchema"] = _ARROWSCHEMA -DESCRIPTOR.message_types_by_name["ArrowRecordBatch"] = _ARROWRECORDBATCH -DESCRIPTOR.message_types_by_name[ - "ArrowSerializationOptions" -] = _ARROWSERIALIZATIONOPTIONS -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ArrowSchema = _reflection.GeneratedProtocolMessageType( - "ArrowSchema", - (_message.Message,), - { - "DESCRIPTOR": _ARROWSCHEMA, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.arrow_pb2", - "__doc__": """Arrow schema as specified in - https://arrow.apache.org/docs/python/api/datatypes.html and serialized - to bytes using IPC: - https://arrow.apache.org/docs/format/Columnar.html#serialization-and- - interprocess-communication-ipc See code samples on how this message - can be deserialized. - - Attributes: - serialized_schema: - IPC serialized Arrow schema. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ArrowSchema) - }, -) -_sym_db.RegisterMessage(ArrowSchema) - -ArrowRecordBatch = _reflection.GeneratedProtocolMessageType( - "ArrowRecordBatch", - (_message.Message,), - { - "DESCRIPTOR": _ARROWRECORDBATCH, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.arrow_pb2", - "__doc__": """Arrow RecordBatch. - - Attributes: - serialized_record_batch: - IPC-serialized Arrow RecordBatch. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ArrowRecordBatch) - }, -) -_sym_db.RegisterMessage(ArrowRecordBatch) - -ArrowSerializationOptions = _reflection.GeneratedProtocolMessageType( - "ArrowSerializationOptions", - (_message.Message,), - { - "DESCRIPTOR": _ARROWSERIALIZATIONOPTIONS, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.arrow_pb2", - "__doc__": """Contains options specific to Arrow Serialization. - - Attributes: - format: - The Arrow IPC format to use. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ArrowSerializationOptions) - }, -) -_sym_db.RegisterMessage(ArrowSerializationOptions) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta2/proto/arrow_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta2/proto/arrow_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/arrow_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta2/proto/avro.proto b/google/cloud/bigquery_storage_v1beta2/proto/avro.proto deleted file mode 100644 index 37194690..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/avro.proto +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta2; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; -option java_multiple_files = true; -option java_outer_classname = "AvroProto"; -option java_package = "com.google.cloud.bigquery.storage.v1beta2"; - -// Avro schema. -message AvroSchema { - // Json serialized schema, as described at - // https://avro.apache.org/docs/1.8.1/spec.html. - string schema = 1; -} - -// Avro rows. -message AvroRows { - // Binary serialized rows in a block. - bytes serialized_binary_rows = 1; -} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/avro_pb2.py b/google/cloud/bigquery_storage_v1beta2/proto/avro_pb2.py deleted file mode 100644 index f55c9f73..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/avro_pb2.py +++ /dev/null @@ -1,147 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/bigquery_storage_v1beta2/proto/avro.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta2/proto/avro.proto", - package="google.cloud.bigquery.storage.v1beta2", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta2B\tAvroProtoP\001ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n6google/cloud/bigquery_storage_v1beta2/proto/avro.proto\x12%google.cloud.bigquery.storage.v1beta2"\x1c\n\nAvroSchema\x12\x0e\n\x06schema\x18\x01 \x01(\t"*\n\x08\x41vroRows\x12\x1e\n\x16serialized_binary_rows\x18\x01 \x01(\x0c\x42\x86\x01\n)com.google.cloud.bigquery.storage.v1beta2B\tAvroProtoP\x01ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storageb\x06proto3', -) - - -_AVROSCHEMA = _descriptor.Descriptor( - name="AvroSchema", - full_name="google.cloud.bigquery.storage.v1beta2.AvroSchema", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="schema", - full_name="google.cloud.bigquery.storage.v1beta2.AvroSchema.schema", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=97, - serialized_end=125, -) - - -_AVROROWS = _descriptor.Descriptor( - name="AvroRows", - full_name="google.cloud.bigquery.storage.v1beta2.AvroRows", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="serialized_binary_rows", - full_name="google.cloud.bigquery.storage.v1beta2.AvroRows.serialized_binary_rows", - index=0, - number=1, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"", - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=127, - serialized_end=169, -) - -DESCRIPTOR.message_types_by_name["AvroSchema"] = _AVROSCHEMA -DESCRIPTOR.message_types_by_name["AvroRows"] = _AVROROWS -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -AvroSchema = _reflection.GeneratedProtocolMessageType( - "AvroSchema", - (_message.Message,), - { - "DESCRIPTOR": _AVROSCHEMA, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.avro_pb2", - "__doc__": """Avro schema. - - Attributes: - schema: - Json serialized schema, as described at - https://avro.apache.org/docs/1.8.1/spec.html. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.AvroSchema) - }, -) -_sym_db.RegisterMessage(AvroSchema) - -AvroRows = _reflection.GeneratedProtocolMessageType( - "AvroRows", - (_message.Message,), - { - "DESCRIPTOR": _AVROROWS, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.avro_pb2", - "__doc__": """Avro rows. - - Attributes: - serialized_binary_rows: - Binary serialized rows in a block. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.AvroRows) - }, -) -_sym_db.RegisterMessage(AvroRows) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta2/proto/avro_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta2/proto/avro_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/avro_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta2/proto/storage.proto b/google/cloud/bigquery_storage_v1beta2/proto/storage.proto deleted file mode 100644 index 373c8352..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/storage.proto +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.bigquery.storage.v1beta2; - -import "google/api/annotations.proto"; -import "google/api/client.proto"; -import "google/api/field_behavior.proto"; -import "google/api/resource.proto"; -import "google/cloud/bigquery/storage/v1beta2/arrow.proto"; -import "google/cloud/bigquery/storage/v1beta2/avro.proto"; -import "google/cloud/bigquery/storage/v1beta2/stream.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; -option java_multiple_files = true; -option java_outer_classname = "StorageProto"; -option java_package = "com.google.cloud.bigquery.storage.v1beta2"; - -// BigQuery Read API. -// -// The Read API can be used to read data from BigQuery. -service BigQueryRead { - option (google.api.default_host) = "bigquerystorage.googleapis.com"; - option (google.api.oauth_scopes) = - "https://www.googleapis.com/auth/bigquery," - "https://www.googleapis.com/auth/bigquery.readonly," - "https://www.googleapis.com/auth/cloud-platform"; - - // Creates a new read session. A read session divides the contents of a - // BigQuery table into one or more streams, which can then be used to read - // data from the table. The read session also specifies properties of the - // data to be read, such as a list of columns or a push-down filter describing - // the rows to be returned. - // - // A particular row can be read by at most one stream. When the caller has - // reached the end of each stream in the session, then all the data in the - // table has been read. 
- // - // Data is assigned to each stream such that roughly the same number of - // rows can be read from each stream. Because the server-side unit for - // assigning data is collections of rows, the API does not guarantee that - // each stream will return the same number or rows. Additionally, the - // limits are enforced based on the number of pre-filtered rows, so some - // filters can lead to lopsided assignments. - // - // Read sessions automatically expire 24 hours after they are created and do - // not require manual clean-up by the caller. - rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) { - option (google.api.http) = { - post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}" - body: "*" - }; - option (google.api.method_signature) = "parent,read_session,max_stream_count"; - } - - // Reads rows from the stream in the format prescribed by the ReadSession. - // Each response contains one or more table rows, up to a maximum of 100 MiB - // per response; read requests which attempt to read individual rows larger - // than 100 MiB will fail. - // - // Each request also returns a set of stream statistics reflecting the current - // state of the stream. - rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) { - option (google.api.http) = { - get: "/v1beta2/{read_stream=projects/*/locations/*/sessions/*/streams/*}" - }; - option (google.api.method_signature) = "read_stream,offset"; - } - - // Splits a given `ReadStream` into two `ReadStream` objects. These - // `ReadStream` objects are referred to as the primary and the residual - // streams of the split. The original `ReadStream` can still be read from in - // the same manner as before. Both of the returned `ReadStream` objects can - // also be read from, and the rows returned by both child streams will be - // the same as the rows read from the original stream. - // - // Moreover, the two child streams will be allocated back-to-back in the - // original `ReadStream`. Concretely, it is guaranteed that for streams - // original, primary, and residual, that original[0-j] = primary[0-j] and - // original[j-n] = residual[0-m] once the streams have been read to - // completion. - rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) { - option (google.api.http) = { - get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}" - }; - } -} - -// Request message for `CreateReadSession`. -message CreateReadSessionRequest { - // Required. The request project that owns the session, in the form of - // `projects/{project_id}`. - string parent = 1 [ - (google.api.field_behavior) = REQUIRED, - (google.api.resource_reference) = { - type: "cloudresourcemanager.googleapis.com/Project" - } - ]; - - // Required. Session to be created. - ReadSession read_session = 2 [(google.api.field_behavior) = REQUIRED]; - - // Max initial number of streams. If unset or zero, the server will - // provide a value of streams so as to produce reasonable throughput. Must be - // non-negative. The number of streams may be lower than the requested number, - // depending on the amount parallelism that is reasonable for the table. Error - // will be returned if the max count is greater than the current system - // max limit of 1,000. - // - // Streams must be read starting from offset 0. - int32 max_stream_count = 3; -} - -// Request message for `ReadRows`. -message ReadRowsRequest { - // Required. Stream to read rows from. 
- string read_stream = 1 [ - (google.api.field_behavior) = REQUIRED, - (google.api.resource_reference) = { - type: "bigquerystorage.googleapis.com/ReadStream" - } - ]; - - // The offset requested must be less than the last row read from Read. - // Requesting a larger offset is undefined. If not specified, start reading - // from offset zero. - int64 offset = 2; -} - -// Information on if the current connection is being throttled. -message ThrottleState { - // How much this connection is being throttled. Zero means no throttling, - // 100 means fully throttled. - int32 throttle_percent = 1; -} - -// Estimated stream statistics for a given Stream. -message StreamStats { - message Progress { - // The fraction of rows assigned to the stream that have been processed by - // the server so far, not including the rows in the current response - // message. - // - // This value, along with `at_response_end`, can be used to interpolate - // the progress made as the rows in the message are being processed using - // the following formula: `at_response_start + (at_response_end - - // at_response_start) * rows_processed_from_response / rows_in_response`. - // - // Note that if a filter is provided, the `at_response_end` value of the - // previous response may not necessarily be equal to the - // `at_response_start` value of the current response. - double at_response_start = 1; - - // Similar to `at_response_start`, except that this value includes the - // rows in the current response. - double at_response_end = 2; - } - - // Represents the progress of the current stream. - Progress progress = 2; -} - -// Response from calling `ReadRows` may include row data, progress and -// throttling information. -message ReadRowsResponse { - // Row data is returned in format specified during session creation. - oneof rows { - // Serialized row data in AVRO format. - AvroRows avro_rows = 3; - - // Serialized row data in Arrow RecordBatch format. - ArrowRecordBatch arrow_record_batch = 4; - } - - // Number of serialized rows in the rows block. - int64 row_count = 6; - - // Statistics for the stream. - StreamStats stats = 2; - - // Throttling state. If unset, the latest response still describes - // the current throttling status. - ThrottleState throttle_state = 5; -} - -// Request message for `SplitReadStream`. -message SplitReadStreamRequest { - // Required. Name of the stream to split. - string name = 1 [ - (google.api.field_behavior) = REQUIRED, - (google.api.resource_reference) = { - type: "bigquerystorage.googleapis.com/ReadStream" - } - ]; - - // A value in the range (0.0, 1.0) that specifies the fractional point at - // which the original stream should be split. The actual split point is - // evaluated on pre-filtered rows, so if a filter is provided, then there is - // no guarantee that the division of the rows between the new child streams - // will be proportional to this fractional value. Additionally, because the - // server-side unit for assigning data is collections of rows, this fraction - // will always map to a data storage boundary on the server side. - double fraction = 2; -} - -// Response message for `SplitReadStream`. -message SplitReadStreamResponse { - // Primary stream, which contains the beginning portion of - // |original_stream|. An empty value indicates that the original stream can no - // longer be split. - ReadStream primary_stream = 1; - - // Remainder stream, which contains the tail of |original_stream|. An empty - // value indicates that the original stream can no longer be split. 
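The `StreamStats.Progress` comment above spells out an interpolation formula for estimating overall progress while the rows of a single response are being processed. As a tiny worked helper using only the quantities named in that comment (the function name is ours, for illustration):

```py
def interpolate_progress(at_response_start, at_response_end,
                         rows_processed_from_response, rows_in_response):
    """Apply the interpolation formula from the StreamStats.Progress comment."""
    return at_response_start + (
        at_response_end - at_response_start
    ) * rows_processed_from_response / rows_in_response


# Halfway through a response spanning 20% -> 30% of the stream is ~25% overall.
print(interpolate_progress(0.2, 0.3, 50, 100))  # ~0.25
```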
- ReadStream remainder_stream = 2; -} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/storage_pb2.py b/google/cloud/bigquery_storage_v1beta2/proto/storage_pb2.py deleted file mode 100644 index 342a23d7..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/storage_pb2.py +++ /dev/null @@ -1,870 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_storage_v1beta2/proto/storage.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import resource_pb2 as google_dot_api_dot_resource__pb2 -from google.cloud.bigquery_storage_v1beta2.proto import ( - arrow_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2, -) -from google.cloud.bigquery_storage_v1beta2.proto import ( - avro_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_avro__pb2, -) -from google.cloud.bigquery_storage_v1beta2.proto import ( - stream_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_stream__pb2, -) - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta2/proto/storage.proto", - package="google.cloud.bigquery.storage.v1beta2", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta2B\014StorageProtoP\001ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n9google/cloud/bigquery_storage_v1beta2/proto/storage.proto\x12%google.cloud.bigquery.storage.v1beta2\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\x1a\x37google/cloud/bigquery_storage_v1beta2/proto/arrow.proto\x1a\x36google/cloud/bigquery_storage_v1beta2/proto/avro.proto\x1a\x38google/cloud/bigquery_storage_v1beta2/proto/stream.proto"\xc8\x01\n\x18\x43reateReadSessionRequest\x12\x43\n\x06parent\x18\x01 \x01(\tB3\xe0\x41\x02\xfa\x41-\n+cloudresourcemanager.googleapis.com/Project\x12M\n\x0cread_session\x18\x02 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta2.ReadSessionB\x03\xe0\x41\x02\x12\x18\n\x10max_stream_count\x18\x03 \x01(\x05"i\n\x0fReadRowsRequest\x12\x46\n\x0bread_stream\x18\x01 \x01(\tB1\xe0\x41\x02\xfa\x41+\n)bigquerystorage.googleapis.com/ReadStream\x12\x0e\n\x06offset\x18\x02 \x01(\x03")\n\rThrottleState\x12\x18\n\x10throttle_percent\x18\x01 \x01(\x05"\x9c\x01\n\x0bStreamStats\x12M\n\x08progress\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.storage.v1beta2.StreamStats.Progress\x1a>\n\x08Progress\x12\x19\n\x11\x61t_response_start\x18\x01 \x01(\x01\x12\x17\n\x0f\x61t_response_end\x18\x02 \x01(\x01"\xdb\x02\n\x10ReadRowsResponse\x12\x44\n\tavro_rows\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.storage.v1beta2.AvroRowsH\x00\x12U\n\x12\x61rrow_record_batch\x18\x04 \x01(\x0b\x32\x37.google.cloud.bigquery.storage.v1beta2.ArrowRecordBatchH\x00\x12\x11\n\trow_count\x18\x06 \x01(\x03\x12\x41\n\x05stats\x18\x02 
\x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta2.StreamStats\x12L\n\x0ethrottle_state\x18\x05 \x01(\x0b\x32\x34.google.cloud.bigquery.storage.v1beta2.ThrottleStateB\x06\n\x04rows"k\n\x16SplitReadStreamRequest\x12?\n\x04name\x18\x01 \x01(\tB1\xe0\x41\x02\xfa\x41+\n)bigquerystorage.googleapis.com/ReadStream\x12\x10\n\x08\x66raction\x18\x02 \x01(\x01"\xb1\x01\n\x17SplitReadStreamResponse\x12I\n\x0eprimary_stream\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta2.ReadStream\x12K\n\x10remainder_stream\x18\x02 \x01(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta2.ReadStream2\xf3\x06\n\x0c\x42igQueryRead\x12\xf8\x01\n\x11\x43reateReadSession\x12?.google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest\x1a\x32.google.cloud.bigquery.storage.v1beta2.ReadSession"n\x82\xd3\xe4\x93\x02\x41".google.cloud.bigquery.storage.v1beta2.SplitReadStreamResponse"C\x82\xd3\xe4\x93\x02=\x12;/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}\x1a\xae\x01\xca\x41\x1e\x62igquerystorage.googleapis.com\xd2\x41\x89\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platformB\x89\x01\n)com.google.cloud.bigquery.storage.v1beta2B\x0cStorageProtoP\x01ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storageb\x06proto3', - dependencies=[ - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_resource__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_avro__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_stream__pb2.DESCRIPTOR, - ], -) - - -_CREATEREADSESSIONREQUEST = _descriptor.Descriptor( - name="CreateReadSessionRequest", - full_name="google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="parent", - full_name="google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest.parent", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A-\n+cloudresourcemanager.googleapis.com/Project", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="read_session", - full_name="google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest.read_session", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="max_stream_count", - full_name="google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest.max_stream_count", - index=2, - number=3, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=387, - serialized_end=587, -) - - -_READROWSREQUEST = _descriptor.Descriptor( - name="ReadRowsRequest", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="read_stream", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsRequest.read_stream", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A+\n)bigquerystorage.googleapis.com/ReadStream", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="offset", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsRequest.offset", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=589, - serialized_end=694, -) - - -_THROTTLESTATE = _descriptor.Descriptor( - name="ThrottleState", - full_name="google.cloud.bigquery.storage.v1beta2.ThrottleState", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="throttle_percent", - full_name="google.cloud.bigquery.storage.v1beta2.ThrottleState.throttle_percent", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=696, - serialized_end=737, -) - - -_STREAMSTATS_PROGRESS = _descriptor.Descriptor( - name="Progress", - full_name="google.cloud.bigquery.storage.v1beta2.StreamStats.Progress", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="at_response_start", - full_name="google.cloud.bigquery.storage.v1beta2.StreamStats.Progress.at_response_start", - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="at_response_end", - full_name="google.cloud.bigquery.storage.v1beta2.StreamStats.Progress.at_response_end", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - 
has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=834, - serialized_end=896, -) - -_STREAMSTATS = _descriptor.Descriptor( - name="StreamStats", - full_name="google.cloud.bigquery.storage.v1beta2.StreamStats", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="progress", - full_name="google.cloud.bigquery.storage.v1beta2.StreamStats.progress", - index=0, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_STREAMSTATS_PROGRESS,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=740, - serialized_end=896, -) - - -_READROWSRESPONSE = _descriptor.Descriptor( - name="ReadRowsResponse", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="avro_rows", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse.avro_rows", - index=0, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_record_batch", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse.arrow_record_batch", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_count", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse.row_count", - index=2, - number=6, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="stats", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse.stats", - index=3, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="throttle_state", - 
full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse.throttle_state", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="rows", - full_name="google.cloud.bigquery.storage.v1beta2.ReadRowsResponse.rows", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=899, - serialized_end=1246, -) - - -_SPLITREADSTREAMREQUEST = _descriptor.Descriptor( - name="SplitReadStreamRequest", - full_name="google.cloud.bigquery.storage.v1beta2.SplitReadStreamRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1beta2.SplitReadStreamRequest.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002\372A+\n)bigquerystorage.googleapis.com/ReadStream", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="fraction", - full_name="google.cloud.bigquery.storage.v1beta2.SplitReadStreamRequest.fraction", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1248, - serialized_end=1355, -) - - -_SPLITREADSTREAMRESPONSE = _descriptor.Descriptor( - name="SplitReadStreamResponse", - full_name="google.cloud.bigquery.storage.v1beta2.SplitReadStreamResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="primary_stream", - full_name="google.cloud.bigquery.storage.v1beta2.SplitReadStreamResponse.primary_stream", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="remainder_stream", - full_name="google.cloud.bigquery.storage.v1beta2.SplitReadStreamResponse.remainder_stream", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - 
nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1358, - serialized_end=1535, -) - -_CREATEREADSESSIONREQUEST.fields_by_name[ - "read_session" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_stream__pb2._READSESSION -) -_STREAMSTATS_PROGRESS.containing_type = _STREAMSTATS -_STREAMSTATS.fields_by_name["progress"].message_type = _STREAMSTATS_PROGRESS -_READROWSRESPONSE.fields_by_name[ - "avro_rows" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_avro__pb2._AVROROWS -) -_READROWSRESPONSE.fields_by_name[ - "arrow_record_batch" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2._ARROWRECORDBATCH -) -_READROWSRESPONSE.fields_by_name["stats"].message_type = _STREAMSTATS -_READROWSRESPONSE.fields_by_name["throttle_state"].message_type = _THROTTLESTATE -_READROWSRESPONSE.oneofs_by_name["rows"].fields.append( - _READROWSRESPONSE.fields_by_name["avro_rows"] -) -_READROWSRESPONSE.fields_by_name[ - "avro_rows" -].containing_oneof = _READROWSRESPONSE.oneofs_by_name["rows"] -_READROWSRESPONSE.oneofs_by_name["rows"].fields.append( - _READROWSRESPONSE.fields_by_name["arrow_record_batch"] -) -_READROWSRESPONSE.fields_by_name[ - "arrow_record_batch" -].containing_oneof = _READROWSRESPONSE.oneofs_by_name["rows"] -_SPLITREADSTREAMRESPONSE.fields_by_name[ - "primary_stream" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_stream__pb2._READSTREAM -) -_SPLITREADSTREAMRESPONSE.fields_by_name[ - "remainder_stream" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_stream__pb2._READSTREAM -) -DESCRIPTOR.message_types_by_name["CreateReadSessionRequest"] = _CREATEREADSESSIONREQUEST -DESCRIPTOR.message_types_by_name["ReadRowsRequest"] = _READROWSREQUEST -DESCRIPTOR.message_types_by_name["ThrottleState"] = _THROTTLESTATE -DESCRIPTOR.message_types_by_name["StreamStats"] = _STREAMSTATS -DESCRIPTOR.message_types_by_name["ReadRowsResponse"] = _READROWSRESPONSE -DESCRIPTOR.message_types_by_name["SplitReadStreamRequest"] = _SPLITREADSTREAMREQUEST -DESCRIPTOR.message_types_by_name["SplitReadStreamResponse"] = _SPLITREADSTREAMRESPONSE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -CreateReadSessionRequest = _reflection.GeneratedProtocolMessageType( - "CreateReadSessionRequest", - (_message.Message,), - { - "DESCRIPTOR": _CREATEREADSESSIONREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Request message for ``CreateReadSession``. - - Attributes: - parent: - Required. The request project that owns the session, in the - form of ``projects/{project_id}``. - read_session: - Required. Session to be created. - max_stream_count: - Max initial number of streams. If unset or zero, the server - will provide a value of streams so as to produce reasonable - throughput. Must be non-negative. The number of streams may be - lower than the requested number, depending on the amount - parallelism that is reasonable for the table. Error will be - returned if the max count is greater than the current system - max limit of 1,000. Streams must be read starting from offset - 0. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.CreateReadSessionRequest) - }, -) -_sym_db.RegisterMessage(CreateReadSessionRequest) - -ReadRowsRequest = _reflection.GeneratedProtocolMessageType( - "ReadRowsRequest", - (_message.Message,), - { - "DESCRIPTOR": _READROWSREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Request message for ``ReadRows``. - - Attributes: - read_stream: - Required. Stream to read rows from. - offset: - The offset requested must be less than the last row read from - Read. Requesting a larger offset is undefined. If not - specified, start reading from offset zero. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ReadRowsRequest) - }, -) -_sym_db.RegisterMessage(ReadRowsRequest) - -ThrottleState = _reflection.GeneratedProtocolMessageType( - "ThrottleState", - (_message.Message,), - { - "DESCRIPTOR": _THROTTLESTATE, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Information on if the current connection is being throttled. - - Attributes: - throttle_percent: - How much this connection is being throttled. Zero means no - throttling, 100 means fully throttled. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ThrottleState) - }, -) -_sym_db.RegisterMessage(ThrottleState) - -StreamStats = _reflection.GeneratedProtocolMessageType( - "StreamStats", - (_message.Message,), - { - "Progress": _reflection.GeneratedProtocolMessageType( - "Progress", - (_message.Message,), - { - "DESCRIPTOR": _STREAMSTATS_PROGRESS, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Protocol buffer. - - Attributes: - at_response_start: - The fraction of rows assigned to the stream that have been - processed by the server so far, not including the rows in the - current response message. This value, along with - ``at_response_end``, can be used to interpolate the progress - made as the rows in the message are being processed using the - following formula: ``at_response_start + (at_response_end - - at_response_start) * rows_processed_from_response / - rows_in_response``. Note that if a filter is provided, the - ``at_response_end`` value of the previous response may not - necessarily be equal to the ``at_response_start`` value of the - current response. - at_response_end: - Similar to ``at_response_start``, except that this value - includes the rows in the current response. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.StreamStats.Progress) - }, - ), - "DESCRIPTOR": _STREAMSTATS, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Estimated stream statistics for a given Stream. - - Attributes: - progress: - Represents the progress of the current stream. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.StreamStats) - }, -) -_sym_db.RegisterMessage(StreamStats) -_sym_db.RegisterMessage(StreamStats.Progress) - -ReadRowsResponse = _reflection.GeneratedProtocolMessageType( - "ReadRowsResponse", - (_message.Message,), - { - "DESCRIPTOR": _READROWSRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Response from calling ``ReadRows`` may include row data, progress and - throttling information. - - Attributes: - rows: - Row data is returned in format specified during session - creation. 
- avro_rows: - Serialized row data in AVRO format. - arrow_record_batch: - Serialized row data in Arrow RecordBatch format. - row_count: - Number of serialized rows in the rows block. - stats: - Statistics for the stream. - throttle_state: - Throttling state. If unset, the latest response still - describes the current throttling status. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ReadRowsResponse) - }, -) -_sym_db.RegisterMessage(ReadRowsResponse) - -SplitReadStreamRequest = _reflection.GeneratedProtocolMessageType( - "SplitReadStreamRequest", - (_message.Message,), - { - "DESCRIPTOR": _SPLITREADSTREAMREQUEST, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Request message for ``SplitReadStream``. - - Attributes: - name: - Required. Name of the stream to split. - fraction: - A value in the range (0.0, 1.0) that specifies the fractional - point at which the original stream should be split. The actual - split point is evaluated on pre-filtered rows, so if a filter - is provided, then there is no guarantee that the division of - the rows between the new child streams will be proportional to - this fractional value. Additionally, because the server-side - unit for assigning data is collections of rows, this fraction - will always map to a data storage boundary on the server side. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.SplitReadStreamRequest) - }, -) -_sym_db.RegisterMessage(SplitReadStreamRequest) - -SplitReadStreamResponse = _reflection.GeneratedProtocolMessageType( - "SplitReadStreamResponse", - (_message.Message,), - { - "DESCRIPTOR": _SPLITREADSTREAMRESPONSE, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.storage_pb2", - "__doc__": """Response message for ``SplitReadStream``. - - Attributes: - primary_stream: - Primary stream, which contains the beginning portion of - \|original_stream|. An empty value indicates that the original - stream can no longer be split. - remainder_stream: - Remainder stream, which contains the tail of - \|original_stream|. An empty value indicates that the original - stream can no longer be split. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.SplitReadStreamResponse) - }, -) -_sym_db.RegisterMessage(SplitReadStreamResponse) - - -DESCRIPTOR._options = None -_CREATEREADSESSIONREQUEST.fields_by_name["parent"]._options = None -_CREATEREADSESSIONREQUEST.fields_by_name["read_session"]._options = None -_READROWSREQUEST.fields_by_name["read_stream"]._options = None -_SPLITREADSTREAMREQUEST.fields_by_name["name"]._options = None - -_BIGQUERYREAD = _descriptor.ServiceDescriptor( - name="BigQueryRead", - full_name="google.cloud.bigquery.storage.v1beta2.BigQueryRead", - file=DESCRIPTOR, - index=0, - serialized_options=b"\312A\036bigquerystorage.googleapis.com\322A\211\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform", - create_key=_descriptor._internal_create_key, - serialized_start=1538, - serialized_end=2421, - methods=[ - _descriptor.MethodDescriptor( - name="CreateReadSession", - full_name="google.cloud.bigquery.storage.v1beta2.BigQueryRead.CreateReadSession", - index=0, - containing_service=None, - input_type=_CREATEREADSESSIONREQUEST, - output_type=google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_stream__pb2._READSESSION, - serialized_options=b'\202\323\344\223\002A" 5" - // "date_field = CAST('2014-9-27' as DATE)" - // "nullable_field is not NULL" - // "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))" - // "numeric_field BETWEEN 1.0 AND 5.0" - string row_restriction = 2; - - // Optional. Options specific to the Apache Arrow output format. - ArrowSerializationOptions arrow_serialization_options = 3 [(google.api.field_behavior) = OPTIONAL]; - } - - // Output only. Unique identifier for the session, in the form - // `projects/{project_id}/locations/{location}/sessions/{session_id}`. - string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Time at which the session becomes invalid. After this time, subsequent - // requests to read this Session will return errors. The expire_time is - // automatically assigned and currently cannot be specified or updated. - google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Immutable. Data format of the output data. - DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE]; - - // The schema for the read. If read_options.selected_fields is set, the - // schema may be different from the table schema as it will only contain - // the selected fields. - oneof schema { - // Output only. Avro schema. - AvroSchema avro_schema = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Arrow schema. - ArrowSchema arrow_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; - } - - // Immutable. Table that this ReadSession is reading from, in the form - // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id} - string table = 6 [ - (google.api.field_behavior) = IMMUTABLE, - (google.api.resource_reference) = { - type: "bigquery.googleapis.com/Table" - } - ]; - - // Optional. Any modifiers which are applied when reading from the specified table. - TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. Read options for this session (e.g. column selection, filters). - TableReadOptions read_options = 8 [(google.api.field_behavior) = OPTIONAL]; - - // Output only. A list of streams created with the session. - // - // At least one stream is created with the session. 
In the future, larger - // request_stream_count values *may* result in this list being unpopulated, - // in that case, the user will need to use a List method to get the streams - // instead, which is not yet available. - repeated ReadStream streams = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; -} - -// Information about a single stream that gets data out of the storage system. -// Most of the information about `ReadStream` instances is aggregated, making -// `ReadStream` lightweight. -message ReadStream { - option (google.api.resource) = { - type: "bigquerystorage.googleapis.com/ReadStream" - pattern: "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}" - }; - - // Output only. Name of the stream, in the form - // `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`. - string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; -} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/stream_pb2.py b/google/cloud/bigquery_storage_v1beta2/proto/stream_pb2.py deleted file mode 100644 index 24651ee1..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/stream_pb2.py +++ /dev/null @@ -1,628 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_storage_v1beta2/proto/stream.proto -"""Generated protocol buffer code.""" -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import resource_pb2 as google_dot_api_dot_resource__pb2 -from google.cloud.bigquery_storage_v1beta2.proto import ( - arrow_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2, -) -from google.cloud.bigquery_storage_v1beta2.proto import ( - avro_pb2 as google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_avro__pb2, -) -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_storage_v1beta2/proto/stream.proto", - package="google.cloud.bigquery.storage.v1beta2", - syntax="proto3", - serialized_options=b"\n)com.google.cloud.bigquery.storage.v1beta2B\013StreamProtoP\001ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage\352AU\n\035bigquery.googleapis.com/Table\0224projects/{project}/datasets/{dataset}/tables/{table}", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n8google/cloud/bigquery_storage_v1beta2/proto/stream.proto\x12%google.cloud.bigquery.storage.v1beta2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x19google/api/resource.proto\x1a\x37google/cloud/bigquery_storage_v1beta2/proto/arrow.proto\x1a\x36google/cloud/bigquery_storage_v1beta2/proto/avro.proto\x1a\x1fgoogle/protobuf/timestamp.proto"\xf2\x07\n\x0bReadSession\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x34\n\x0b\x65xpire_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.TimestampB\x03\xe0\x41\x03\x12K\n\x0b\x64\x61ta_format\x18\x03 \x01(\x0e\x32\x31.google.cloud.bigquery.storage.v1beta2.DataFormatB\x03\xe0\x41\x05\x12M\n\x0b\x61vro_schema\x18\x04 
\x01(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta2.AvroSchemaB\x03\xe0\x41\x03H\x00\x12O\n\x0c\x61rrow_schema\x18\x05 \x01(\x0b\x32\x32.google.cloud.bigquery.storage.v1beta2.ArrowSchemaB\x03\xe0\x41\x03H\x00\x12\x34\n\x05table\x18\x06 \x01(\tB%\xe0\x41\x05\xfa\x41\x1f\n\x1d\x62igquery.googleapis.com/Table\x12_\n\x0ftable_modifiers\x18\x07 \x01(\x0b\x32\x41.google.cloud.bigquery.storage.v1beta2.ReadSession.TableModifiersB\x03\xe0\x41\x01\x12^\n\x0cread_options\x18\x08 \x01(\x0b\x32\x43.google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptionsB\x03\xe0\x41\x01\x12G\n\x07streams\x18\n \x03(\x0b\x32\x31.google.cloud.bigquery.storage.v1beta2.ReadStreamB\x03\xe0\x41\x03\x1a\x43\n\x0eTableModifiers\x12\x31\n\rsnapshot_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x1a\xb0\x01\n\x10TableReadOptions\x12\x17\n\x0fselected_fields\x18\x01 \x03(\t\x12\x17\n\x0frow_restriction\x18\x02 \x01(\t\x12j\n\x1b\x61rrow_serialization_options\x18\x03 \x01(\x0b\x32@.google.cloud.bigquery.storage.v1beta2.ArrowSerializationOptionsB\x03\xe0\x41\x01:k\xea\x41h\n*bigquerystorage.googleapis.com/ReadSession\x12:projects/{project}/locations/{location}/sessions/{session}B\x08\n\x06schema"\x9c\x01\n\nReadStream\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x03:{\xea\x41x\n)bigquerystorage.googleapis.com/ReadStream\x12Kprojects/{project}/locations/{location}/sessions/{session}/streams/{stream}*>\n\nDataFormat\x12\x1b\n\x17\x44\x41TA_FORMAT_UNSPECIFIED\x10\x00\x12\x08\n\x04\x41VRO\x10\x01\x12\t\n\x05\x41RROW\x10\x02\x42\xe0\x01\n)com.google.cloud.bigquery.storage.v1beta2B\x0bStreamProtoP\x01ZLgoogle.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage\xea\x41U\n\x1d\x62igquery.googleapis.com/Table\x12\x34projects/{project}/datasets/{dataset}/tables/{table}b\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_resource__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_avro__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - ], -) - -_DATAFORMAT = _descriptor.EnumDescriptor( - name="DataFormat", - full_name="google.cloud.bigquery.storage.v1beta2.DataFormat", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DATA_FORMAT_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="AVRO", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARROW", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1477, - serialized_end=1539, -) -_sym_db.RegisterEnumDescriptor(_DATAFORMAT) - -DataFormat = enum_type_wrapper.EnumTypeWrapper(_DATAFORMAT) -DATA_FORMAT_UNSPECIFIED = 0 -AVRO = 1 -ARROW = 2 - - -_READSESSION_TABLEMODIFIERS = _descriptor.Descriptor( - name="TableModifiers", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.TableModifiers", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="snapshot_time", - 
full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.TableModifiers.snapshot_time", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=951, - serialized_end=1018, -) - -_READSESSION_TABLEREADOPTIONS = _descriptor.Descriptor( - name="TableReadOptions", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="selected_fields", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptions.selected_fields", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="row_restriction", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptions.row_restriction", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_serialization_options", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptions.arrow_serialization_options", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1021, - serialized_end=1197, -) - -_READSESSION = _descriptor.Descriptor( - name="ReadSession", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="expire_time", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.expire_time", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - 
containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_format", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.data_format", - index=2, - number=3, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\005", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="avro_schema", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.avro_schema", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="arrow_schema", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.arrow_schema", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.table", - index=5, - number=6, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\005\372A\037\n\035bigquery.googleapis.com/Table", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="table_modifiers", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.table_modifiers", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="read_options", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.read_options", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="streams", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.streams", - index=8, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_READSESSION_TABLEMODIFIERS, _READSESSION_TABLEREADOPTIONS,], - enum_types=[], - 
serialized_options=b"\352Ah\n*bigquerystorage.googleapis.com/ReadSession\022:projects/{project}/locations/{location}/sessions/{session}", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="schema", - full_name="google.cloud.bigquery.storage.v1beta2.ReadSession.schema", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=306, - serialized_end=1316, -) - - -_READSTREAM = _descriptor.Descriptor( - name="ReadStream", - full_name="google.cloud.bigquery.storage.v1beta2.ReadStream", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.storage.v1beta2.ReadStream.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"\352Ax\n)bigquerystorage.googleapis.com/ReadStream\022Kprojects/{project}/locations/{location}/sessions/{session}/streams/{stream}", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1319, - serialized_end=1475, -) - -_READSESSION_TABLEMODIFIERS.fields_by_name[ - "snapshot_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_READSESSION_TABLEMODIFIERS.containing_type = _READSESSION -_READSESSION_TABLEREADOPTIONS.fields_by_name[ - "arrow_serialization_options" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2._ARROWSERIALIZATIONOPTIONS -) -_READSESSION_TABLEREADOPTIONS.containing_type = _READSESSION -_READSESSION.fields_by_name[ - "expire_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_READSESSION.fields_by_name["data_format"].enum_type = _DATAFORMAT -_READSESSION.fields_by_name[ - "avro_schema" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_avro__pb2._AVROSCHEMA -) -_READSESSION.fields_by_name[ - "arrow_schema" -].message_type = ( - google_dot_cloud_dot_bigquery__storage__v1beta2_dot_proto_dot_arrow__pb2._ARROWSCHEMA -) -_READSESSION.fields_by_name[ - "table_modifiers" -].message_type = _READSESSION_TABLEMODIFIERS -_READSESSION.fields_by_name["read_options"].message_type = _READSESSION_TABLEREADOPTIONS -_READSESSION.fields_by_name["streams"].message_type = _READSTREAM -_READSESSION.oneofs_by_name["schema"].fields.append( - _READSESSION.fields_by_name["avro_schema"] -) -_READSESSION.fields_by_name[ - "avro_schema" -].containing_oneof = _READSESSION.oneofs_by_name["schema"] -_READSESSION.oneofs_by_name["schema"].fields.append( - _READSESSION.fields_by_name["arrow_schema"] -) -_READSESSION.fields_by_name[ - "arrow_schema" -].containing_oneof = _READSESSION.oneofs_by_name["schema"] -DESCRIPTOR.message_types_by_name["ReadSession"] = _READSESSION -DESCRIPTOR.message_types_by_name["ReadStream"] = _READSTREAM -DESCRIPTOR.enum_types_by_name["DataFormat"] = _DATAFORMAT -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ReadSession = _reflection.GeneratedProtocolMessageType( - "ReadSession", - (_message.Message,), - { - "TableModifiers": _reflection.GeneratedProtocolMessageType( - 
"TableModifiers", - (_message.Message,), - { - "DESCRIPTOR": _READSESSION_TABLEMODIFIERS, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.stream_pb2", - "__doc__": """Additional attributes when reading a table. - - Attributes: - snapshot_time: - The snapshot time of the table. If not set, interpreted as - now. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ReadSession.TableModifiers) - }, - ), - "TableReadOptions": _reflection.GeneratedProtocolMessageType( - "TableReadOptions", - (_message.Message,), - { - "DESCRIPTOR": _READSESSION_TABLEREADOPTIONS, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.stream_pb2", - "__doc__": """Options dictating how we read a table. - - Attributes: - selected_fields: - Names of the fields in the table that should be read. If - empty, all fields will be read. If the specified field is a - nested field, all the sub-fields in the field will be - selected. The output field order is unrelated to the order of - fields in selected_fields. - row_restriction: - SQL text filtering statement, similar to a WHERE clause in a - query. Aggregates are not supported. Examples: “int_field > - 5” “date_field = CAST(‘2014-9-27’ as DATE)” “nullable_field is - not NULL” “st_equals(geo_field, st_geofromtext(”POINT(2, - 2)“))” “numeric_field BETWEEN 1.0 AND 5.0” - arrow_serialization_options: - Optional. Options specific to the Apache Arrow output format. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ReadSession.TableReadOptions) - }, - ), - "DESCRIPTOR": _READSESSION, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.stream_pb2", - "__doc__": """Information about the ReadSession. - - Attributes: - name: - Output only. Unique identifier for the session, in the form `` - projects/{project_id}/locations/{location}/sessions/{session_i - d}``. - expire_time: - Output only. Time at which the session becomes invalid. After - this time, subsequent requests to read this Session will - return errors. The expire_time is automatically assigned and - currently cannot be specified or updated. - data_format: - Immutable. Data format of the output data. - schema: - The schema for the read. If read_options.selected_fields is - set, the schema may be different from the table schema as it - will only contain the selected fields. - avro_schema: - Output only. Avro schema. - arrow_schema: - Output only. Arrow schema. - table: - Immutable. Table that this ReadSession is reading from, in the - form \`projects/{project_id}/datasets/{dataset_id}/tables/{tab - le_id} - table_modifiers: - Optional. Any modifiers which are applied when reading from - the specified table. - read_options: - Optional. Read options for this session (e.g. column - selection, filters). - streams: - Output only. A list of streams created with the session. At - least one stream is created with the session. In the future, - larger request_stream_count values *may* result in this list - being unpopulated, in that case, the user will need to use a - List method to get the streams instead, which is not yet - available. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ReadSession) - }, -) -_sym_db.RegisterMessage(ReadSession) -_sym_db.RegisterMessage(ReadSession.TableModifiers) -_sym_db.RegisterMessage(ReadSession.TableReadOptions) - -ReadStream = _reflection.GeneratedProtocolMessageType( - "ReadStream", - (_message.Message,), - { - "DESCRIPTOR": _READSTREAM, - "__module__": "google.cloud.bigquery_storage_v1beta2.proto.stream_pb2", - "__doc__": """Information about a single stream that gets data out of the storage - system. Most of the information about ``ReadStream`` instances is - aggregated, making ``ReadStream`` lightweight. - - Attributes: - name: - Output only. Name of the stream, in the form ``projects/{proje - ct_id}/locations/{location}/sessions/{session_id}/streams/{str - eam_id}``. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.storage.v1beta2.ReadStream) - }, -) -_sym_db.RegisterMessage(ReadStream) - - -DESCRIPTOR._options = None -_READSESSION_TABLEREADOPTIONS.fields_by_name[ - "arrow_serialization_options" -]._options = None -_READSESSION.fields_by_name["name"]._options = None -_READSESSION.fields_by_name["expire_time"]._options = None -_READSESSION.fields_by_name["data_format"]._options = None -_READSESSION.fields_by_name["avro_schema"]._options = None -_READSESSION.fields_by_name["arrow_schema"]._options = None -_READSESSION.fields_by_name["table"]._options = None -_READSESSION.fields_by_name["table_modifiers"]._options = None -_READSESSION.fields_by_name["read_options"]._options = None -_READSESSION.fields_by_name["streams"]._options = None -_READSESSION._options = None -_READSTREAM.fields_by_name["name"]._options = None -_READSTREAM._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_storage_v1beta2/proto/stream_pb2_grpc.py b/google/cloud/bigquery_storage_v1beta2/proto/stream_pb2_grpc.py deleted file mode 100644 index 8a939394..00000000 --- a/google/cloud/bigquery_storage_v1beta2/proto/stream_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_storage_v1beta2/types.py b/google/cloud/bigquery_storage_v1beta2/types.py deleted file mode 100644 index b4bc241c..00000000 --- a/google/cloud/bigquery_storage_v1beta2/types.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from __future__ import absolute_import -import sys - -from google.api_core.protobuf_helpers import get_messages - -from google.cloud.bigquery_storage_v1beta2.proto import arrow_pb2 -from google.cloud.bigquery_storage_v1beta2.proto import avro_pb2 -from google.cloud.bigquery_storage_v1beta2.proto import storage_pb2 -from google.cloud.bigquery_storage_v1beta2.proto import stream_pb2 -from google.protobuf import timestamp_pb2 - - -_shared_modules = [ - timestamp_pb2, -] - -_local_modules = [ - arrow_pb2, - avro_pb2, - storage_pb2, - stream_pb2, -] - -names = [] - -for module in _shared_modules: # pragma: NO COVER - for name, message in get_messages(module).items(): - setattr(sys.modules[__name__], name, message) - names.append(name) -for module in _local_modules: - for name, message in get_messages(module).items(): - message.__module__ = "google.cloud.bigquery_storage_v1beta2.types" - setattr(sys.modules[__name__], name, message) - names.append(name) - - -__all__ = tuple(sorted(names)) diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..4505b485 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +python_version = 3.6 +namespace_packages = True diff --git a/noxfile.py b/noxfile.py index 43aaeb69..188218a1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -27,8 +27,8 @@ BLACK_PATHS = ["docs", "google", "tests", "noxfile.py", "setup.py"] DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["2.7", "3.8"] -UNIT_TEST_PYTHON_VERSIONS = ["2.7", "3.5", "3.6", "3.7", "3.8"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] +UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8"] @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -70,8 +70,9 @@ def lint_setup_py(session): def default(session): # Install all test dependencies, then install this package in-place. + session.install("asyncmock", "pytest-asyncio") + session.install("mock", "pytest", "pytest-cov") - session.install("-e", ".") session.install("-e", ".[fastavro,pandas,pyarrow]") # Run py.test against the unit tests. @@ -126,7 +127,6 @@ def system(session): "mock", "pytest", "google-cloud-testutils", ) session.install("-e", ".[fastavro,pandas,pyarrow]") - session.install("-e", ".") # Run py.test against the system tests. if system_test_exists: @@ -143,7 +143,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=79") + session.run("coverage", "report", "--show-missing", "--fail-under=95") session.run("coverage", "erase") diff --git a/samples/quickstart/quickstart.py b/samples/quickstart/quickstart.py index 8358fdc1..ef42f02a 100644 --- a/samples/quickstart/quickstart.py +++ b/samples/quickstart/quickstart.py @@ -17,7 +17,8 @@ def main(project_id="your-project-id", snapshot_millis=0): # [START bigquerystorage_quickstart] - from google.cloud import bigquery_storage_v1 + from google.cloud.bigquery.storage import BigQueryReadClient + from google.cloud.bigquery.storage import types # TODO(developer): Set the project_id variable. # project_id = 'your-project-id' @@ -25,38 +26,35 @@ def main(project_id="your-project-id", snapshot_millis=0): # The read session is created in this project. This project can be # different from that which contains the table. - client = bigquery_storage_v1.BigQueryReadClient() + client = BigQueryReadClient() # This example reads baby name data from the public datasets. 
table = "projects/{}/datasets/{}/tables/{}".format( "bigquery-public-data", "usa_names", "usa_1910_current" ) - requested_session = bigquery_storage_v1.types.ReadSession() + requested_session = types.ReadSession() requested_session.table = table # This API can also deliver data serialized in Apache Arrow format. # This example leverages Apache Avro. - requested_session.data_format = bigquery_storage_v1.enums.DataFormat.AVRO + requested_session.data_format = types.DataFormat.AVRO # We limit the output columns to a subset of those allowed in the table, # and set a simple filter to only report names from the state of # Washington (WA). - requested_session.read_options.selected_fields.append("name") - requested_session.read_options.selected_fields.append("number") - requested_session.read_options.selected_fields.append("state") + requested_session.read_options.selected_fields = ["name", "number", "state"] requested_session.read_options.row_restriction = 'state = "WA"' # Set a snapshot time if it's been specified. - modifiers = None if snapshot_millis > 0: - requested_session.table_modifiers.snapshot_time.FromMilliseconds( - snapshot_millis - ) + snapshot_time = types.Timestamp() + snapshot_time.FromMilliseconds(snapshot_millis) + requested_session.table_modifiers.snapshot_time = snapshot_time parent = "projects/{}".format(project_id) session = client.create_read_session( - parent, - requested_session, + parent=parent, + read_session=requested_session, # We'll use only a single stream for reading data from the table. However, # if you wanted to fan out multiple readers you could do so by having a # reader process each individual stream. diff --git a/samples/to_dataframe/main_test.py b/samples/to_dataframe/main_test.py index 126333bf..ecce1685 100644 --- a/samples/to_dataframe/main_test.py +++ b/samples/to_dataframe/main_test.py @@ -21,7 +21,7 @@ def clients(): # [START bigquerystorage_pandas_tutorial_create_client] import google.auth from google.cloud import bigquery - from google.cloud import bigquery_storage_v1beta1 + from google.cloud.bigquery import storage # Explicitly create a credentials object. This allows you to use the same # credentials for both the BigQuery and BigQuery Storage clients, avoiding @@ -32,9 +32,7 @@ def clients(): # Make clients. bqclient = bigquery.Client(credentials=credentials, project=your_project_id,) - bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient( - credentials=credentials - ) + bqstorageclient = storage.BigQueryReadClient(credentials=credentials) # [END bigquerystorage_pandas_tutorial_create_client] # [END bigquerystorage_pandas_tutorial_all] return bqclient, bqstorageclient @@ -98,48 +96,46 @@ def test_query_to_dataframe(capsys, clients): def test_session_to_dataframe(capsys, clients): - from google.cloud import bigquery_storage_v1beta1 + from google.cloud.bigquery.storage import types bqclient, bqstorageclient = clients your_project_id = bqclient.project # [START bigquerystorage_pandas_tutorial_all] # [START bigquerystorage_pandas_tutorial_read_session] - table = bigquery_storage_v1beta1.types.TableReference() - table.project_id = "bigquery-public-data" - table.dataset_id = "new_york_trees" - table.table_id = "tree_species" + project_id = "bigquery-public-data" + dataset_id = "new_york_trees" + table_id = "tree_species" + table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}" # Select columns to read with read options. If no read options are # specified, the whole table is read. 
- read_options = bigquery_storage_v1beta1.types.TableReadOptions() - read_options.selected_fields.append("species_common_name") - read_options.selected_fields.append("fall_color") + read_options = types.ReadSession.TableReadOptions( + selected_fields=["species_common_name", "fall_color"] + ) parent = "projects/{}".format(your_project_id) - session = bqstorageclient.create_read_session( - table, - parent, - read_options=read_options, + + requested_session = types.ReadSession( + table=table, # This API can also deliver data serialized in Apache Avro format. # This example leverages Apache Arrow. - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - # We use a LIQUID strategy in this example because we only read from a - # single stream. Consider BALANCED if you're consuming multiple streams - # concurrently and want more consistent stream sizes. - sharding_strategy=(bigquery_storage_v1beta1.enums.ShardingStrategy.LIQUID), + data_format=types.DataFormat.ARROW, + read_options=read_options, + ) + read_session = bqstorageclient.create_read_session( + parent=parent, read_session=requested_session ) # This example reads from only a single stream. Read from multiple streams # to fetch data faster. Note that the session may not contain any streams # if there are no rows to read. - stream = session.streams[0] - position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) - reader = bqstorageclient.read_rows(position) + stream = read_session.streams[0] + reader = bqstorageclient.read_rows(stream.name) - # Parse all Avro blocks and create a dataframe. This call requires a + # Parse all Arrow blocks and create a dataframe. This call requires a # session, because the session contains the schema for the row blocks. - dataframe = reader.to_dataframe(session) + dataframe = reader.to_dataframe(read_session) print(dataframe.head()) # [END bigquerystorage_pandas_tutorial_read_session] # [END bigquerystorage_pandas_tutorial_all] diff --git a/scripts/fixup_storage_v1_keywords.py b/scripts/fixup_storage_v1_keywords.py new file mode 100644 index 00000000..2fe0e587 --- /dev/null +++ b/scripts/fixup_storage_v1_keywords.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
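The transformer defined in this new script folds an API call's positional and keyword arguments into a single `request` dict, keeping only `retry`, `timeout`, and `metadata` as separate keyword arguments. A hedged sketch of the rewrite it produces for `create_read_session` (the `client`, `parent`, and `session` names are placeholders, not part of the script):

```py
# 1.x-style call with positional arguments (placeholders: client, parent, session):
client.create_read_session(parent, session, max_stream_count=1, timeout=30)

# Equivalent 2.0-style call emitted by the transformer: API parameters are folded
# into a single `request` dict, while `retry`/`timeout`/`metadata` remain
# regular keyword arguments.
client.create_read_session(
    request={'parent': parent, 'read_session': session, 'max_stream_count': 1},
    timeout=30,
)
```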
+# + +import argparse +import os +import libcst as cst +import pathlib +import sys +from typing import (Any, Callable, Dict, List, Sequence, Tuple) + + +def partition( + predicate: Callable[[Any], bool], + iterator: Sequence[Any] +) -> Tuple[List[Any], List[Any]]: + """A stable, out-of-place partition.""" + results = ([], []) + + for i in iterator: + results[int(predicate(i))].append(i) + + # Returns trueList, falseList + return results[1], results[0] + + +class storageCallTransformer(cst.CSTTransformer): + CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') + METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { + 'create_read_session': ('parent', 'read_session', 'max_stream_count', ), + 'read_rows': ('read_stream', 'offset', ), + 'split_read_stream': ('name', 'fraction', ), + + } + + def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: + try: + key = original.func.attr.value + kword_params = self.METHOD_TO_PARAMS[key] + except (AttributeError, KeyError): + # Either not a method from the API or too convoluted to be sure. + return updated + + # If the existing code is valid, keyword args come after positional args. + # Therefore, all positional args must map to the first parameters. + args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) + if any(k.keyword.value == "request" for k in kwargs): + # We've already fixed this file, don't fix it again. + return updated + + kwargs, ctrl_kwargs = partition( + lambda a: not a.keyword.value in self.CTRL_PARAMS, + kwargs + ) + + args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] + ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) + for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) + + request_arg = cst.Arg( + value=cst.Dict([ + cst.DictElement( + cst.SimpleString("'{}'".format(name)), + cst.Element(value=arg.value) + ) + # Note: the args + kwargs looks silly, but keep in mind that + # the control parameters had to be stripped out, and that + # those could have been passed positionally or by keyword. + for name, arg in zip(kword_params, args + kwargs)]), + keyword=cst.Name("request") + ) + + return updated.with_changes( + args=[request_arg] + ctrl_kwargs + ) + + +def fix_files( + in_dir: pathlib.Path, + out_dir: pathlib.Path, + *, + transformer=storageCallTransformer(), +): + """Duplicate the input dir to the output dir, fixing file method calls. + + Preconditions: + * in_dir is a real directory + * out_dir is a real, empty directory + """ + pyfile_gen = ( + pathlib.Path(os.path.join(root, f)) + for root, _, files in os.walk(in_dir) + for f in files if os.path.splitext(f)[1] == ".py" + ) + + for fpath in pyfile_gen: + with open(fpath, 'r') as f: + src = f.read() + + # Parse the code and insert method call fixes. + tree = cst.parse_module(src) + updated = tree.visit(transformer) + + # Create the path and directory structure for the new file. + updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) + updated_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate the updated source file at the corresponding path. + with open(updated_path, 'w') as f: + f.write(updated.code) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="""Fix up source that uses the storage client library. + +The existing sources are NOT overwritten but are copied to output_dir with changes made. + +Note: This tool operates at a best-effort level at converting positional + parameters in client method calls to keyword based parameters. 
+ Cases where it WILL FAIL include + A) * or ** expansion in a method call. + B) Calls via function or method alias (includes free function calls) + C) Indirect or dispatched calls (e.g. the method is looked up dynamically) + + These all constitute false negatives. The tool will also detect false + positives when an API method shares a name with another method. +""") + parser.add_argument( + '-d', + '--input-directory', + required=True, + dest='input_dir', + help='the input directory to walk for python files to fix up', + ) + parser.add_argument( + '-o', + '--output-directory', + required=True, + dest='output_dir', + help='the directory to output files fixed via un-flattening', + ) + args = parser.parse_args() + input_dir = pathlib.Path(args.input_dir) + output_dir = pathlib.Path(args.output_dir) + if not input_dir.is_dir(): + print( + f"input directory '{input_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if not output_dir.is_dir(): + print( + f"output directory '{output_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if os.listdir(output_dir): + print( + f"output directory '{output_dir}' is not empty", + file=sys.stderr, + ) + sys.exit(-1) + + fix_files(input_dir, output_dir) diff --git a/setup.py b/setup.py index 4abe5dd4..4167081a 100644 --- a/setup.py +++ b/setup.py @@ -21,11 +21,12 @@ name = "google-cloud-bigquery-storage" description = "BigQuery Storage API API client library" -version = "1.1.0" +version = "2.0.0" release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.14.0, < 2.0.0dev", - 'enum34; python_version < "3.4"', + "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", + "proto-plus >= 1.4.0", + "libcst >= 0.2.5", ] extras = { "pandas": "pandas>=0.17.1", @@ -40,13 +41,18 @@ readme = readme_file.read() packages = [ - package for package in setuptools.find_packages() if package.startswith("google") + package + for package in setuptools.PEP420PackageFinder.find() + if package.startswith("google") ] namespaces = ["google"] if "google.cloud" in packages: namespaces.append("google.cloud") +if "google.cloud.bigquery" in packages: + namespaces.append("google.cloud.bigquery") + setuptools.setup( name=name, version=version, @@ -61,10 +67,7 @@ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", @@ -76,7 +79,8 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*", + python_requires=">=3.6", + scripts=["scripts/fixup_storage_v1_keywords.py"], include_package_data=True, zip_safe=False, ) diff --git a/synth.metadata b/synth.metadata index 82ec7bbf..030565d7 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,51 +4,25 @@ "git": { "name": ".", "remote": "git@github.com:plamut/python-bigquery-storage.git", - "sha": "ba84d5b22ec1270a362687854f51d74c5d96d862" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "7a906c83566d0d429b23da434a4dc2b4de7b117a", - "internalRef": "331220195" + "sha": "e019d01628884bb3a24495f48f5036c9160deabd" } }, { "git": { "name": 
"synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "fdd03c161003ab97657cc0218f25c82c89ddf4b6" + "sha": "916c10e8581804df2b48a0f0457d848f3faa582e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "fdd03c161003ab97657cc0218f25c82c89ddf4b6" + "sha": "916c10e8581804df2b48a0f0457d848f3faa582e" } } ], "destinations": [ - { - "client": { - "source": "googleapis", - "apiName": "bigquery_storage", - "apiVersion": "v1beta1", - "language": "python", - "generator": "bazel" - } - }, - { - "client": { - "source": "googleapis", - "apiName": "bigquery_storage", - "apiVersion": "v1beta2", - "language": "python", - "generator": "bazel" - } - }, { "client": { "source": "googleapis", diff --git a/synth.py b/synth.py index 56a68902..2c867da7 100644 --- a/synth.py +++ b/synth.py @@ -22,7 +22,7 @@ gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -versions = ["v1beta1", "v1beta2", "v1"] +versions = ["v1"] for version in versions: library = gapic.py_library( @@ -50,7 +50,6 @@ # In the future once the read and write client are colocated in the same version, # we'll need to loop through through multiple clients. Perhaps by the time that # happens we'll be on a generator that needs less post-generation modifications. - clientinfo = { "file": "big_query_storage_client.py", "type": "storage", @@ -58,7 +57,7 @@ "badpkg": "google-cloud-bigquerystorage", "goodpkg": "google-cloud-bigquery-storage", } - if version in ["v1beta2","v1"]: + if version in {"v1"}: clientinfo = { "file": "big_query_read_client.py", "type": "read", @@ -66,111 +65,19 @@ "badpkg": "google-cloud-bigquerystorage", "goodpkg": "google-cloud-bigquery-storage", } - if version in ["v1alpha2"]: - clientinfo = { - "file": "big_query_write_client.py", - "type": "write", - "name": "BigQueryWriteClient", - "badpkg": "google-cloud-bigquerystorage", - "goodpkg": "google-cloud-bigquery-storage", - } - - s.replace( - [ - f"google/cloud/bigquery_storage_{version}/proto/storage_pb2.py", - f"google/cloud/bigquery_storage_{version}/proto/storage_pb2_grpc.py", - f"google/cloud/bigquery_storage_{version}/proto/stream_pb2.py", - f"google/cloud/bigquery_storage_{version}/proto/stream_pb2_grpc.py", - ], - f"from google.cloud.bigquery.storage_{version}.proto", - f"from google.cloud.bigquery_storage_{version}.proto", - ) - - # This is used to populate _GAPIC_LIBRARY_VERSION in the client. 
- s.replace( - f"google/cloud/bigquery_storage_{version}/gapic/{clientinfo['file']}", - clientinfo['badpkg'], - clientinfo['goodpkg'] - ) - - s.replace( - f"google/cloud/bigquery_storage_{version}/gapic/{clientinfo['file']}", - "import google.api_core.gapic_v1.method\n", - "\g<0>import google.api_core.path_template\n", - ) - - s.replace( - [f"tests/unit/gapic/{version}/test_big_query_{clientinfo['type']}_client_{version}.py"], - f"from google.cloud import bigquery_storage_{version}", - f"from google.cloud.bigquery_storage_{version}.gapic import big_query_{clientinfo['type']}_client # noqa", - ) - - s.replace( - [f"tests/unit/gapic/{version}/test_big_query_{clientinfo['type']}_client_{version}.py"], - f"bigquery_storage_{version}.{clientinfo['name']}", - f"big_query_{clientinfo['type']}_client.{clientinfo['name']}", - ) - - # START: Ignore lint and coverage - s.replace( - [f"google/cloud/bigquery_storage_{version}/gapic/big_query_{clientinfo['type']}_client.py"], - "if transport:", - "if transport: # pragma: no cover", - ) - - s.replace( - [f"google/cloud/bigquery_storage_{version}/gapic/big_query_{clientinfo['type']}_client.py"], - r"metadata.append\(routing_metadata\)", - "metadata.append(routing_metadata) # pragma: no cover", - ) - - s.replace( - [ - f"google/cloud/bigquery_storage_{version}/gapic/transports/big_query_{clientinfo['type']}_grpc_transport.py" - ], - "if channel is not None and credentials is not None:", - "if channel is not None and credentials is not None: # pragma: no cover", - ) - - s.replace( - [ - f"google/cloud/bigquery_storage_{version}/gapic/transports/big_query_{clientinfo['type']}_grpc_transport.py" - ], - "if channel is None:", - "if channel is None: # pragma: no cover", - ) - - s.replace( - [ - f"google/cloud/bigquery_storage_{version}/gapic/transports/big_query_{clientinfo['type']}_grpc_transport.py" - ], - r"google.api_core.grpc_helpers.create_channel\(", - "google.api_core.grpc_helpers.create_channel( # pragma: no cover", - ) - - # Fix up proto docs that are missing summary line. - s.replace( - f"google/cloud/bigquery_storage_{version}/proto/storage_pb2.py", - '"""Attributes:', - '"""Protocol buffer.\n\n Attributes:', - ) - # END: Ignore lint and coverage - # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- optional_deps = [".[fastavro,pandas,pyarrow]"] -system_test_deps = optional_deps + templated_files = common.py_library( - unit_cov_level=79, - cov_level=79, - samples_test=True, - system_test_dependencies=system_test_deps, - unit_test_dependencies=optional_deps, + microgenerator=True, samples=True, + unit_test_dependencies=optional_deps, + cov_level=95, ) -s.move(templated_files) +s.move(templated_files, excludes=[".coveragerc"]) # microgenerator has a good .coveragerc file # ---------------------------------------------------------------------------- @@ -187,20 +94,66 @@ '\g<0>\n\n session.install("google-cloud-bigquery")', ) -# remove the samples session from the main noxfile +# We want the default client accessible through "google.cloud.bigquery.storage" +# to be the hand-written client that wrap the generated client, as this path is +# the users' main "entry point" into the library. +# HOWEVER - we don't want to expose the async client just yet. 
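+# (Roughly speaking, after this post-processing user code can do e.g.
+#
+#     from google.cloud.bigquery import storage
+#     client = storage.BigQueryReadClient()
+#
+# as the updated system tests below do, while BigQueryReadAsyncClient is kept out
+# of the consolidated namespace for now.)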
+s.replace( + "google/cloud/bigquery/storage/__init__.py", + r"from google\.cloud\.bigquery\.storage_v1\.services.big_query_read.client import", + "from google.cloud.bigquery_storage_v1 import" +) +s.replace( + "google/cloud/bigquery/storage/__init__.py", + ( + r"from google\.cloud\.bigquery\.storage_v1\.services.big_query_read.async_client " + r"import BigQueryReadAsyncClient\n" + ), + "", +) +s.replace( + "google/cloud/bigquery/storage/__init__.py", + r"""["']BigQueryReadAsyncClient["'],\n""", + "", +) + +# Ditto for types and __version__, make them accessible through the consolidated +# entry point. +s.replace( + "google/cloud/bigquery/storage/__init__.py", + r"from google\.cloud\.bigquery\.storage_v1\.types\.arrow import ArrowRecordBatch", + ( + "from google.cloud.bigquery_storage_v1 import types\n" + "from google.cloud.bigquery_storage_v1 import __version__\n" + "\g<0>" + ), +) +s.replace( + "google/cloud/bigquery/storage/__init__.py", + r"""["']ArrowRecordBatch["']""", + ( + '"__version__",\n' + ' "types",\n' + " \g<0>" + ), +) + +# Fix redundant library installations in nox sessions (unit and system tests). s.replace( "noxfile.py", - r""" - @nox\.session\([^)]*\)\s+ - def\ samples\(session\): - .*? - (?= - @nox\.session\([^)]*\)\s+ - def\ cover\(session\): - ) - """, + ( + r'session\.install\("-e", "\."\)\n ' + r'(?=session\.install\("-e", "\.\[fastavro)' + ), "", - flags=re.VERBOSE | re.DOTALL +) +s.replace( + "noxfile.py", + ( + r'(?<=google-cloud-testutils", \)\n)' + r' session\.install\("-e", "\."\)\n' + ), + ' session.install("-e", ".[fastavro,pandas,pyarrow]")\n', ) # TODO(busunkim): Use latest sphinx after microgenerator transition diff --git a/tests/system/v1/conftest.py b/tests/system/v1/conftest.py index 84eb859c..901429fc 100644 --- a/tests/system/v1/conftest.py +++ b/tests/system/v1/conftest.py @@ -20,7 +20,7 @@ import pytest -from google.cloud import bigquery_storage_v1 +from google.cloud.bigquery import storage _TABLE_FORMAT = "projects/{}/datasets/{}/tables/{}" @@ -43,7 +43,7 @@ def credentials(): @pytest.fixture(scope="session") def client(credentials): - return bigquery_storage_v1.BigQueryReadClient(credentials=credentials) + return storage.BigQueryReadClient(credentials=credentials) @pytest.fixture() diff --git a/tests/system/v1/test_reader_dataframe_v1.py b/tests/system/v1/test_reader_dataframe_v1.py index ec3e983c..0d5b1899 100644 --- a/tests/system/v1/test_reader_dataframe_v1.py +++ b/tests/system/v1/test_reader_dataframe_v1.py @@ -19,11 +19,11 @@ import pyarrow.types import pytest -from google.cloud import bigquery_storage_v1 +from google.cloud.bigquery.storage import types def test_read_v1(client, project_id): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = "projects/{}/datasets/{}/tables/{}".format( "bigquery-public-data", "new_york_citibike", "citibike_stations" ) @@ -32,10 +32,14 @@ def test_read_v1(client, project_id): read_session.read_options.selected_fields.append("latitude") read_session.read_options.selected_fields.append("longitude") read_session.read_options.selected_fields.append("name") - read_session.data_format = bigquery_storage_v1.enums.DataFormat.ARROW + read_session.data_format = types.DataFormat.ARROW session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) assert len(session.streams) == 1 @@ -56,22 +60,23 
@@ def test_read_v1(client, project_id): @pytest.mark.parametrize( "data_format,expected_schema_type", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO, "avro_schema"), - (bigquery_storage_v1.enums.DataFormat.ARROW, "arrow_schema"), - ), + ((types.DataFormat.AVRO, "avro_schema"), (types.DataFormat.ARROW, "arrow_schema")), ) def test_read_rows_to_dataframe(client, project_id, data_format, expected_schema_type): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = "projects/{}/datasets/{}/tables/{}".format( "bigquery-public-data", "new_york_citibike", "citibike_stations" ) read_session.data_format = data_format session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) - schema_type = session.WhichOneof("schema") + schema_type = session._pb.WhichOneof("schema") assert schema_type == expected_schema_type stream = session.streams[0].name diff --git a/tests/system/v1/test_reader_v1.py b/tests/system/v1/test_reader_v1.py index 5337d500..ff1f7191 100644 --- a/tests/system/v1/test_reader_v1.py +++ b/tests/system/v1/test_reader_v1.py @@ -24,8 +24,7 @@ import pytz from google.cloud import bigquery -from google.cloud import bigquery_storage_v1 -from google.protobuf import timestamp_pb2 +from google.cloud.bigquery.storage import types def _to_bq_table_ref(table_name_string, partition_suffix=""): @@ -55,25 +54,26 @@ def _to_bq_table_ref(table_name_string, partition_suffix=""): @pytest.mark.parametrize( "data_format,expected_schema_type", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO, "avro_schema"), - (bigquery_storage_v1.enums.DataFormat.ARROW, "arrow_schema"), - ), + ((types.DataFormat.AVRO, "avro_schema"), (types.DataFormat.ARROW, "arrow_schema")), ) def test_read_rows_as_blocks_full_table( client, project_id, small_table_reference, data_format, expected_schema_type ): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = small_table_reference read_session.data_format = data_format session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) stream = session.streams[0].name - schema_type = session.WhichOneof("schema") + schema_type = session._pb.WhichOneof("schema") assert schema_type == expected_schema_type blocks = list(client.read_rows(stream)) @@ -83,21 +83,22 @@ def test_read_rows_as_blocks_full_table( @pytest.mark.parametrize( "data_format,expected_schema_type", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO, "avro_schema"), - (bigquery_storage_v1.enums.DataFormat.ARROW, "arrow_schema"), - ), + ((types.DataFormat.AVRO, "avro_schema"), (types.DataFormat.ARROW, "arrow_schema")), ) def test_read_rows_as_rows_full_table( client, project_id, small_table_reference, data_format, expected_schema_type ): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = small_table_reference read_session.data_format = data_format session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) stream = session.streams[0].name @@ -107,19 
+108,19 @@ def test_read_rows_as_rows_full_table( @pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO), - (bigquery_storage_v1.enums.DataFormat.ARROW), - ), + "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) ) def test_basic_nonfiltered_read(client, project_id, table_with_data_ref, data_format): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = table_with_data_ref read_session.data_format = data_format session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) stream = session.streams[0].name @@ -129,13 +130,17 @@ def test_basic_nonfiltered_read(client, project_id, table_with_data_ref, data_fo def test_filtered_rows_read(client, project_id, table_with_data_ref): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = table_with_data_ref - read_session.data_format = bigquery_storage_v1.enums.DataFormat.AVRO + read_session.data_format = types.DataFormat.AVRO read_session.read_options.row_restriction = "age >= 50" session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) stream = session.streams[0].name @@ -145,22 +150,22 @@ def test_filtered_rows_read(client, project_id, table_with_data_ref): @pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO), - (bigquery_storage_v1.enums.DataFormat.ARROW), - ), + "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) ) def test_column_selection_read(client, project_id, table_with_data_ref, data_format): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = table_with_data_ref read_session.data_format = data_format read_session.read_options.selected_fields.append("first_name") read_session.read_options.selected_fields.append("age") session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) stream = session.streams[0].name @@ -171,7 +176,7 @@ def test_column_selection_read(client, project_id, table_with_data_ref, data_for def test_snapshot(client, project_id, table_with_data_ref, bq_client): - before_new_data = timestamp_pb2.Timestamp() + before_new_data = types.Timestamp() before_new_data.GetCurrentTime() # load additional data into the table @@ -185,13 +190,17 @@ def test_snapshot(client, project_id, table_with_data_ref, bq_client): # read data using the timestamp before the additional data load - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = table_with_data_ref - read_session.table_modifiers.snapshot_time.CopyFrom(before_new_data) - read_session.data_format = bigquery_storage_v1.enums.DataFormat.AVRO + read_session.table_modifiers.snapshot_time = before_new_data + read_session.data_format = types.DataFormat.AVRO session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": 
read_session, + "max_stream_count": 1, + } ) stream = session.streams[0].name @@ -221,13 +230,17 @@ def test_column_partitioned_table( # Read from the table with a partition filter specified, and verify that # only the expected data is returned. - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = col_partition_table_ref - read_session.data_format = bigquery_storage_v1.enums.DataFormat.AVRO + read_session.data_format = types.DataFormat.AVRO read_session.read_options.row_restriction = "occurred = '2018-02-15'" session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) assert session.streams # there should be some data to fetch @@ -244,11 +257,7 @@ def test_column_partitioned_table( @pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO), - (bigquery_storage_v1.enums.DataFormat.ARROW), - ), + "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) ) def test_ingestion_time_partitioned_table( client, project_id, ingest_partition_table_ref, bq_client, data_format @@ -277,13 +286,17 @@ def test_ingestion_time_partitioned_table( ) bq_client.load_table_from_json(data, destination).result() - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = ingest_partition_table_ref read_session.data_format = data_format read_session.read_options.row_restriction = "DATE(_PARTITIONTIME) = '2019-08-10'" session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) assert session.streams # there should be some data to fetch @@ -293,10 +306,10 @@ def test_ingestion_time_partitioned_table( rows = list(client.read_rows(stream).rows(session)) assert len(rows) == 2 - if data_format == bigquery_storage_v1.enums.DataFormat.AVRO: + if data_format == types.DataFormat.AVRO: actual_items = {(row["shape"], row["altitude"]) for row in rows} else: - assert data_format == bigquery_storage_v1.enums.DataFormat.ARROW + assert data_format == types.DataFormat.ARROW actual_items = {(row["shape"].as_py(), row["altitude"].as_py()) for row in rows} expected_items = {("sphere", 3500), ("doughnut", 100)} @@ -304,11 +317,7 @@ def test_ingestion_time_partitioned_table( @pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO), - (bigquery_storage_v1.enums.DataFormat.ARROW), - ), + "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) ) def test_decoding_data_types( client, project_id, all_types_table_ref, bq_client, data_format @@ -361,22 +370,26 @@ def test_decoding_data_types( destination = _to_bq_table_ref(all_types_table_ref) bq_client.load_table_from_json(data, destination, job_config=job_config).result() - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = all_types_table_ref read_session.data_format = data_format session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) assert session.streams # there should be data available stream = session.streams[0].name - if 
data_format == bigquery_storage_v1.enums.DataFormat.AVRO: + if data_format == types.DataFormat.AVRO: rows = list(client.read_rows(stream).rows(session)) else: - assert data_format == bigquery_storage_v1.enums.DataFormat.ARROW + assert data_format == types.DataFormat.ARROW rows = list( dict((key, value.as_py()) for key, value in row_dict.items()) for row_dict in client.read_rows(stream).rows(session) @@ -409,22 +422,22 @@ def test_decoding_data_types( @pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1.enums.DataFormat.AVRO), - (bigquery_storage_v1.enums.DataFormat.ARROW), - ), + "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) ) def test_resuming_read_from_offset( client, project_id, data_format, local_shakespeare_table_reference ): - read_session = bigquery_storage_v1.types.ReadSession() + read_session = types.ReadSession() read_session.table = local_shakespeare_table_reference read_session.data_format = data_format session = client.create_read_session( - "projects/{}".format(project_id), read_session, max_stream_count=1 + request={ + "parent": "projects/{}".format(project_id), + "read_session": read_session, + "max_stream_count": 1, + } ) assert session.streams # there should be data available @@ -441,7 +454,7 @@ def test_resuming_read_from_offset( # fetch the rest of the rows using the stream offset offset = some_rows.row_count + more_rows.row_count remaining_rows_count = sum( - 1 for _ in client.read_rows(stream, offset).rows(session) + 1 for _ in client.read_rows(stream, offset=offset).rows(session) ) # verify that the counts match diff --git a/tests/system/v1beta1/conftest.py b/tests/system/v1beta1/conftest.py deleted file mode 100644 index 2e0812a6..00000000 --- a/tests/system/v1beta1/conftest.py +++ /dev/null @@ -1,232 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""System tests for reading rows from tables.""" - -import os -import uuid - -import pytest - -from google.cloud import bigquery_storage_v1beta1 - -_ASSETS_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../assets") - - -@pytest.fixture(scope="session") -def project_id(): - return os.environ["PROJECT_ID"] - - -@pytest.fixture(scope="session") -def credentials(): - from google.oauth2 import service_account - - # NOTE: the test config in noxfile checks that the env variable is indeed set - filename = os.environ["GOOGLE_APPLICATION_CREDENTIALS"] - return service_account.Credentials.from_service_account_file(filename) - - -@pytest.fixture(scope="session") -def bq_client(credentials): - from google.cloud import bigquery - - return bigquery.Client(credentials=credentials) - - -@pytest.fixture(scope="session") -def dataset(project_id, bq_client): - from google.cloud import bigquery - - unique_suffix = str(uuid.uuid4()).replace("-", "_") - dataset_name = "bq_storage_system_tests_" + unique_suffix - - dataset_id = "{}.{}".format(project_id, dataset_name) - dataset = bigquery.Dataset(dataset_id) - dataset.location = "US" - created_dataset = bq_client.create_dataset(dataset) - - yield created_dataset - - bq_client.delete_dataset(dataset, delete_contents=True) - - -@pytest.fixture(scope="session") -def table(project_id, dataset, bq_client): - from google.cloud import bigquery - - schema = [ - bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), - ] - - table_id = "{}.{}.{}".format(project_id, dataset.dataset_id, "users") - bq_table = bigquery.Table(table_id, schema=schema) - created_table = bq_client.create_table(bq_table) - - yield created_table - - bq_client.delete_table(created_table) - - -@pytest.fixture -def table_with_data_ref(dataset, table, bq_client): - from google.cloud import bigquery - - job_config = bigquery.LoadJobConfig() - job_config.source_format = bigquery.SourceFormat.CSV - job_config.skip_leading_rows = 1 - job_config.schema = table.schema - - filename = os.path.join(_ASSETS_DIR, "people_data.csv") - - with open(filename, "rb") as source_file: - job = bq_client.load_table_from_file(source_file, table, job_config=job_config) - - job.result() # wait for the load to complete - - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = table.project - table_ref.dataset_id = table.dataset_id - table_ref.table_id = table.table_id - yield table_ref - - # truncate table data - query = "DELETE FROM {}.{} WHERE 1 = 1".format(dataset.dataset_id, table.table_id) - query_job = bq_client.query(query, location="US") - query_job.result() - - -@pytest.fixture -def col_partition_table_ref(project_id, dataset, bq_client): - from google.cloud import bigquery - - schema = [ - bigquery.SchemaField("occurred", "DATE", mode="NULLABLE"), - bigquery.SchemaField("description", "STRING", mode="NULLABLE"), - ] - time_partitioning = bigquery.table.TimePartitioning( - type_=bigquery.table.TimePartitioningType.DAY, field="occurred" - ) - bq_table = bigquery.table.Table( - table_ref="{}.{}.notable_events".format(project_id, dataset.dataset_id), - schema=schema, - ) - bq_table.time_partitioning = time_partitioning - - created_table = bq_client.create_table(bq_table) - - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = created_table.project - table_ref.dataset_id = created_table.dataset_id - 
table_ref.table_id = created_table.table_id - yield table_ref - - bq_client.delete_table(created_table) - - -@pytest.fixture -def ingest_partition_table_ref(project_id, dataset, bq_client): - from google.cloud import bigquery - - schema = [ - bigquery.SchemaField("shape", "STRING", mode="NULLABLE"), - bigquery.SchemaField("altitude", "INT64", mode="NULLABLE"), - ] - time_partitioning = bigquery.table.TimePartitioning( - type_=bigquery.table.TimePartitioningType.DAY, - field=None, # use _PARTITIONTIME pseudo column - ) - bq_table = bigquery.table.Table( - table_ref="{}.{}.ufo_sightings".format(project_id, dataset.dataset_id), - schema=schema, - ) - bq_table.time_partitioning = time_partitioning - - created_table = bq_client.create_table(bq_table) - - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = created_table.project - table_ref.dataset_id = created_table.dataset_id - table_ref.table_id = created_table.table_id - yield table_ref - - bq_client.delete_table(created_table) - - -@pytest.fixture -def all_types_table_ref(project_id, dataset, bq_client): - from google.cloud import bigquery - - schema = [ - bigquery.SchemaField("string_field", "STRING"), - bigquery.SchemaField("bytes_field", "BYTES"), - bigquery.SchemaField("int64_field", "INT64"), - bigquery.SchemaField("float64_field", "FLOAT64"), - bigquery.SchemaField("numeric_field", "NUMERIC"), - bigquery.SchemaField("bool_field", "BOOL"), - bigquery.SchemaField("geography_field", "GEOGRAPHY"), - bigquery.SchemaField( - "person_struct_field", - "STRUCT", - fields=( - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("age", "INT64"), - ), - ), - bigquery.SchemaField("timestamp_field", "TIMESTAMP"), - bigquery.SchemaField("date_field", "DATE"), - bigquery.SchemaField("time_field", "TIME"), - bigquery.SchemaField("datetime_field", "DATETIME"), - bigquery.SchemaField("string_array_field", "STRING", mode="REPEATED"), - ] - bq_table = bigquery.table.Table( - table_ref="{}.{}.complex_records".format(project_id, dataset.dataset_id), - schema=schema, - ) - - created_table = bq_client.create_table(bq_table) - - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = created_table.project - table_ref.dataset_id = created_table.dataset_id - table_ref.table_id = created_table.table_id - yield table_ref - - bq_client.delete_table(created_table) - - -@pytest.fixture(scope="session") -def client(credentials): - return bigquery_storage_v1beta1.BigQueryStorageClient(credentials=credentials) - - -@pytest.fixture() -def table_reference(): - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = "bigquery-public-data" - table_ref.dataset_id = "usa_names" - table_ref.table_id = "usa_1910_2013" - return table_ref - - -@pytest.fixture() -def small_table_reference(): - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = "bigquery-public-data" - table_ref.dataset_id = "utility_us" - table_ref.table_id = "country_code_iso" - return table_ref diff --git a/tests/system/v1beta1/test_reader_dataframe_v1beta1.py b/tests/system/v1beta1/test_reader_dataframe_v1beta1.py deleted file mode 100644 index 20143f0f..00000000 --- a/tests/system/v1beta1/test_reader_dataframe_v1beta1.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""System tests for reading rows with pandas connector.""" - -import numpy -import pyarrow.types -import pytest - -from google.cloud import bigquery_storage_v1beta1 - - -def test_read_rows_to_arrow(client, project_id): - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = "bigquery-public-data" - table_ref.dataset_id = "new_york_citibike" - table_ref.table_id = "citibike_stations" - - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - read_options.selected_fields.append("station_id") - read_options.selected_fields.append("latitude") - read_options.selected_fields.append("longitude") - read_options.selected_fields.append("name") - session = client.create_read_session( - table_ref, - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - read_options=read_options, - requested_streams=1, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - tbl = client.read_rows(stream_pos).to_arrow(session) - - assert tbl.num_columns == 4 - schema = tbl.schema - # Use field with a name specifier as there may be ordering differences - # when selected_fields is used - assert pyarrow.types.is_int64(schema.field("station_id").type) - assert pyarrow.types.is_float64(schema.field("latitude").type) - assert pyarrow.types.is_float64(schema.field("longitude").type) - assert pyarrow.types.is_string(schema.field("name").type) - - -@pytest.mark.parametrize( - "data_format,expected_schema_type", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO, "avro_schema"), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW, "arrow_schema"), - ), -) -def test_read_rows_to_dataframe(client, project_id, data_format, expected_schema_type): - table_ref = bigquery_storage_v1beta1.types.TableReference() - table_ref.project_id = "bigquery-public-data" - table_ref.dataset_id = "new_york_citibike" - table_ref.table_id = "citibike_stations" - session = client.create_read_session( - table_ref, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - ) - schema_type = session.WhichOneof("schema") - assert schema_type == expected_schema_type - - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - frame = client.read_rows(stream_pos).to_dataframe( - session, dtypes={"latitude": numpy.float16} - ) - - # Station ID is a required field (no nulls), so the datatype should always - # be integer. - assert frame.station_id.dtype.name == "int64" - assert frame.latitude.dtype.name == "float16" - assert frame.longitude.dtype.name == "float64" - assert frame["name"].str.startswith("Central Park").any() diff --git a/tests/system/v1beta1/test_reader_v1beta1.py b/tests/system/v1beta1/test_reader_v1beta1.py deleted file mode 100644 index cd43d02c..00000000 --- a/tests/system/v1beta1/test_reader_v1beta1.py +++ /dev/null @@ -1,468 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""System tests for reading rows from tables.""" - -import copy -import datetime as dt -import decimal -import re - -import pytest -import pytz - -from google.cloud import bigquery -from google.cloud import bigquery_storage_v1beta1 -from google.protobuf import timestamp_pb2 - - -def _to_bq_table_ref(proto_table_ref, partition_suffix=""): - """Converts protobuf table reference to bigquery table reference. - - Args: - proto_table_ref (bigquery_storage_v1beta1.types.TableReference): - A protobuf reference to a table. - partition_suffix (str): - An optional suffix to append to the table_id, useful for selecting - partitions of ingestion-time partitioned tables. - - Returns: - google.cloud.bigquery.table.TableReference - """ - return bigquery.table.TableReference.from_api_repr( - { - "projectId": proto_table_ref.project_id, - "datasetId": proto_table_ref.dataset_id, - "tableId": proto_table_ref.table_id + partition_suffix, - } - ) - - -@pytest.mark.parametrize( - "data_format,expected_schema_type", - ( - (None, "avro_schema"), # Default format (Avro). - (bigquery_storage_v1beta1.enums.DataFormat.AVRO, "avro_schema"), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW, "arrow_schema"), - ), -) -def test_read_rows_as_blocks_full_table( - client, project_id, small_table_reference, data_format, expected_schema_type -): - session = client.create_read_session( - small_table_reference, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - schema_type = session.WhichOneof("schema") - assert schema_type == expected_schema_type - - blocks = list(client.read_rows(stream_pos)) - - assert len(blocks) > 0 - block = blocks[0] - assert block.status.estimated_row_count > 0 - - -@pytest.mark.parametrize( - "data_format,expected_schema_type", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO, "avro_schema"), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW, "arrow_schema"), - ), -) -def test_read_rows_as_rows_full_table( - client, project_id, small_table_reference, data_format, expected_schema_type -): - session = client.create_read_session( - small_table_reference, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - rows = list(client.read_rows(stream_pos).rows(session)) - - assert len(rows) > 0 - - -@pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW), - ), -) -def test_basic_nonfiltered_read(client, project_id, table_with_data_ref, data_format): - session = client.create_read_session( - table_with_data_ref, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - rows = list(client.read_rows(stream_pos).rows(session)) - - assert len(rows) == 5 # all table rows - - -def test_filtered_rows_read(client, project_id, 
table_with_data_ref): - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - read_options.row_restriction = "age >= 50" - - session = client.create_read_session( - table_with_data_ref, - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.AVRO, - requested_streams=1, - read_options=read_options, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - rows = list(client.read_rows(stream_pos).rows(session)) - - assert len(rows) == 2 - - -@pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW), - ), -) -def test_column_selection_read(client, project_id, table_with_data_ref, data_format): - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - read_options.selected_fields.append("first_name") - read_options.selected_fields.append("age") - - session = client.create_read_session( - table_with_data_ref, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - read_options=read_options, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - rows = list(client.read_rows(stream_pos).rows(session)) - - for row in rows: - assert sorted(row.keys()) == ["age", "first_name"] - - -def test_snapshot(client, project_id, table_with_data_ref, bq_client): - before_new_data = timestamp_pb2.Timestamp() - before_new_data.GetCurrentTime() - - # load additional data into the table - new_data = [ - {u"first_name": u"NewGuyFoo", u"last_name": u"Smith", u"age": 46}, - {u"first_name": u"NewGuyBar", u"last_name": u"Jones", u"age": 30}, - ] - - destination = _to_bq_table_ref(table_with_data_ref) - bq_client.load_table_from_json(new_data, destination).result() - - # read data using the timestamp before the additional data load - session = client.create_read_session( - table_with_data_ref, - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.AVRO, - requested_streams=1, - table_modifiers={"snapshot_time": before_new_data}, - ) - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - rows = list(client.read_rows(stream_pos).rows(session)) - - # verify that only the data before the timestamp was returned - assert len(rows) == 5 # all initial records - - for row in rows: - assert "NewGuy" not in row["first_name"] # no new records - - -def test_column_partitioned_table( - client, project_id, col_partition_table_ref, bq_client -): - data = [ - {"description": "Tracking established.", "occurred": "2017-02-15"}, - {"description": "Look, a solar eclipse!", "occurred": "2018-02-15"}, - {"description": "Fake solar eclipse reported.", "occurred": "2018-02-15"}, - {"description": "1 day after false eclipse report.", "occurred": "2018-02-16"}, - {"description": "1 year after false eclipse report.", "occurred": "2019-02-15"}, - ] - - destination = _to_bq_table_ref(col_partition_table_ref) - bq_client.load_table_from_json(data, destination).result() - - # Read from the table with a partition filter specified, and verify that - # only the expected data is returned. 
- read_options = bigquery_storage_v1beta1.types.TableReadOptions() - read_options.row_restriction = "occurred = '2018-02-15'" - - session = client.create_read_session( - col_partition_table_ref, - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.AVRO, - requested_streams=1, - read_options=read_options, - ) - - assert session.streams # there should be some data to fetch - - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - rows = list(client.read_rows(stream_pos).rows(session)) - - assert len(rows) == 2 - - expected_descriptions = ("Look, a solar eclipse!", "Fake solar eclipse reported.") - for row in rows: - assert row["occurred"] == dt.date(2018, 2, 15) - assert row["description"] in expected_descriptions - - -@pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW), - ), -) -def test_ingestion_time_partitioned_table( - client, project_id, ingest_partition_table_ref, bq_client, data_format -): - data = [{"shape": "cigar", "altitude": 1200}, {"shape": "disc", "altitude": 750}] - destination = _to_bq_table_ref( - ingest_partition_table_ref, partition_suffix="$20190809" - ) - bq_client.load_table_from_json(data, destination).result() - - data = [ - {"shape": "sphere", "altitude": 3500}, - {"shape": "doughnut", "altitude": 100}, - ] - destination = _to_bq_table_ref( - ingest_partition_table_ref, partition_suffix="$20190810" - ) - bq_client.load_table_from_json(data, destination).result() - - data = [ - {"shape": "elephant", "altitude": 1}, - {"shape": "rocket", "altitude": 12700}, - ] - destination = _to_bq_table_ref( - ingest_partition_table_ref, partition_suffix="$20190811" - ) - bq_client.load_table_from_json(data, destination).result() - - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - read_options.row_restriction = "DATE(_PARTITIONTIME) = '2019-08-10'" - - session = client.create_read_session( - ingest_partition_table_ref, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - read_options=read_options, - ) - - assert session.streams # there should be some data to fetch - - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - rows = list(client.read_rows(stream_pos).rows(session)) - assert len(rows) == 2 - - if data_format == bigquery_storage_v1beta1.enums.DataFormat.AVRO: - actual_items = {(row["shape"], row["altitude"]) for row in rows} - else: - assert data_format == bigquery_storage_v1beta1.enums.DataFormat.ARROW - actual_items = {(row["shape"].as_py(), row["altitude"].as_py()) for row in rows} - - expected_items = {("sphere", 3500), ("doughnut", 100)} - assert actual_items == expected_items - - -@pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW), - ), -) -def test_decoding_data_types( - client, project_id, all_types_table_ref, bq_client, data_format -): - data = [ - { - u"string_field": u"Price: € 9.95.", - u"bytes_field": bigquery._helpers._bytes_to_json(b"byteees"), - u"int64_field": -1085, - u"float64_field": -42.195, - u"numeric_field": "1.4142", - u"bool_field": True, - u"geography_field": '{"type": "Point", "coordinates": [-49.3028, 69.0622]}', - u"person_struct_field": {u"name": u"John", u"age": 42}, - u"timestamp_field": 1565357902.017896, # 2019-08-09T13:38:22.017896 - u"date_field": u"1995-03-17", - u"time_field": 
u"16:24:51", - u"datetime_field": u"2005-10-26T19:49:41", - u"string_array_field": [u"foo", u"bar", u"baz"], - } - ] - - # Explicit schema is needed to recognize bytes_field as BYTES, and not STRING. - # Since partial schemas are not supported in load_table_from_json(), a full - # schema needs to be specified. - schema = [ - bigquery.SchemaField("string_field", "STRING"), - bigquery.SchemaField("bytes_field", "BYTES"), - bigquery.SchemaField("int64_field", "INT64"), - bigquery.SchemaField("float64_field", "FLOAT64"), - bigquery.SchemaField("numeric_field", "NUMERIC"), - bigquery.SchemaField("bool_field", "BOOL"), - bigquery.SchemaField("geography_field", "GEOGRAPHY"), - bigquery.SchemaField( - "person_struct_field", - "STRUCT", - fields=( - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("age", "INT64"), - ), - ), - bigquery.SchemaField("timestamp_field", "TIMESTAMP"), - bigquery.SchemaField("date_field", "DATE"), - bigquery.SchemaField("time_field", "TIME"), - bigquery.SchemaField("datetime_field", "DATETIME"), - bigquery.SchemaField("string_array_field", "STRING", mode="REPEATED"), - ] - - job_config = bigquery.LoadJobConfig(schema=schema) - destination = _to_bq_table_ref(all_types_table_ref) - bq_client.load_table_from_json(data, destination, job_config=job_config).result() - - session = client.create_read_session( - all_types_table_ref, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - ) - - assert session.streams # there should be data available - - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=session.streams[0] - ) - - if data_format == bigquery_storage_v1beta1.enums.DataFormat.AVRO: - rows = list(client.read_rows(stream_pos).rows(session)) - else: - assert data_format == bigquery_storage_v1beta1.enums.DataFormat.ARROW - rows = list( - dict((key, value.as_py()) for key, value in row_dict.items()) - for row_dict in client.read_rows(stream_pos).rows(session) - ) - - expected_result = { - u"string_field": u"Price: € 9.95.", - u"bytes_field": b"byteees", - u"int64_field": -1085, - u"float64_field": -42.195, - u"numeric_field": decimal.Decimal("1.4142"), - u"bool_field": True, - u"geography_field": "POINT(-49.3028 69.0622)", - u"person_struct_field": {u"name": u"John", u"age": 42}, - u"timestamp_field": dt.datetime(2019, 8, 9, 13, 38, 22, 17896, tzinfo=pytz.UTC), - u"date_field": dt.date(1995, 3, 17), - u"time_field": dt.time(16, 24, 51), - u"string_array_field": [u"foo", u"bar", u"baz"], - } - - result_copy = copy.copy(rows[0]) - del result_copy["datetime_field"] - assert result_copy == expected_result - - # Compare datetime separately, AVRO and PYARROW return different object types, - # although they should both represent the same value. - # TODO: when fixed, change assertion to assert a datetime instance! 
- expected_pattern = re.compile(r"2005-10-26( |T)19:49:41") - assert expected_pattern.match(str(rows[0]["datetime_field"])) - - -@pytest.mark.parametrize( - "data_format", - ( - (bigquery_storage_v1beta1.enums.DataFormat.AVRO), - (bigquery_storage_v1beta1.enums.DataFormat.ARROW), - ), -) -def test_resuming_read_from_offset(client, project_id, data_format): - shakespeare_ref = bigquery_storage_v1beta1.types.TableReference() - shakespeare_ref.project_id = project_id - shakespeare_ref.dataset_id = "public_samples_copy" - shakespeare_ref.table_id = "shakespeare" - - read_session = client.create_read_session( - shakespeare_ref, - "projects/{}".format(project_id), - format_=data_format, - requested_streams=1, - ) - - assert read_session.streams # there should be data available - - stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=read_session.streams[0], offset=0 - ) - read_rows_stream = client.read_rows(stream_pos) - - # fetch the first two batches of rows - rows_iter = iter(read_rows_stream) - some_rows = next(rows_iter) - more_rows = next(rows_iter) - - # fetch the rest of the rows using the stream offset - new_stream_pos = bigquery_storage_v1beta1.types.StreamPosition( - stream=read_session.streams[0], offset=some_rows.row_count + more_rows.row_count - ) - remaining_rows_count = sum( - 1 for _ in client.read_rows(new_stream_pos).rows(read_session) - ) - - # verify that the counts match - expected_len = 164656 # total rows in shakespeare table - actual_len = remaining_rows_count + some_rows.row_count + more_rows.row_count - assert actual_len == expected_len diff --git a/tests/unit/gapic/storage_v1/__init__.py b/tests/unit/gapic/storage_v1/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/unit/gapic/storage_v1/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/unit/gapic/storage_v1/test_big_query_read.py b/tests/unit/gapic/storage_v1/test_big_query_read.py new file mode 100644 index 00000000..848e0d83 --- /dev/null +++ b/tests/unit/gapic/storage_v1/test_big_query_read.py @@ -0,0 +1,1350 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import mock + +import grpc +from grpc.experimental import aio +import math +import pytest +from proto.marshal.rules.dates import DurationRule, TimestampRule + +from google import auth +from google.api_core import client_options +from google.api_core import exceptions +from google.api_core import gapic_v1 +from google.api_core import grpc_helpers +from google.api_core import grpc_helpers_async +from google.auth import credentials +from google.auth.exceptions import MutualTLSChannelError +from google.cloud.bigquery.storage_v1.services.big_query_read import ( + BigQueryReadAsyncClient, +) +from google.cloud.bigquery.storage_v1.services.big_query_read import BigQueryReadClient +from google.cloud.bigquery.storage_v1.services.big_query_read import transports +from google.cloud.bigquery.storage_v1.types import arrow +from google.cloud.bigquery.storage_v1.types import avro +from google.cloud.bigquery.storage_v1.types import storage +from google.cloud.bigquery.storage_v1.types import stream +from google.oauth2 import service_account +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + + +def client_cert_source_callback(): + return b"cert bytes", b"key bytes" + + +# If default endpoint is localhost, then default mtls endpoint will be the same. +# This method modifies the default endpoint so the client can produce a different +# mtls endpoint for endpoint testing purposes. +def modify_default_endpoint(client): + return ( + "foo.googleapis.com" + if ("localhost" in client.DEFAULT_ENDPOINT) + else client.DEFAULT_ENDPOINT + ) + + +def test__get_default_mtls_endpoint(): + api_endpoint = "example.googleapis.com" + api_mtls_endpoint = "example.mtls.googleapis.com" + sandbox_endpoint = "example.sandbox.googleapis.com" + sandbox_mtls_endpoint = "example.mtls.sandbox.googleapis.com" + non_googleapi = "api.example.com" + + assert BigQueryReadClient._get_default_mtls_endpoint(None) is None + assert ( + BigQueryReadClient._get_default_mtls_endpoint(api_endpoint) == api_mtls_endpoint + ) + assert ( + BigQueryReadClient._get_default_mtls_endpoint(api_mtls_endpoint) + == api_mtls_endpoint + ) + assert ( + BigQueryReadClient._get_default_mtls_endpoint(sandbox_endpoint) + == sandbox_mtls_endpoint + ) + assert ( + BigQueryReadClient._get_default_mtls_endpoint(sandbox_mtls_endpoint) + == sandbox_mtls_endpoint + ) + assert BigQueryReadClient._get_default_mtls_endpoint(non_googleapi) == non_googleapi + + +@pytest.mark.parametrize("client_class", [BigQueryReadClient, BigQueryReadAsyncClient]) +def test_big_query_read_client_from_service_account_file(client_class): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_file" + ) as factory: + factory.return_value = creds + client = client_class.from_service_account_file("dummy/file/path.json") + assert client._transport._credentials == creds + + client = client_class.from_service_account_json("dummy/file/path.json") + assert client._transport._credentials == creds + + assert client._transport._host == "bigquerystorage.googleapis.com:443" + + +def test_big_query_read_client_get_transport_class(): + transport = BigQueryReadClient.get_transport_class() + assert transport == transports.BigQueryReadGrpcTransport + + transport = BigQueryReadClient.get_transport_class("grpc") + assert transport == transports.BigQueryReadGrpcTransport + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, 
"grpc"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +@mock.patch.object( + BigQueryReadClient, "DEFAULT_ENDPOINT", modify_default_endpoint(BigQueryReadClient) +) +@mock.patch.object( + BigQueryReadAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryReadAsyncClient), +) +def test_big_query_read_client_client_options( + client_class, transport_class, transport_name +): + # Check that if channel is provided we won't create a new one. + with mock.patch.object(BigQueryReadClient, "get_transport_class") as gtc: + transport = transport_class(credentials=credentials.AnonymousCredentials()) + client = client_class(transport=transport) + gtc.assert_not_called() + + # Check that if channel is provided via str we will create a new one. + with mock.patch.object(BigQueryReadClient, "get_transport_class") as gtc: + client = client_class(transport=transport_name) + gtc.assert_called() + + # Check the case api_endpoint is provided. + options = client_options.ClientOptions(api_endpoint="squid.clam.whelk") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "never". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "never"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "always". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "always"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_MTLS_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT has + # unsupported value. + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "Unsupported"}): + with pytest.raises(MutualTLSChannelError): + client = client_class() + + # Check the case GOOGLE_API_USE_CLIENT_CERTIFICATE has unsupported value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": "Unsupported"} + ): + with pytest.raises(ValueError): + client = client_class() + + # Check the case quota_project_id is provided + options = client_options.ClientOptions(quota_project_id="octopus") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id="octopus", + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name,use_client_cert_env", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc", "true"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + "true", + ), + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc", "false"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + "false", + ), + ], +) +@mock.patch.object( + BigQueryReadClient, "DEFAULT_ENDPOINT", modify_default_endpoint(BigQueryReadClient) +) +@mock.patch.object( + BigQueryReadAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryReadAsyncClient), +) +@mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "auto"}) +def test_big_query_read_client_mtls_env_auto( + client_class, transport_class, transport_name, use_client_cert_env +): + # This tests the endpoint autoswitch behavior. Endpoint is autoswitched to the default + # mtls endpoint, if GOOGLE_API_USE_CLIENT_CERTIFICATE is "true" and client cert exists. + + # Check the case client_cert_source is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + options = client_options.ClientOptions( + client_cert_source=client_cert_source_callback + ) + with mock.patch.object(transport_class, "__init__") as patched: + ssl_channel_creds = mock.Mock() + with mock.patch( + "grpc.ssl_channel_credentials", return_value=ssl_channel_creds + ): + patched.return_value = None + client = client_class(client_options=options) + + if use_client_cert_env == "false": + expected_ssl_channel_creds = None + expected_host = client.DEFAULT_ENDPOINT + else: + expected_ssl_channel_creds = ssl_channel_creds + expected_host = client.DEFAULT_MTLS_ENDPOINT + + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + ssl_channel_credentials=expected_ssl_channel_creds, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case ADC client cert is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + ): + with mock.patch( + "google.auth.transport.grpc.SslCredentials.is_mtls", + new_callable=mock.PropertyMock, + ) as is_mtls_mock: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.ssl_credentials", + new_callable=mock.PropertyMock, + ) as ssl_credentials_mock: + if use_client_cert_env == "false": + is_mtls_mock.return_value = False + ssl_credentials_mock.return_value = None + expected_host = client.DEFAULT_ENDPOINT + expected_ssl_channel_creds = None + else: + is_mtls_mock.return_value = True + ssl_credentials_mock.return_value = mock.Mock() + expected_host = client.DEFAULT_MTLS_ENDPOINT + expected_ssl_channel_creds = ( + ssl_credentials_mock.return_value + ) + + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + ssl_channel_credentials=expected_ssl_channel_creds, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case client_cert_source and ADC client cert are not provided. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + ): + with mock.patch( + "google.auth.transport.grpc.SslCredentials.is_mtls", + new_callable=mock.PropertyMock, + ) as is_mtls_mock: + is_mtls_mock.return_value = False + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_big_query_read_client_client_options_scopes( + client_class, transport_class, transport_name +): + # Check the case scopes are provided. + options = client_options.ClientOptions(scopes=["1", "2"],) + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=["1", "2"], + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_big_query_read_client_client_options_credentials_file( + client_class, transport_class, transport_name +): + # Check the case credentials file is provided. 
+ options = client_options.ClientOptions(credentials_file="credentials.json") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file="credentials.json", + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_big_query_read_client_client_options_from_dict(): + with mock.patch( + "google.cloud.bigquery.storage_v1.services.big_query_read.transports.BigQueryReadGrpcTransport.__init__" + ) as grpc_transport: + grpc_transport.return_value = None + client = BigQueryReadClient(client_options={"api_endpoint": "squid.clam.whelk"}) + grpc_transport.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_create_read_session( + transport: str = "grpc", request_type=storage.CreateReadSessionRequest +): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.ReadSession( + name="name_value", + data_format=stream.DataFormat.AVRO, + table="table_value", + avro_schema=avro.AvroSchema(schema="schema_value"), + ) + + response = client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.CreateReadSessionRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, stream.ReadSession) + + assert response.name == "name_value" + + assert response.data_format == stream.DataFormat.AVRO + + assert response.table == "table_value" + + +def test_create_read_session_from_dict(): + test_create_read_session(request_type=dict) + + +@pytest.mark.asyncio +async def test_create_read_session_async(transport: str = "grpc_asyncio"): + client = BigQueryReadAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = storage.CreateReadSessionRequest() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + stream.ReadSession( + name="name_value", + data_format=stream.DataFormat.AVRO, + table="table_value", + ) + ) + + response = await client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == request + + # Establish that the response is the type that we expect. 
+ assert isinstance(response, stream.ReadSession) + + assert response.name == "name_value" + + assert response.data_format == stream.DataFormat.AVRO + + assert response.table == "table_value" + + +def test_create_read_session_field_headers(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.CreateReadSessionRequest() + request.read_session.table = "read_session.table/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.create_read_session), "__call__" + ) as call: + call.return_value = stream.ReadSession() + + client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "read_session.table=read_session.table/value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_create_read_session_field_headers_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.CreateReadSessionRequest() + request.read_session.table = "read_session.table/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.create_read_session), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.ReadSession()) + + await client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "read_session.table=read_session.table/value", + ) in kw["metadata"] + + +def test_create_read_session_flattened(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.ReadSession() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.create_read_session( + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + assert args[0].read_session == stream.ReadSession(name="name_value") + + assert args[0].max_stream_count == 1721 + + +def test_create_read_session_flattened_error(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. 
+ with pytest.raises(ValueError): + client.create_read_session( + storage.CreateReadSessionRequest(), + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + +@pytest.mark.asyncio +async def test_create_read_session_flattened_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.ReadSession() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.ReadSession()) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.create_read_session( + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + assert args[0].read_session == stream.ReadSession(name="name_value") + + assert args[0].max_stream_count == 1721 + + +@pytest.mark.asyncio +async def test_create_read_session_flattened_error_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.create_read_session( + storage.CreateReadSessionRequest(), + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + +def test_read_rows(transport: str = "grpc", request_type=storage.ReadRowsRequest): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client._transport.read_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.ReadRowsResponse()]) + + response = client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.ReadRowsRequest() + + # Establish that the response is the type that we expect. + for message in response: + assert isinstance(message, storage.ReadRowsResponse) + + +def test_read_rows_from_dict(): + test_read_rows(request_type=dict) + + +@pytest.mark.asyncio +async def test_read_rows_async(transport: str = "grpc_asyncio"): + client = BigQueryReadAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = storage.ReadRowsRequest() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.read_rows), "__call__" + ) as call: + # Designate an appropriate return value for the call. 
+ call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[storage.ReadRowsResponse()] + ) + + response = await client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == request + + # Establish that the response is the type that we expect. + message = await response.read() + assert isinstance(message, storage.ReadRowsResponse) + + +def test_read_rows_field_headers(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.ReadRowsRequest() + request.read_stream = "read_stream/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client._transport.read_rows), "__call__") as call: + call.return_value = iter([storage.ReadRowsResponse()]) + + client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "read_stream=read_stream/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_read_rows_field_headers_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.ReadRowsRequest() + request.read_stream = "read_stream/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.read_rows), "__call__" + ) as call: + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[storage.ReadRowsResponse()] + ) + + await client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "read_stream=read_stream/value",) in kw["metadata"] + + +def test_read_rows_flattened(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client._transport.read_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.ReadRowsResponse()]) + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.read_rows( + read_stream="read_stream_value", offset=647, + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].read_stream == "read_stream_value" + + assert args[0].offset == 647 + + +def test_read_rows_flattened_error(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. 
+ with pytest.raises(ValueError): + client.read_rows( + storage.ReadRowsRequest(), read_stream="read_stream_value", offset=647, + ) + + +@pytest.mark.asyncio +async def test_read_rows_flattened_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.read_rows), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.ReadRowsResponse()]) + + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.read_rows(read_stream="read_stream_value", offset=647,) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].read_stream == "read_stream_value" + + assert args[0].offset == 647 + + +@pytest.mark.asyncio +async def test_read_rows_flattened_error_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.read_rows( + storage.ReadRowsRequest(), read_stream="read_stream_value", offset=647, + ) + + +def test_split_read_stream( + transport: str = "grpc", request_type=storage.SplitReadStreamRequest +): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.split_read_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.SplitReadStreamResponse() + + response = client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.SplitReadStreamRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, storage.SplitReadStreamResponse) + + +def test_split_read_stream_from_dict(): + test_split_read_stream(request_type=dict) + + +@pytest.mark.asyncio +async def test_split_read_stream_async(transport: str = "grpc_asyncio"): + client = BigQueryReadAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = storage.SplitReadStreamRequest() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.split_read_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.SplitReadStreamResponse() + ) + + response = await client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == request + + # Establish that the response is the type that we expect. + assert isinstance(response, storage.SplitReadStreamResponse) + + +def test_split_read_stream_field_headers(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.SplitReadStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.split_read_stream), "__call__" + ) as call: + call.return_value = storage.SplitReadStreamResponse() + + client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_split_read_stream_field_headers_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.SplitReadStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._client._transport.split_read_stream), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.SplitReadStreamResponse() + ) + + await client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +def test_credentials_transport_error(): + # It is an error to provide credentials and a transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # It is an error to provide a credentials file and a transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryReadClient( + client_options={"credentials_file": "credentials.json"}, + transport=transport, + ) + + # It is an error to provide scopes and a transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryReadClient( + client_options={"scopes": ["1", "2"]}, transport=transport, + ) + + +def test_transport_instance(): + # A client may be instantiated with a custom transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + client = BigQueryReadClient(transport=transport) + assert client._transport is transport + + +def test_transport_get_channel(): + # A client may be instantiated with a custom transport instance. 
+ transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + transport = transports.BigQueryReadGrpcAsyncIOTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + +@pytest.mark.parametrize( + "transport_class", + [transports.BigQueryReadGrpcTransport, transports.BigQueryReadGrpcAsyncIOTransport], +) +def test_transport_adc(transport_class): + # Test default credentials are used if not provided. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transport_class() + adc.assert_called_once() + + +def test_transport_grpc_default(): + # A client should use the gRPC transport by default. + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + assert isinstance(client._transport, transports.BigQueryReadGrpcTransport,) + + +def test_big_query_read_base_transport_error(): + # Passing both a credentials object and credentials_file should raise an error + with pytest.raises(exceptions.DuplicateCredentialArgs): + transport = transports.BigQueryReadTransport( + credentials=credentials.AnonymousCredentials(), + credentials_file="credentials.json", + ) + + +def test_big_query_read_base_transport(): + # Instantiate the base transport. + with mock.patch( + "google.cloud.bigquery.storage_v1.services.big_query_read.transports.BigQueryReadTransport.__init__" + ) as Transport: + Transport.return_value = None + transport = transports.BigQueryReadTransport( + credentials=credentials.AnonymousCredentials(), + ) + + # Every method on the transport should just blindly + # raise NotImplementedError. + methods = ( + "create_read_session", + "read_rows", + "split_read_stream", + ) + for method in methods: + with pytest.raises(NotImplementedError): + getattr(transport, method)(request=object()) + + +def test_big_query_read_base_transport_with_credentials_file(): + # Instantiate the base transport with a credentials file + with mock.patch.object( + auth, "load_credentials_from_file" + ) as load_creds, mock.patch( + "google.cloud.bigquery.storage_v1.services.big_query_read.transports.BigQueryReadTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + load_creds.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.BigQueryReadTransport( + credentials_file="credentials.json", quota_project_id="octopus", + ) + load_creds.assert_called_once_with( + "credentials.json", + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id="octopus", + ) + + +def test_big_query_read_base_transport_with_adc(): + # Test the default credentials are used if credentials and credentials_file are None. + with mock.patch.object(auth, "default") as adc, mock.patch( + "google.cloud.bigquery.storage_v1.services.big_query_read.transports.BigQueryReadTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + adc.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.BigQueryReadTransport() + adc.assert_called_once() + + +def test_big_query_read_auth_adc(): + # If no credentials are provided, we should use ADC credentials. 
+ with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + BigQueryReadClient() + adc.assert_called_once_with( + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id=None, + ) + + +def test_big_query_read_transport_auth_adc(): + # If credentials and host are not provided, the transport class should use + # ADC credentials. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transports.BigQueryReadGrpcTransport( + host="squid.clam.whelk", quota_project_id="octopus" + ) + adc.assert_called_once_with( + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id="octopus", + ) + + +def test_big_query_read_host_no_port(): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="bigquerystorage.googleapis.com" + ), + ) + assert client._transport._host == "bigquerystorage.googleapis.com:443" + + +def test_big_query_read_host_with_port(): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="bigquerystorage.googleapis.com:8000" + ), + ) + assert client._transport._host == "bigquerystorage.googleapis.com:8000" + + +def test_big_query_read_grpc_transport_channel(): + channel = grpc.insecure_channel("http://localhost/") + + # Check that channel is used if provided. + transport = transports.BigQueryReadGrpcTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + + +def test_big_query_read_grpc_asyncio_transport_channel(): + channel = aio.insecure_channel("http://localhost/") + + # Check that channel is used if provided. 
+ transport = transports.BigQueryReadGrpcAsyncIOTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + + +@pytest.mark.parametrize( + "transport_class", + [transports.BigQueryReadGrpcTransport, transports.BigQueryReadGrpcAsyncIOTransport], +) +def test_big_query_read_transport_channel_mtls_with_client_cert_source(transport_class): + with mock.patch( + "grpc.ssl_channel_credentials", autospec=True + ) as grpc_ssl_channel_cred: + with mock.patch.object( + transport_class, "create_channel", autospec=True + ) as grpc_create_channel: + mock_ssl_cred = mock.Mock() + grpc_ssl_channel_cred.return_value = mock_ssl_cred + + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + + cred = credentials.AnonymousCredentials() + with pytest.warns(DeprecationWarning): + with mock.patch.object(auth, "default") as adc: + adc.return_value = (cred, None) + transport = transport_class( + host="squid.clam.whelk", + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=client_cert_source_callback, + ) + adc.assert_called_once() + + grpc_ssl_channel_cred.assert_called_once_with( + certificate_chain=b"cert bytes", private_key=b"key bytes" + ) + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + ) + assert transport.grpc_channel == mock_grpc_channel + + +@pytest.mark.parametrize( + "transport_class", + [transports.BigQueryReadGrpcTransport, transports.BigQueryReadGrpcAsyncIOTransport], +) +def test_big_query_read_transport_channel_mtls_with_adc(transport_class): + mock_ssl_cred = mock.Mock() + with mock.patch.multiple( + "google.auth.transport.grpc.SslCredentials", + __init__=mock.Mock(return_value=None), + ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), + ): + with mock.patch.object( + transport_class, "create_channel", autospec=True + ) as grpc_create_channel: + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + mock_cred = mock.Mock() + + with pytest.warns(DeprecationWarning): + transport = transport_class( + host="squid.clam.whelk", + credentials=mock_cred, + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=None, + ) + + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=mock_cred, + credentials_file=None, + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + ) + assert transport.grpc_channel == mock_grpc_channel + + +def test_read_session_path(): + project = "squid" + location = "clam" + session = "whelk" + + expected = "projects/{project}/locations/{location}/sessions/{session}".format( + project=project, location=location, session=session, + ) + actual = BigQueryReadClient.read_session_path(project, location, session) + assert expected == actual + + +def test_parse_read_session_path(): + expected = { + "project": "octopus", + "location": "oyster", + "session": "nudibranch", + } + path = BigQueryReadClient.read_session_path(**expected) + + # Check that the path construction is reversible. 
+ actual = BigQueryReadClient.parse_read_session_path(path) + assert expected == actual + + +def test_read_stream_path(): + project = "squid" + location = "clam" + session = "whelk" + stream = "octopus" + + expected = "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}".format( + project=project, location=location, session=session, stream=stream, + ) + actual = BigQueryReadClient.read_stream_path(project, location, session, stream) + assert expected == actual + + +def test_parse_read_stream_path(): + expected = { + "project": "oyster", + "location": "nudibranch", + "session": "cuttlefish", + "stream": "mussel", + } + path = BigQueryReadClient.read_stream_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_read_stream_path(path) + assert expected == actual + + +def test_client_withDEFAULT_CLIENT_INFO(): + client_info = gapic_v1.client_info.ClientInfo() + + with mock.patch.object( + transports.BigQueryReadTransport, "_prep_wrapped_messages" + ) as prep: + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) + + with mock.patch.object( + transports.BigQueryReadTransport, "_prep_wrapped_messages" + ) as prep: + transport_class = BigQueryReadClient.get_transport_class() + transport = transport_class( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) diff --git a/tests/unit/gapic/v1/test_big_query_read_client_v1.py b/tests/unit/gapic/v1/test_big_query_read_client_v1.py deleted file mode 100644 index 0ba9681d..00000000 --- a/tests/unit/gapic/v1/test_big_query_read_client_v1.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Unit tests.""" - -import mock -import pytest - -from google.cloud.bigquery_storage_v1.gapic import big_query_read_client # noqa -from google.cloud.bigquery_storage_v1.proto import storage_pb2 -from google.cloud.bigquery_storage_v1.proto import stream_pb2 - - -class MultiCallableStub(object): - """Stub for the grpc.UnaryUnaryMultiCallable interface.""" - - def __init__(self, method, channel_stub): - self.method = method - self.channel_stub = channel_stub - - def __call__(self, request, timeout=None, metadata=None, credentials=None): - self.channel_stub.requests.append((self.method, request)) - - response = None - if self.channel_stub.responses: - response = self.channel_stub.responses.pop() - - if isinstance(response, Exception): - raise response - - if response: - return response - - -class ChannelStub(object): - """Stub for the grpc.Channel interface.""" - - def __init__(self, responses=[]): - self.responses = responses - self.requests = [] - - def unary_unary(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - def unary_stream(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - -class CustomException(Exception): - pass - - -class TestBigQueryReadClient(object): - def test_create_read_session(self): - # Setup Expected Response - name = "name3373707" - table = "table110115790" - expected_response = {"name": name, "table": table} - expected_response = stream_pb2.ReadSession(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup Request - parent = client.project_path("[PROJECT]") - read_session = {} - - response = client.create_read_session(parent, read_session) - assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.CreateReadSessionRequest( - parent=parent, read_session=read_session - ) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_create_read_session_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup request - parent = client.project_path("[PROJECT]") - read_session = {} - - with pytest.raises(CustomException): - client.create_read_session(parent, read_session) - - def test_read_rows(self): - # Setup Expected Response - row_count = 1340416618 - expected_response = {"row_count": row_count} - expected_response = storage_pb2.ReadRowsResponse(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[iter([expected_response])]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup Request - read_stream = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - response = client.read_rows(read_stream) - resources = list(response) - assert len(resources) == 1 - assert expected_response == resources[0] - - assert len(channel.requests) == 1 - expected_request = 
storage_pb2.ReadRowsRequest(read_stream=read_stream) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_read_rows_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup request - read_stream = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - with pytest.raises(CustomException): - client.read_rows(read_stream) - - def test_split_read_stream(self): - # Setup Expected Response - expected_response = {} - expected_response = storage_pb2.SplitReadStreamResponse(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup Request - name = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - response = client.split_read_stream(name) - assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.SplitReadStreamRequest(name=name) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_split_read_stream_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup request - name = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - with pytest.raises(CustomException): - client.split_read_stream(name) diff --git a/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py b/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py deleted file mode 100644 index 7f659b5b..00000000 --- a/tests/unit/gapic/v1beta1/test_big_query_storage_client_v1beta1.py +++ /dev/null @@ -1,261 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Unit tests.""" - -import mock -import pytest - -from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client # noqa -from google.cloud.bigquery_storage_v1beta1.proto import storage_pb2 -from google.cloud.bigquery_storage_v1beta1.proto import table_reference_pb2 -from google.protobuf import empty_pb2 - - -class MultiCallableStub(object): - """Stub for the grpc.UnaryUnaryMultiCallable interface.""" - - def __init__(self, method, channel_stub): - self.method = method - self.channel_stub = channel_stub - - def __call__(self, request, timeout=None, metadata=None, credentials=None): - self.channel_stub.requests.append((self.method, request)) - - response = None - if self.channel_stub.responses: - response = self.channel_stub.responses.pop() - - if isinstance(response, Exception): - raise response - - if response: - return response - - -class ChannelStub(object): - """Stub for the grpc.Channel interface.""" - - def __init__(self, responses=[]): - self.responses = responses - self.requests = [] - - def unary_unary(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - def unary_stream(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - -class CustomException(Exception): - pass - - -class TestBigQueryStorageClient(object): - def test_create_read_session(self): - # Setup Expected Response - name = "name3373707" - expected_response = {"name": name} - expected_response = storage_pb2.ReadSession(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup Request - table_reference = {} - parent = client.project_path("[PROJECT]") - - response = client.create_read_session(table_reference, parent) - assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.CreateReadSessionRequest( - table_reference=table_reference, parent=parent - ) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_create_read_session_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup request - table_reference = {} - parent = client.project_path("[PROJECT]") - - with pytest.raises(CustomException): - client.create_read_session(table_reference, parent) - - def test_read_rows(self): - # Setup Expected Response - row_count = 1340416618 - expected_response = {"row_count": row_count} - expected_response = storage_pb2.ReadRowsResponse(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[iter([expected_response])]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup Request - read_position = {} - - response = client.read_rows(read_position) - resources = list(response) - assert len(resources) == 1 - assert expected_response == resources[0] - - assert len(channel.requests) == 1 - expected_request = 
storage_pb2.ReadRowsRequest(read_position=read_position) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_read_rows_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup request - read_position = {} - - with pytest.raises(CustomException): - client.read_rows(read_position) - - def test_batch_create_read_session_streams(self): - # Setup Expected Response - expected_response = {} - expected_response = storage_pb2.BatchCreateReadSessionStreamsResponse( - **expected_response - ) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup Request - session = {} - requested_streams = 1017221410 - - response = client.batch_create_read_session_streams(session, requested_streams) - assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.BatchCreateReadSessionStreamsRequest( - session=session, requested_streams=requested_streams - ) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_batch_create_read_session_streams_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup request - session = {} - requested_streams = 1017221410 - - with pytest.raises(CustomException): - client.batch_create_read_session_streams(session, requested_streams) - - def test_finalize_stream(self): - channel = ChannelStub() - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup Request - stream = {} - - client.finalize_stream(stream) - - assert len(channel.requests) == 1 - expected_request = storage_pb2.FinalizeStreamRequest(stream=stream) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_finalize_stream_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup request - stream = {} - - with pytest.raises(CustomException): - client.finalize_stream(stream) - - def test_split_read_stream(self): - # Setup Expected Response - expected_response = {} - expected_response = storage_pb2.SplitReadStreamResponse(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup Request - original_stream = {} - - response = client.split_read_stream(original_stream) - 
assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.SplitReadStreamRequest( - original_stream=original_stream - ) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_split_read_stream_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_storage_client.BigQueryStorageClient() - - # Setup request - original_stream = {} - - with pytest.raises(CustomException): - client.split_read_stream(original_stream) diff --git a/tests/unit/gapic/v1beta2/test_big_query_read_client_v1beta2.py b/tests/unit/gapic/v1beta2/test_big_query_read_client_v1beta2.py deleted file mode 100644 index 9900426d..00000000 --- a/tests/unit/gapic/v1beta2/test_big_query_read_client_v1beta2.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Unit tests.""" - -import mock -import pytest - -from google.cloud.bigquery_storage_v1beta2.gapic import big_query_read_client # noqa -from google.cloud.bigquery_storage_v1beta2.proto import storage_pb2 -from google.cloud.bigquery_storage_v1beta2.proto import stream_pb2 - - -class MultiCallableStub(object): - """Stub for the grpc.UnaryUnaryMultiCallable interface.""" - - def __init__(self, method, channel_stub): - self.method = method - self.channel_stub = channel_stub - - def __call__(self, request, timeout=None, metadata=None, credentials=None): - self.channel_stub.requests.append((self.method, request)) - - response = None - if self.channel_stub.responses: - response = self.channel_stub.responses.pop() - - if isinstance(response, Exception): - raise response - - if response: - return response - - -class ChannelStub(object): - """Stub for the grpc.Channel interface.""" - - def __init__(self, responses=[]): - self.responses = responses - self.requests = [] - - def unary_unary(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - def unary_stream(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - -class CustomException(Exception): - pass - - -class TestBigQueryReadClient(object): - def test_create_read_session(self): - # Setup Expected Response - name = "name3373707" - table = "table110115790" - expected_response = {"name": name, "table": table} - expected_response = stream_pb2.ReadSession(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup Request - parent = client.project_path("[PROJECT]") - read_session = {} - - response = 
client.create_read_session(parent, read_session) - assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.CreateReadSessionRequest( - parent=parent, read_session=read_session - ) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_create_read_session_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup request - parent = client.project_path("[PROJECT]") - read_session = {} - - with pytest.raises(CustomException): - client.create_read_session(parent, read_session) - - def test_read_rows(self): - # Setup Expected Response - row_count = 1340416618 - expected_response = {"row_count": row_count} - expected_response = storage_pb2.ReadRowsResponse(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[iter([expected_response])]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup Request - read_stream = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - response = client.read_rows(read_stream) - resources = list(response) - assert len(resources) == 1 - assert expected_response == resources[0] - - assert len(channel.requests) == 1 - expected_request = storage_pb2.ReadRowsRequest(read_stream=read_stream) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_read_rows_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup request - read_stream = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - with pytest.raises(CustomException): - client.read_rows(read_stream) - - def test_split_read_stream(self): - # Setup Expected Response - expected_response = {} - expected_response = storage_pb2.SplitReadStreamResponse(**expected_response) - - # Mock the API response - channel = ChannelStub(responses=[expected_response]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup Request - name = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - response = client.split_read_stream(name) - assert expected_response == response - - assert len(channel.requests) == 1 - expected_request = storage_pb2.SplitReadStreamRequest(name=name) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_split_read_stream_exception(self): - # Mock the API response - channel = ChannelStub(responses=[CustomException()]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = big_query_read_client.BigQueryReadClient() - - # Setup request - name = client.read_stream_path( - "[PROJECT]", "[LOCATION]", "[SESSION]", "[STREAM]" - ) - - with 
pytest.raises(CustomException): - client.split_read_stream(name) diff --git a/tests/unit/test_client_v1.py b/tests/unit/test_client_v1.py index b407b7fa..4a369a89 100644 --- a/tests/unit/test_client_v1.py +++ b/tests/unit/test_client_v1.py @@ -16,7 +16,7 @@ import mock import pytest -from google.cloud.bigquery_storage_v1 import types +from google.cloud.bigquery.storage import types PROJECT = "my-project" @@ -25,46 +25,60 @@ @pytest.fixture() def mock_transport(monkeypatch): - from google.cloud.bigquery_storage_v1.gapic.transports import ( - big_query_read_grpc_transport, - ) + from google.cloud.bigquery.storage_v1.services.big_query_read import transports + + fake_create_session_rpc = mock.Mock(name="create_read_session_rpc") + fake_read_rows_rpc = mock.Mock(name="read_rows_rpc") transport = mock.create_autospec( - big_query_read_grpc_transport.BigQueryReadGrpcTransport + transports.grpc.BigQueryReadGrpcTransport, instance=True ) + + transport.create_read_session = mock.Mock(name="fake_create_read_session") + transport.read_rows = mock.Mock(name="fake_read_rows") + + transport._wrapped_methods = { + transport.create_read_session: fake_create_session_rpc, + transport.read_rows: fake_read_rows_rpc, + } + return transport @pytest.fixture() def client_under_test(mock_transport): - from google.cloud.bigquery_storage_v1 import client - - # The mock is detected as a callable. By creating a real callable here, the - # mock can still be used to verify RPCs. - def transport_callable(credentials=None, default_class=None, address=None): - return mock_transport + from google.cloud.bigquery import storage - return client.BigQueryReadClient(transport=transport_callable) + return storage.BigQueryReadClient(transport=mock_transport) -def test_constructor_w_client_info(mock_transport): - from google.cloud.bigquery_storage_v1 import client +def test_constructor_w_client_info(): + from google.cloud.bigquery import storage - def transport_callable(credentials=None, default_class=None, address=None): - return mock_transport + class MyTransport: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs - client_under_test = client.BigQueryReadClient( - transport=transport_callable, - client_info=client_info.ClientInfo( - client_library_version="test-client-version" - ), + transport_class_patcher = mock.patch.object( + storage.BigQueryReadClient, "get_transport_class", return_value=MyTransport ) - user_agent = client_under_test._client_info.to_user_agent() + with transport_class_patcher: + client_under_test = storage.BigQueryReadClient( + client_info=client_info.ClientInfo( + client_library_version="test-client-version" + ), + ) + + transport_client_info = client_under_test._transport.kwargs["client_info"] + user_agent = transport_client_info.to_user_agent() assert "test-client-version" in user_agent def test_create_read_session(mock_transport, client_under_test): + assert client_under_test._transport is mock_transport # sanity check + table = "projects/{}/datasets/{}/tables/{}".format( "data-project-id", "dataset_id", "table_id" ) @@ -72,13 +86,16 @@ def test_create_read_session(mock_transport, client_under_test): read_session = types.ReadSession() read_session.table = table - client_under_test.create_read_session("projects/other-project", read_session) + client_under_test.create_read_session( + parent="projects/other-project", read_session=read_session + ) - expected_request = types.CreateReadSessionRequest( + expected_session_arg = types.CreateReadSessionRequest( 
parent="projects/other-project", read_session=read_session ) - mock_transport.create_read_session.assert_called_once_with( - expected_request, metadata=mock.ANY, timeout=mock.ANY + rpc_callable = mock_transport._wrapped_methods[mock_transport.create_read_session] + rpc_callable.assert_called_once_with( + expected_session_arg, metadata=mock.ANY, retry=mock.ANY, timeout=mock.ANY ) diff --git a/tests/unit/test_client_v1beta1.py b/tests/unit/test_client_v1beta1.py deleted file mode 100644 index fbce027d..00000000 --- a/tests/unit/test_client_v1beta1.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from google.api_core.gapic_v1 import client_info -import mock -import pytest - -from google.cloud.bigquery_storage_v1beta1 import types - - -PROJECT = "my-project" -SERVICE_ACCOUNT_PROJECT = "project-from-credentials" - - -@pytest.fixture() -def mock_transport(monkeypatch): - from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( - big_query_storage_grpc_transport, - ) - - transport = mock.create_autospec( - big_query_storage_grpc_transport.BigQueryStorageGrpcTransport - ) - return transport - - -@pytest.fixture() -def client_under_test(mock_transport): - from google.cloud.bigquery_storage_v1beta1 import client - - # The mock is detected as a callable. By creating a real callable here, the - # mock can still be used to verify RPCs. 
- def transport_callable(credentials=None, default_class=None, address=None): - return mock_transport - - return client.BigQueryStorageClient(transport=transport_callable) - - -def test_constructor_w_client_info(mock_transport): - from google.cloud.bigquery_storage_v1beta1 import client - - def transport_callable(credentials=None, default_class=None, address=None): - return mock_transport - - client_under_test = client.BigQueryStorageClient( - transport=transport_callable, - client_info=client_info.ClientInfo( - client_library_version="test-client-version" - ), - ) - - user_agent = client_under_test._client_info.to_user_agent() - assert "test-client-version" in user_agent - - -def test_create_read_session(mock_transport, client_under_test): - table_reference = types.TableReference( - project_id="data-project-id", dataset_id="dataset_id", table_id="table_id" - ) - - client_under_test.create_read_session(table_reference, "projects/other-project") - - expected_request = types.CreateReadSessionRequest( - table_reference=table_reference, parent="projects/other-project" - ) - mock_transport.create_read_session.assert_called_once_with( - expected_request, metadata=mock.ANY, timeout=mock.ANY - ) - - -def test_read_rows(mock_transport, client_under_test): - stream_position = types.StreamPosition() - - client_under_test.read_rows(stream_position) - - expected_request = types.ReadRowsRequest(read_position=stream_position) - mock_transport.create_read_session.read_rows( - expected_request, metadata=mock.ANY, timeout=mock.ANY - ) diff --git a/tests/unit/test_reader_v1.py b/tests/unit/test_reader_v1.py index 1e67d2b4..216d4561 100644 --- a/tests/unit/test_reader_v1.py +++ b/tests/unit/test_reader_v1.py @@ -29,7 +29,7 @@ import six import google.api_core.exceptions -from google.cloud import bigquery_storage_v1 +from google.cloud.bigquery.storage import types PROJECT = "my-project" @@ -125,10 +125,10 @@ def class_under_test(mut): @pytest.fixture() -def mock_client(): - from google.cloud.bigquery_storage_v1.gapic import big_query_read_client +def mock_gapic_client(): + from google.cloud.bigquery import storage_v1 - return mock.create_autospec(big_query_read_client.BigQueryReadClient) + return mock.create_autospec(storage_v1.BigQueryReadClient) def _bq_to_avro_blocks(bq_blocks, avro_schema_json): @@ -138,7 +138,7 @@ def _bq_to_avro_blocks(bq_blocks, avro_schema_json): blockio = six.BytesIO() for row in block: fastavro.schemaless_writer(blockio, avro_schema, row) - response = bigquery_storage_v1.types.ReadRowsResponse() + response = types.ReadRowsResponse() response.row_count = len(block) response.avro_rows.serialized_binary_rows = blockio.getvalue() avro_blocks.append(response) @@ -166,7 +166,7 @@ def _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): def _bq_to_arrow_batches(bq_blocks, arrow_schema): arrow_batches = [] for record_batch in _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): - response = bigquery_storage_v1.types.ReadRowsResponse() + response = types.ReadRowsResponse() response.arrow_record_batch.serialized_record_batch = ( record_batch.serialize().to_pybytes() ) @@ -204,11 +204,11 @@ def _avro_blocks_w_deadline(avro_blocks): def _generate_avro_read_session(avro_schema_json): schema = json.dumps(avro_schema_json) - return bigquery_storage_v1.types.ReadSession(avro_schema={"schema": schema}) + return types.ReadSession(avro_schema={"schema": schema}) def _generate_arrow_read_session(arrow_schema): - return bigquery_storage_v1.types.ReadSession( + return types.ReadSession( 
arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()} ) @@ -252,9 +252,11 @@ def _get_avro_bytes(rows, avro_schema): return avro_file.getvalue() -def test_avro_rows_raises_import_error(mut, class_under_test, mock_client, monkeypatch): +def test_avro_rows_raises_import_error( + mut, class_under_test, mock_gapic_client, monkeypatch +): monkeypatch.setattr(mut, "fastavro", None) - reader = class_under_test([], mock_client, "", 0, {}) + reader = class_under_test([], mock_gapic_client, "", 0, {}) bq_columns = [{"name": "int_col", "type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) @@ -265,10 +267,10 @@ def test_avro_rows_raises_import_error(mut, class_under_test, mock_client, monke def test_pyarrow_rows_raises_import_error( - mut, class_under_test, mock_client, monkeypatch + mut, class_under_test, mock_gapic_client, monkeypatch ): monkeypatch.setattr(mut, "pyarrow", None) - reader = class_under_test([], mock_client, "", 0, {}) + reader = class_under_test([], mock_gapic_client, "", 0, {}) bq_columns = [{"name": "int_col", "type": "int64"}] arrow_schema = _bq_to_arrow_schema(bq_columns) @@ -279,53 +281,53 @@ def test_pyarrow_rows_raises_import_error( def test_rows_no_schema_set_raises_type_error( - mut, class_under_test, mock_client, monkeypatch + mut, class_under_test, mock_gapic_client, monkeypatch ): - reader = class_under_test([], mock_client, "", 0, {}) - read_session = bigquery_storage_v1.types.ReadSession() + reader = class_under_test([], mock_gapic_client, "", 0, {}) + read_session = types.ReadSession() with pytest.raises(TypeError): reader.rows(read_session) -def test_rows_w_empty_stream(class_under_test, mock_client): +def test_rows_w_empty_stream(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", "type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) read_session = _generate_avro_read_session(avro_schema) - reader = class_under_test([], mock_client, "", 0, {}) + reader = class_under_test([], mock_gapic_client, "", 0, {}) got = reader.rows(read_session) assert tuple(got) == () -def test_rows_w_empty_stream_arrow(class_under_test, mock_client): +def test_rows_w_empty_stream_arrow(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", "type": "int64"}] arrow_schema = _bq_to_arrow_schema(bq_columns) read_session = _generate_arrow_read_session(arrow_schema) - reader = class_under_test([], mock_client, "", 0, {}) + reader = class_under_test([], mock_gapic_client, "", 0, {}) got = reader.rows(read_session) assert tuple(got) == () -def test_rows_w_scalars(class_under_test, mock_client): +def test_rows_w_scalars(class_under_test, mock_gapic_client): avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) read_session = _generate_avro_read_session(avro_schema) avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema) - reader = class_under_test(avro_blocks, mock_client, "", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {}) got = tuple(reader.rows(read_session)) expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS)) assert got == expected -def test_rows_w_scalars_arrow(class_under_test, mock_client): +def test_rows_w_scalars_arrow(class_under_test, mock_gapic_client): arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) read_session = _generate_arrow_read_session(arrow_schema) arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) got = tuple( 
dict((key, value.as_py()) for key, value in row_dict.items()) for row_dict in reader.rows(read_session) @@ -335,7 +337,7 @@ def test_rows_w_scalars_arrow(class_under_test, mock_client): assert got == expected -def test_rows_w_timeout(class_under_test, mock_client): +def test_rows_w_timeout(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", "type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) read_session = _generate_avro_read_session(avro_schema) @@ -349,11 +351,11 @@ def test_rows_w_timeout(class_under_test, mock_client): bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]] avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - mock_client.read_rows.return_value = avro_blocks_2 + mock_gapic_client.read_rows.return_value = avro_blocks_2 reader = class_under_test( avro_blocks_1, - mock_client, + mock_gapic_client, "teststream", 0, {"metadata": {"test-key": "test-value"}}, @@ -364,10 +366,10 @@ def test_rows_w_timeout(class_under_test, mock_client): # Don't reconnect on DeadlineException. This allows user-specified timeouts # to be respected. - mock_client.read_rows.assert_not_called() + mock_gapic_client.read_rows.assert_not_called() -def test_rows_w_nonresumable_internal_error(class_under_test, mock_client): +def test_rows_w_nonresumable_internal_error(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", "type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) read_session = _generate_avro_read_session(avro_schema) @@ -376,17 +378,17 @@ def test_rows_w_nonresumable_internal_error(class_under_test, mock_client): _bq_to_avro_blocks(bq_blocks, avro_schema) ) - reader = class_under_test(avro_blocks, mock_client, "teststream", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "teststream", 0, {}) with pytest.raises( google.api_core.exceptions.InternalServerError, match="nonresumable error" ): list(reader.rows(read_session)) - mock_client.read_rows.assert_not_called() + mock_gapic_client.read_rows.assert_not_called() -def test_rows_w_reconnect(class_under_test, mock_client): +def test_rows_w_reconnect(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", "type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) read_session = _generate_avro_read_session(avro_schema) @@ -403,11 +405,11 @@ def test_rows_w_reconnect(class_under_test, mock_client): bq_blocks_3 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]] avro_blocks_3 = _bq_to_avro_blocks(bq_blocks_3, avro_schema) - mock_client.read_rows.side_effect = (avro_blocks_2, avro_blocks_3) + mock_gapic_client.read_rows.side_effect = (avro_blocks_2, avro_blocks_3) reader = class_under_test( avro_blocks_1, - mock_client, + mock_gapic_client, "teststream", 0, {"metadata": {"test-key": "test-value"}}, @@ -423,15 +425,15 @@ def test_rows_w_reconnect(class_under_test, mock_client): ) assert tuple(got) == expected - mock_client.read_rows.assert_any_call( - "teststream", 4, metadata={"test-key": "test-value"} + mock_gapic_client.read_rows.assert_any_call( + read_stream="teststream", offset=4, metadata={"test-key": "test-value"} ) - mock_client.read_rows.assert_called_with( - "teststream", 7, metadata={"test-key": "test-value"} + mock_gapic_client.read_rows.assert_called_with( + read_stream="teststream", offset=7, metadata={"test-key": "test-value"} ) -def test_rows_w_reconnect_by_page(class_under_test, mock_client): +def test_rows_w_reconnect_by_page(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", 
"type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) read_session = _generate_avro_read_session(avro_schema) @@ -443,11 +445,11 @@ def test_rows_w_reconnect_by_page(class_under_test, mock_client): bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]] avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - mock_client.read_rows.return_value = avro_blocks_2 + mock_gapic_client.read_rows.return_value = avro_blocks_2 reader = class_under_test( _pages_w_unavailable(avro_blocks_1), - mock_client, + mock_gapic_client, "teststream", 0, {"metadata": {"test-key": "test-value"}}, @@ -480,13 +482,13 @@ def test_rows_w_reconnect_by_page(class_under_test, mock_client): def test_to_arrow_no_pyarrow_raises_import_error( - mut, class_under_test, mock_client, monkeypatch + mut, class_under_test, mock_gapic_client, monkeypatch ): monkeypatch.setattr(mut, "pyarrow", None) arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) read_session = _generate_arrow_read_session(arrow_schema) arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) with pytest.raises(ImportError): reader.to_arrow(read_session) @@ -502,7 +504,7 @@ def test_to_arrow_w_scalars_arrow(class_under_test): arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) read_session = _generate_arrow_read_session(arrow_schema) arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) actual_table = reader.to_arrow(read_session) expected_table = pyarrow.Table.from_batches( _bq_to_arrow_batch_objects(SCALAR_BLOCKS, arrow_schema) @@ -511,14 +513,14 @@ def test_to_arrow_w_scalars_arrow(class_under_test): def test_to_dataframe_no_pandas_raises_import_error( - mut, class_under_test, mock_client, monkeypatch + mut, class_under_test, mock_gapic_client, monkeypatch ): monkeypatch.setattr(mut, "pandas", None) avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) read_session = _generate_avro_read_session(avro_schema) avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema) - reader = class_under_test(avro_blocks, mock_client, "", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {}) with pytest.raises(ImportError): reader.to_dataframe(read_session) @@ -531,10 +533,10 @@ def test_to_dataframe_no_pandas_raises_import_error( def test_to_dataframe_no_schema_set_raises_type_error( - mut, class_under_test, mock_client, monkeypatch + mut, class_under_test, mock_gapic_client, monkeypatch ): - reader = class_under_test([], mock_client, "", 0, {}) - read_session = bigquery_storage_v1.types.ReadSession() + reader = class_under_test([], mock_gapic_client, "", 0, {}) + read_session = types.ReadSession() with pytest.raises(TypeError): reader.to_dataframe(read_session) @@ -545,7 +547,7 @@ def test_to_dataframe_w_scalars(class_under_test): read_session = _generate_avro_read_session(avro_schema) avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema) - reader = class_under_test(avro_blocks, mock_client, "", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session) expected = pandas.DataFrame( @@ -575,7 +577,7 @@ def test_to_dataframe_w_scalars_arrow(class_under_test): read_session = _generate_arrow_read_session(arrow_schema) arrow_batches = 
_bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session) expected = pandas.DataFrame( @@ -602,7 +604,7 @@ def test_to_dataframe_w_dtypes(class_under_test): ] avro_blocks = _bq_to_avro_blocks(blocks, avro_schema) - reader = class_under_test(avro_blocks, mock_client, "", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) expected = pandas.DataFrame( @@ -632,7 +634,7 @@ def test_to_dataframe_w_dtypes_arrow(class_under_test): ] arrow_batches = _bq_to_arrow_batches(blocks, arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) expected = pandas.DataFrame( @@ -652,7 +654,7 @@ def test_to_dataframe_empty_w_scalars_avro(class_under_test): avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) read_session = _generate_avro_read_session(avro_schema) avro_blocks = _bq_to_avro_blocks([], avro_schema) - reader = class_under_test(avro_blocks, mock_client, "", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session) @@ -672,7 +674,7 @@ def test_to_dataframe_empty_w_scalars_arrow(class_under_test): arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) read_session = _generate_arrow_read_session(arrow_schema) arrow_batches = _bq_to_arrow_batches([], arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session) @@ -688,7 +690,7 @@ def test_to_dataframe_empty_w_scalars_arrow(class_under_test): ) -def test_to_dataframe_empty_w_dtypes_avro(class_under_test, mock_client): +def test_to_dataframe_empty_w_dtypes_avro(class_under_test, mock_gapic_client): avro_schema = _bq_to_avro_schema( [ {"name": "bigfloat", "type": "float64"}, @@ -697,7 +699,7 @@ def test_to_dataframe_empty_w_dtypes_avro(class_under_test, mock_client): ) read_session = _generate_avro_read_session(avro_schema) avro_blocks = _bq_to_avro_blocks([], avro_schema) - reader = class_under_test(avro_blocks, mock_client, "", 0, {}) + reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) @@ -711,7 +713,7 @@ def test_to_dataframe_empty_w_dtypes_avro(class_under_test, mock_client): ) -def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_client): +def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_gapic_client): arrow_schema = _bq_to_arrow_schema( [ {"name": "bigfloat", "type": "float64"}, @@ -720,7 +722,7 @@ def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_client): ) read_session = _generate_arrow_read_session(arrow_schema) arrow_batches = _bq_to_arrow_batches([], arrow_schema) - reader = class_under_test(arrow_batches, mock_client, "", 0, {}) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) @@ -734,7 +736,7 @@ def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_client): ) -def test_to_dataframe_by_page(class_under_test, mock_client): +def test_to_dataframe_by_page(class_under_test, mock_gapic_client): 
bq_columns = [ {"name": "int_col", "type": "int64"}, {"name": "bool_col", "type": "bool"}, @@ -752,11 +754,11 @@ def test_to_dataframe_by_page(class_under_test, mock_client): avro_blocks_1 = _bq_to_avro_blocks(bq_blocks_1, avro_schema) avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - mock_client.read_rows.return_value = avro_blocks_2 + mock_gapic_client.read_rows.return_value = avro_blocks_2 reader = class_under_test( _pages_w_unavailable(avro_blocks_1), - mock_client, + mock_gapic_client, "teststream", 0, {"metadata": {"test-key": "test-value"}}, @@ -797,7 +799,7 @@ def test_to_dataframe_by_page(class_under_test, mock_client): ) -def test_to_dataframe_by_page_arrow(class_under_test, mock_client): +def test_to_dataframe_by_page_arrow(class_under_test, mock_gapic_client): bq_columns = [ {"name": "int_col", "type": "int64"}, {"name": "bool_col", "type": "bool"}, @@ -825,9 +827,11 @@ def test_to_dataframe_by_page_arrow(class_under_test, mock_client): batch_1 = _bq_to_arrow_batches(bq_blocks_1, arrow_schema) batch_2 = _bq_to_arrow_batches(bq_blocks_2, arrow_schema) - mock_client.read_rows.return_value = batch_2 + mock_gapic_client.read_rows.return_value = batch_2 - reader = class_under_test(_pages_w_unavailable(batch_1), mock_client, "", 0, {}) + reader = class_under_test( + _pages_w_unavailable(batch_1), mock_gapic_client, "", 0, {} + ) got = reader.rows(read_session) pages = iter(got.pages) diff --git a/tests/unit/test_reader_v1beta1.py b/tests/unit/test_reader_v1beta1.py deleted file mode 100644 index 0cded144..00000000 --- a/tests/unit/test_reader_v1beta1.py +++ /dev/null @@ -1,868 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -import decimal -import itertools -import json - -import fastavro -import pyarrow -import mock -import pandas -import pandas.testing -import pytest -import pytz -import six - -import google.api_core.exceptions -from google.cloud import bigquery_storage_v1beta1 - - -PROJECT = "my-project" -BQ_TO_AVRO_TYPES = { - "int64": "long", - "float64": "double", - "bool": "boolean", - "numeric": {"type": "bytes", "logicalType": "decimal", "precision": 38, "scale": 9}, - "string": "string", - "bytes": "bytes", - "date": {"type": "int", "logicalType": "date"}, - "datetime": {"type": "string", "sqlType": "DATETIME"}, - "time": {"type": "long", "logicalType": "time-micros"}, - "timestamp": {"type": "long", "logicalType": "timestamp-micros"}, -} -# This dictionary is duplicated in bigquery/google/cloud/bigquery/_pandas_helpers.py -# When modifying it be sure to update it there as well. 
-BQ_TO_ARROW_TYPES = { - "int64": pyarrow.int64(), - "float64": pyarrow.float64(), - "bool": pyarrow.bool_(), - "numeric": pyarrow.decimal128(38, 9), - "string": pyarrow.utf8(), - "bytes": pyarrow.binary(), - "date": pyarrow.date32(), # int32 days since epoch - "datetime": pyarrow.timestamp("us"), - "time": pyarrow.time64("us"), - "timestamp": pyarrow.timestamp("us", tz="UTC"), -} -SCALAR_COLUMNS = [ - {"name": "int_col", "type": "int64"}, - {"name": "float_col", "type": "float64"}, - {"name": "num_col", "type": "numeric"}, - {"name": "bool_col", "type": "bool"}, - {"name": "str_col", "type": "string"}, - {"name": "bytes_col", "type": "bytes"}, - {"name": "date_col", "type": "date"}, - {"name": "time_col", "type": "time"}, - {"name": "ts_col", "type": "timestamp"}, -] -SCALAR_COLUMN_NAMES = [field["name"] for field in SCALAR_COLUMNS] -SCALAR_BLOCKS = [ - [ - { - "int_col": 123, - "float_col": 3.14, - "num_col": decimal.Decimal("9.99"), - "bool_col": True, - "str_col": "hello world", - "bytes_col": b"ascii bytes", - "date_col": datetime.date(1998, 9, 4), - "time_col": datetime.time(12, 0), - "ts_col": datetime.datetime(2000, 1, 1, 5, 0, tzinfo=pytz.utc), - }, - { - "int_col": 456, - "float_col": 2.72, - "num_col": decimal.Decimal("0.99"), - "bool_col": False, - "str_col": "hallo welt", - "bytes_col": b"\xbb\xee\xff", - "date_col": datetime.date(1995, 3, 2), - "time_col": datetime.time(13, 37), - "ts_col": datetime.datetime(1965, 4, 3, 2, 1, tzinfo=pytz.utc), - }, - ], - [ - { - "int_col": 789, - "float_col": 1.23, - "num_col": decimal.Decimal("5.67"), - "bool_col": True, - "str_col": u"こんにちは世界", - "bytes_col": b"\x54\x69\x6d", - "date_col": datetime.date(1970, 1, 1), - "time_col": datetime.time(16, 20), - "ts_col": datetime.datetime(1991, 8, 25, 20, 57, 8, tzinfo=pytz.utc), - } - ], -] - - -@pytest.fixture() -def mut(): - from google.cloud.bigquery_storage_v1beta1 import reader - - return reader - - -@pytest.fixture() -def class_under_test(mut): - return mut.ReadRowsStream - - -@pytest.fixture() -def mock_client(): - from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client - - return mock.create_autospec(big_query_storage_client.BigQueryStorageClient) - - -def _bq_to_avro_blocks(bq_blocks, avro_schema_json): - avro_schema = fastavro.parse_schema(avro_schema_json) - avro_blocks = [] - for block in bq_blocks: - blockio = six.BytesIO() - for row in block: - fastavro.schemaless_writer(blockio, avro_schema, row) - - response = bigquery_storage_v1beta1.types.ReadRowsResponse() - response.row_count = len(block) - response.avro_rows.serialized_binary_rows = blockio.getvalue() - avro_blocks.append(response) - return avro_blocks - - -def _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): - arrow_batches = [] - for block in bq_blocks: - arrays = [] - for name in arrow_schema.names: - arrays.append( - pyarrow.array( - (row[name] for row in block), - type=arrow_schema.field(name).type, - size=len(block), - ) - ) - arrow_batches.append( - pyarrow.RecordBatch.from_arrays(arrays, schema=arrow_schema) - ) - return arrow_batches - - -def _bq_to_arrow_batches(bq_blocks, arrow_schema): - arrow_batches = [] - for record_batch in _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): - response = bigquery_storage_v1beta1.types.ReadRowsResponse() - response.arrow_record_batch.serialized_record_batch = ( - record_batch.serialize().to_pybytes() - ) - arrow_batches.append(response) - return arrow_batches - - -def _pages_w_nonresumable_internal_error(avro_blocks): - for block in avro_blocks: - 
yield block - raise google.api_core.exceptions.InternalServerError( - "INTERNAL: Got a nonresumable error." - ) - - -def _pages_w_resumable_internal_error(avro_blocks): - for block in avro_blocks: - yield block - raise google.api_core.exceptions.InternalServerError( - "INTERNAL: Received RST_STREAM with error code 2." - ) - - -def _pages_w_unavailable(pages): - for page in pages: - yield page - raise google.api_core.exceptions.ServiceUnavailable("test: please reconnect") - - -def _avro_blocks_w_deadline(avro_blocks): - for block in avro_blocks: - yield block - raise google.api_core.exceptions.DeadlineExceeded("test: timeout, don't reconnect") - - -def _generate_avro_read_session(avro_schema_json): - schema = json.dumps(avro_schema_json) - return bigquery_storage_v1beta1.types.ReadSession(avro_schema={"schema": schema}) - - -def _generate_arrow_read_session(arrow_schema): - return bigquery_storage_v1beta1.types.ReadSession( - arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()} - ) - - -def _bq_to_avro_schema(bq_columns): - fields = [] - avro_schema = {"type": "record", "name": "__root__", "fields": fields} - - for column in bq_columns: - doc = column.get("description") - name = column["name"] - type_ = BQ_TO_AVRO_TYPES[column["type"]] - mode = column.get("mode", "nullable").lower() - - if mode == "nullable": - type_ = ["null", type_] - - fields.append({"name": name, "type": type_, "doc": doc}) - - return avro_schema - - -def _bq_to_arrow_schema(bq_columns): - def bq_col_as_field(column): - metadata = None - if column.get("description") is not None: - metadata = {"description": column.get("description")} - name = column["name"] - type_ = BQ_TO_ARROW_TYPES[column["type"]] - mode = column.get("mode", "nullable").lower() - - return pyarrow.field(name, type_, mode == "nullable", metadata) - - return pyarrow.schema(bq_col_as_field(c) for c in bq_columns) - - -def _get_avro_bytes(rows, avro_schema): - avro_file = six.BytesIO() - for row in rows: - fastavro.schemaless_writer(avro_file, avro_schema, row) - return avro_file.getvalue() - - -def test_avro_rows_raises_import_error(mut, class_under_test, mock_client, monkeypatch): - monkeypatch.setattr(mut, "fastavro", None) - reader = class_under_test( - [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - - bq_columns = [{"name": "int_col", "type": "int64"}] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = _generate_avro_read_session(avro_schema) - - with pytest.raises(ImportError): - reader.rows(read_session) - - -def test_pyarrow_rows_raises_import_error( - mut, class_under_test, mock_client, monkeypatch -): - monkeypatch.setattr(mut, "pyarrow", None) - reader = class_under_test( - [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - - bq_columns = [{"name": "int_col", "type": "int64"}] - arrow_schema = _bq_to_arrow_schema(bq_columns) - read_session = _generate_arrow_read_session(arrow_schema) - - with pytest.raises(ImportError): - reader.rows(read_session) - - -def test_rows_no_schema_set_raises_type_error( - mut, class_under_test, mock_client, monkeypatch -): - reader = class_under_test( - [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - read_session = bigquery_storage_v1beta1.types.ReadSession() - - with pytest.raises(TypeError): - reader.rows(read_session) - - -def test_rows_w_empty_stream(class_under_test, mock_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = 
_generate_avro_read_session(avro_schema) - reader = class_under_test( - [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - - got = reader.rows(read_session) - assert got.total_rows is None - assert tuple(got) == () - - -def test_rows_w_empty_stream_arrow(class_under_test, mock_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - arrow_schema = _bq_to_arrow_schema(bq_columns) - read_session = _generate_arrow_read_session(arrow_schema) - reader = class_under_test( - [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - - got = reader.rows(read_session) - assert got.total_rows is None - assert tuple(got) == () - - -def test_rows_w_scalars(class_under_test, mock_client): - avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) - read_session = _generate_avro_read_session(avro_schema) - avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema) - - reader = class_under_test( - avro_blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - got = tuple(reader.rows(read_session)) - - expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS)) - assert got == expected - - -def test_rows_w_scalars_arrow(class_under_test, mock_client): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - - reader = class_under_test( - arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - got = tuple( - dict((key, value.as_py()) for key, value in row_dict.items()) - for row_dict in reader.rows(read_session) - ) - - expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS)) - assert got == expected - - -def test_rows_w_timeout(class_under_test, mock_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = _generate_avro_read_session(avro_schema) - bq_blocks_1 = [ - [{"int_col": 123}, {"int_col": 234}], - [{"int_col": 345}, {"int_col": 456}], - ] - avro_blocks_1 = _avro_blocks_w_deadline( - _bq_to_avro_blocks(bq_blocks_1, avro_schema) - ) - bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]] - avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - - mock_client.read_rows.return_value = avro_blocks_2 - stream_position = bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"} - ) - - reader = class_under_test( - avro_blocks_1, - mock_client, - stream_position, - {"metadata": {"test-key": "test-value"}}, - ) - - with pytest.raises(google.api_core.exceptions.DeadlineExceeded): - list(reader.rows(read_session)) - - # Don't reconnect on DeadlineException. This allows user-specified timeouts - # to be respected. 
- mock_client.read_rows.assert_not_called() - - -def test_rows_w_nonresumable_internal_error(class_under_test, mock_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = _generate_avro_read_session(avro_schema) - bq_blocks = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]] - avro_blocks = _pages_w_nonresumable_internal_error( - _bq_to_avro_blocks(bq_blocks, avro_schema) - ) - - stream_position = bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"} - ) - - reader = class_under_test(avro_blocks, mock_client, stream_position, {}) - - with pytest.raises( - google.api_core.exceptions.InternalServerError, match="nonresumable error" - ): - list(reader.rows(read_session)) - - mock_client.read_rows.assert_not_called() - - -def test_rows_w_reconnect(class_under_test, mock_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = _generate_avro_read_session(avro_schema) - bq_blocks_1 = [ - [{"int_col": 123}, {"int_col": 234}], - [{"int_col": 345}, {"int_col": 456}], - ] - avro_blocks_1 = _pages_w_unavailable(_bq_to_avro_blocks(bq_blocks_1, avro_schema)) - bq_blocks_2 = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]] - avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - avro_blocks_2 = _pages_w_resumable_internal_error( - _bq_to_avro_blocks(bq_blocks_2, avro_schema) - ) - bq_blocks_3 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]] - avro_blocks_3 = _bq_to_avro_blocks(bq_blocks_3, avro_schema) - - for block in avro_blocks_3: - block.status.estimated_row_count = 7 - - mock_client.read_rows.side_effect = (avro_blocks_2, avro_blocks_3) - stream_position = bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"} - ) - - reader = class_under_test( - avro_blocks_1, - mock_client, - stream_position, - {"metadata": {"test-key": "test-value"}}, - ) - got = reader.rows(read_session) - - expected = tuple( - itertools.chain( - itertools.chain.from_iterable(bq_blocks_1), - itertools.chain.from_iterable(bq_blocks_2), - itertools.chain.from_iterable(bq_blocks_3), - ) - ) - - assert tuple(got) == expected - assert got.total_rows == 7 - mock_client.read_rows.assert_any_call( - bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"}, offset=4 - ), - metadata={"test-key": "test-value"}, - ) - mock_client.read_rows.assert_called_with( - bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"}, offset=7 - ), - metadata={"test-key": "test-value"}, - ) - - -def test_rows_w_reconnect_by_page(class_under_test, mock_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = _generate_avro_read_session(avro_schema) - bq_blocks_1 = [ - [{"int_col": 123}, {"int_col": 234}], - [{"int_col": 345}, {"int_col": 456}], - ] - avro_blocks_1 = _bq_to_avro_blocks(bq_blocks_1, avro_schema) - bq_blocks_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]] - avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - - avro_blocks_1[0].status.estimated_row_count = 8 - avro_blocks_1[1].status.estimated_row_count = 6 - avro_blocks_2[0].status.estimated_row_count = 9 - avro_blocks_2[1].status.estimated_row_count = 7 - - mock_client.read_rows.return_value = avro_blocks_2 - stream_position = bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"} - ) - - reader = class_under_test( - 
_pages_w_unavailable(avro_blocks_1), - mock_client, - stream_position, - {"metadata": {"test-key": "test-value"}}, - ) - got = reader.rows(read_session) - pages = iter(got.pages) - - assert got.total_rows is None - - page_1 = next(pages) - assert got.total_rows == 8 - assert page_1.num_items == 2 - assert page_1.remaining == 2 - assert tuple(page_1) == tuple(bq_blocks_1[0]) - assert page_1.num_items == 2 - assert page_1.remaining == 0 - - page_2 = next(pages) - assert got.total_rows == 6 - assert next(page_2) == bq_blocks_1[1][0] - assert page_2.num_items == 2 - assert page_2.remaining == 1 - assert next(page_2) == bq_blocks_1[1][1] - - page_3 = next(pages) - assert tuple(page_3) == tuple(bq_blocks_2[0]) - assert page_3.num_items == 2 - assert page_3.remaining == 0 - assert got.total_rows == 9 - - page_4 = next(pages) - assert got.total_rows == 7 - assert tuple(page_4) == tuple(bq_blocks_2[1]) - assert page_4.num_items == 1 - assert page_4.remaining == 0 - - -def test_to_arrow_no_pyarrow_raises_import_error( - mut, class_under_test, mock_client, monkeypatch -): - monkeypatch.setattr(mut, "pyarrow", None) - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test( - arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - - with pytest.raises(ImportError): - reader.to_arrow(read_session) - - with pytest.raises(ImportError): - reader.rows(read_session).to_arrow() - - with pytest.raises(ImportError): - next(reader.rows(read_session).pages).to_arrow() - - -def test_to_arrow_w_scalars_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test( - arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - actual_table = reader.to_arrow(read_session) - expected_table = pyarrow.Table.from_batches( - _bq_to_arrow_batch_objects(SCALAR_BLOCKS, arrow_schema) - ) - assert actual_table == expected_table - - -def test_to_dataframe_no_pandas_raises_import_error( - mut, class_under_test, mock_client, monkeypatch -): - monkeypatch.setattr(mut, "pandas", None) - avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) - read_session = _generate_avro_read_session(avro_schema) - avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema) - - reader = class_under_test( - avro_blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - - with pytest.raises(ImportError): - reader.to_dataframe(read_session) - - with pytest.raises(ImportError): - reader.rows(read_session).to_dataframe() - - with pytest.raises(ImportError): - next(reader.rows(read_session).pages).to_dataframe() - - -def test_to_dataframe_no_schema_set_raises_type_error( - mut, class_under_test, mock_client, monkeypatch -): - reader = class_under_test( - [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - read_session = bigquery_storage_v1beta1.types.ReadSession() - - with pytest.raises(TypeError): - reader.to_dataframe(read_session) - - -def test_to_dataframe_w_scalars(class_under_test): - avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) - read_session = _generate_avro_read_session(avro_schema) - avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema) - - reader = class_under_test( - avro_blocks, mock_client, 
bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - got = reader.to_dataframe(read_session) - - expected = pandas.DataFrame( - list(itertools.chain.from_iterable(SCALAR_BLOCKS)), columns=SCALAR_COLUMN_NAMES - ) - # fastavro provides its own UTC definition, so - # compare the timestamp columns separately. - got_ts = got["ts_col"] - got = got.drop(columns=["ts_col"]) - expected_ts = expected["ts_col"] - expected = expected.drop(columns=["ts_col"]) - - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - pandas.testing.assert_series_equal( - got_ts.reset_index(drop=True), - expected_ts.reset_index(drop=True), - check_dtype=False, # fastavro's UTC means different dtype - check_datetimelike_compat=True, - ) - - -def test_to_dataframe_w_scalars_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - - reader = class_under_test( - arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - got = reader.to_dataframe(read_session) - - expected = pandas.DataFrame( - list(itertools.chain.from_iterable(SCALAR_BLOCKS)), columns=SCALAR_COLUMN_NAMES - ) - - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - -def test_to_dataframe_w_dtypes(class_under_test): - avro_schema = _bq_to_avro_schema( - [ - {"name": "bigfloat", "type": "float64"}, - {"name": "lilfloat", "type": "float64"}, - ] - ) - read_session = _generate_avro_read_session(avro_schema) - blocks = [ - [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}], - [{"bigfloat": 3.75, "lilfloat": 11.0}], - ] - avro_blocks = _bq_to_avro_blocks(blocks, avro_schema) - - reader = class_under_test( - avro_blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) - - expected = pandas.DataFrame( - { - "bigfloat": [1.25, 2.5, 3.75], - "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"), - }, - columns=["bigfloat", "lilfloat"], - ) - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - -def test_to_dataframe_w_dtypes_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema( - [ - {"name": "bigfloat", "type": "float64"}, - {"name": "lilfloat", "type": "float64"}, - ] - ) - read_session = _generate_arrow_read_session(arrow_schema) - blocks = [ - [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}], - [{"bigfloat": 3.75, "lilfloat": 11.0}], - ] - arrow_batches = _bq_to_arrow_batches(blocks, arrow_schema) - - reader = class_under_test( - arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {} - ) - got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) - - expected = pandas.DataFrame( - { - "bigfloat": [1.25, 2.5, 3.75], - "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"), - }, - columns=["bigfloat", "lilfloat"], - ) - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - -def test_to_dataframe_by_page(class_under_test, mock_client): - bq_columns = [ - {"name": "int_col", "type": "int64"}, - {"name": "bool_col", "type": 
"bool"}, - ] - avro_schema = _bq_to_avro_schema(bq_columns) - read_session = _generate_avro_read_session(avro_schema) - block_1 = [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}] - block_2 = [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}] - block_3 = [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}] - block_4 = [{"int_col": 890, "bool_col": True}] - # Break blocks into two groups to test that iteration continues across - # reconnection. - bq_blocks_1 = [block_1, block_2] - bq_blocks_2 = [block_3, block_4] - avro_blocks_1 = _bq_to_avro_blocks(bq_blocks_1, avro_schema) - avro_blocks_2 = _bq_to_avro_blocks(bq_blocks_2, avro_schema) - - mock_client.read_rows.return_value = avro_blocks_2 - stream_position = bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"} - ) - - reader = class_under_test( - _pages_w_unavailable(avro_blocks_1), - mock_client, - stream_position, - {"metadata": {"test-key": "test-value"}}, - ) - got = reader.rows(read_session) - pages = iter(got.pages) - - page_1 = next(pages) - pandas.testing.assert_frame_equal( - page_1.to_dataframe().reset_index(drop=True), - pandas.DataFrame(block_1, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_2 = next(pages) - pandas.testing.assert_frame_equal( - page_2.to_dataframe().reset_index(drop=True), - pandas.DataFrame(block_2, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_3 = next(pages) - pandas.testing.assert_frame_equal( - page_3.to_dataframe().reset_index(drop=True), - pandas.DataFrame(block_3, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_4 = next(pages) - pandas.testing.assert_frame_equal( - page_4.to_dataframe().reset_index(drop=True), - pandas.DataFrame(block_4, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - -def test_to_dataframe_by_page_arrow(class_under_test, mock_client): - bq_columns = [ - {"name": "int_col", "type": "int64"}, - {"name": "bool_col", "type": "bool"}, - ] - arrow_schema = _bq_to_arrow_schema(bq_columns) - read_session = _generate_arrow_read_session(arrow_schema) - - bq_block_1 = [ - {"int_col": 123, "bool_col": True}, - {"int_col": 234, "bool_col": False}, - ] - bq_block_2 = [ - {"int_col": 345, "bool_col": True}, - {"int_col": 456, "bool_col": False}, - ] - bq_block_3 = [ - {"int_col": 567, "bool_col": True}, - {"int_col": 789, "bool_col": False}, - ] - bq_block_4 = [{"int_col": 890, "bool_col": True}] - # Break blocks into two groups to test that iteration continues across - # reconnection. 
- bq_blocks_1 = [bq_block_1, bq_block_2] - bq_blocks_2 = [bq_block_3, bq_block_4] - batch_1 = _bq_to_arrow_batches(bq_blocks_1, arrow_schema) - batch_2 = _bq_to_arrow_batches(bq_blocks_2, arrow_schema) - - mock_client.read_rows.return_value = batch_2 - - reader = class_under_test( - _pages_w_unavailable(batch_1), - mock_client, - bigquery_storage_v1beta1.types.StreamPosition(), - {}, - ) - got = reader.rows(read_session) - pages = iter(got.pages) - - page_1 = next(pages) - pandas.testing.assert_frame_equal( - page_1.to_dataframe( - dtypes={"int_col": "int64", "bool_col": "bool"} - ).reset_index(drop=True), - pandas.DataFrame(bq_block_1, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_2 = next(pages) - pandas.testing.assert_frame_equal( - page_2.to_dataframe().reset_index(drop=True), - pandas.DataFrame(bq_block_2, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_3 = next(pages) - pandas.testing.assert_frame_equal( - page_3.to_dataframe().reset_index(drop=True), - pandas.DataFrame(bq_block_3, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_4 = next(pages) - pandas.testing.assert_frame_equal( - page_4.to_dataframe().reset_index(drop=True), - pandas.DataFrame(bq_block_4, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - -def test_copy_stream_position(mut): - read_position = bigquery_storage_v1beta1.types.StreamPosition( - stream={"name": "test"}, offset=41 - ) - got = mut._copy_stream_position(read_position) - assert got == read_position - got.offset = 42 - assert read_position.offset == 41 - - -def test_copy_stream_position_w_dict(mut): - read_position = {"stream": {"name": "test"}, "offset": 42} - got = mut._copy_stream_position(read_position) - assert got.stream.name == "test" - assert got.offset == 42
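Taken together, the `test_client_v1.py` changes above reduce to two points: the 2.x client is constructed with a transport *instance* (or a patched transport class) rather than a transport callable, and RPC calls are verified through the transport's `_wrapped_methods` mapping instead of on the transport method itself. The sketch below condenses that pattern; it assumes only the module paths and mocking approach visible in the diff, and the table path, project IDs, and the `fake_rpc` name are placeholders.

```py
# Minimal sketch of the 2.x test pattern shown above; resource names are
# placeholders and the `_wrapped_methods` wiring mirrors the mock_transport
# fixture in test_client_v1.py.
import mock

from google.cloud.bigquery import storage
from google.cloud.bigquery.storage import types
from google.cloud.bigquery.storage_v1.services.big_query_read import transports

# Autospec the gRPC transport and register a fake "wrapped" RPC for the method
# under test.
transport = mock.create_autospec(
    transports.grpc.BigQueryReadGrpcTransport, instance=True
)
transport.create_read_session = mock.Mock(name="create_read_session")
fake_rpc = mock.Mock(name="create_read_session_rpc")
transport._wrapped_methods = {transport.create_read_session: fake_rpc}

# The client takes the transport instance directly; no transport callable.
client = storage.BigQueryReadClient(transport=transport)

read_session = types.ReadSession()
read_session.table = "projects/data-project-id/datasets/dataset_id/tables/table_id"

# Request fields are passed as keyword arguments on the 2.x surface.
client.create_read_session(
    parent="projects/other-project", read_session=read_session
)

# The call lands on the wrapped RPC, not on the raw transport method.
request = fake_rpc.call_args[0][0]
assert request.parent == "projects/other-project"
assert request.read_session.table == read_session.table
```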
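The `test_reader_v1.py` reconnect assertions likewise expect the reader to resume by calling the client with keyword arguments (`read_stream=`, `offset=`) rather than the 1.x positional form. The following sketch exercises that call shape against the same autospec used by the `mock_gapic_client` fixture, so the real signature is enforced; the stream name, offset, and metadata values are the placeholders taken from the tests above.

```py
# Sketch of the keyword-argument call shape asserted by the reconnect tests;
# "teststream", 4, and the metadata dict are placeholder values.
import mock

from google.cloud.bigquery import storage_v1

# Autospec enforces the real read_rows() signature on this mock.
gapic_client = mock.create_autospec(storage_v1.BigQueryReadClient)

# 1.x called read_rows("teststream", 4, metadata=...); 2.x uses keywords.
gapic_client.read_rows(
    read_stream="teststream", offset=4, metadata={"test-key": "test-value"}
)

gapic_client.read_rows.assert_called_once_with(
    read_stream="teststream", offset=4, metadata={"test-key": "test-value"}
)
```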