BigQuery: Add debug logging statements to track when BQ Storage API is used. #8838

Merged · 2 commits · Aug 1, 2019
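Both new log statements use module-level loggers created with logging.getLogger(__name__), so they live under the google.cloud.bigquery namespace and stay silent unless DEBUG logging is enabled. A minimal sketch of how a caller might surface them (the project, dataset, and table IDs are placeholders):

    import logging

    from google.cloud import bigquery

    # Raise the package logger to DEBUG so the new breadcrumbs from
    # google.cloud.bigquery.table and google.cloud.bigquery._pandas_helpers
    # reach the root handler.
    logging.basicConfig()
    logging.getLogger("google.cloud.bigquery").setLevel(logging.DEBUG)

    client = bigquery.Client()
    table = client.get_table(
        bigquery.TableReference.from_string("my-project.my_dataset.my_table")
    )

    # Without a bqstorage_client this downloads via tabledata.list, logging:
    #   Started reading table 'my-project.my_dataset.my_table' with tabledata.list.
    # Passing bqstorage_client=... to to_dataframe() would log the
    # BQ Storage API session message instead.
    df = client.list_rows(table).to_dataframe()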
8 changes: 8 additions & 0 deletions bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -16,6 +16,7 @@
 
 import concurrent.futures
 import functools
+import logging
 import warnings
 
 from six.moves import queue
@@ -39,6 +40,8 @@
 from google.cloud.bigquery import schema
 
 
+_LOGGER = logging.getLogger(__name__)
+
 _NO_BQSTORAGE_ERROR = (
     "The google-cloud-bigquery-storage library is not installed, "
     "please install google-cloud-bigquery-storage to use bqstorage features."
@@ -341,6 +344,11 @@ def _download_table_bqstorage(
         read_options=read_options,
         requested_streams=requested_streams,
     )
+    _LOGGER.debug(
+        "Started reading table '{}.{}.{}' with BQ Storage API session '{}'.".format(
+            table.project, table.dataset_id, table.table_id, session.name
+        )
+    )
 
     # Avoid reading rows from an empty table.
     if not session.streams:
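With this in place, a read that goes through the BQ Storage API leaves a breadcrumb like the following (the IDs and session name below are made-up examples; the session name is whatever create_read_session returns):

    Started reading table 'my-project.my_dataset.my_table' with BQ Storage API session 'projects/my-project/locations/us/sessions/CAFEBABE'.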
8 changes: 8 additions & 0 deletions bigquery/google/cloud/bigquery/table.py
@@ -19,6 +19,7 @@
 import copy
 import datetime
 import functools
+import logging
 import operator
 import warnings
 
@@ -56,6 +57,8 @@
 from google.cloud.bigquery.external_config import ExternalConfig
 
 
+_LOGGER = logging.getLogger(__name__)
+
 _NO_BQSTORAGE_ERROR = (
     "The google-cloud-bigquery-storage library is not installed, "
     "please install google-cloud-bigquery-storage to use bqstorage features."
@@ -1426,6 +1429,11 @@ def _to_page_iterable(
                 # with the tabledata.list API.
                 pass
 
+        _LOGGER.debug(
+            "Started reading table '{}.{}.{}' with tabledata.list.".format(
+                self._table.project, self._table.dataset_id, self._table.table_id
+            )
+        )
         for item in tabledata_list_download():
             yield item
 
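The fallback path now logs a matching breadcrumb, so the logs alone show which download strategy a given read used (IDs again made up):

    Started reading table 'my-project.my_dataset.my_table' with tabledata.list.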
81 changes: 64 additions & 17 deletions bigquery/tests/unit/test_table.py
@@ -14,6 +14,7 @@
 
 import itertools
 import json
+import logging
 import time
 import unittest
 import warnings
@@ -1445,8 +1446,16 @@ def _class_under_test(self):
         return RowIterator
 
     def _make_one(
-        self, client=None, api_request=None, path=None, schema=None, **kwargs
+        self,
+        client=None,
+        api_request=None,
+        path=None,
+        schema=None,
+        table=None,
+        **kwargs
     ):
+        from google.cloud.bigquery.table import TableReference
+
         if client is None:
             client = _mock_client()
 
@@ -1459,7 +1468,12 @@ def _make_one(
         if schema is None:
             schema = []
 
-        return self._class_under_test()(client, api_request, path, schema, **kwargs)
+        if table is None:
+            table = TableReference.from_string("my-project.my_dataset.my_table")
+
+        return self._class_under_test()(
+            client, api_request, path, schema, table=table, **kwargs
+        )
 
     def test_constructor(self):
         from google.cloud.bigquery.table import _item_to_row
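The helper now always hands RowIterator a table reference because the new debug messages format the table's project, dataset, and table IDs. For illustration, TableReference.from_string splits a standard SQL table ID into exactly those three parts (the ID here is the test default above):

    from google.cloud.bigquery.table import TableReference

    ref = TableReference.from_string("my-project.my_dataset.my_table")
    print(ref.project)     # my-project
    print(ref.dataset_id)  # my_dataset
    print(ref.table_id)    # my_table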
@@ -2071,16 +2085,32 @@ def test_to_dataframe_w_empty_results(self):
             SchemaField("name", "STRING", mode="REQUIRED"),
             SchemaField("age", "INTEGER", mode="REQUIRED"),
         ]
-        path = "/foo"
         api_request = mock.Mock(return_value={"rows": []})
-        row_iterator = self._make_one(_mock_client(), api_request, path, schema)
+        row_iterator = self._make_one(_mock_client(), api_request, schema=schema)
 
         df = row_iterator.to_dataframe()
 
         self.assertIsInstance(df, pandas.DataFrame)
         self.assertEqual(len(df), 0)  # verify the number of rows
         self.assertEqual(list(df), ["name", "age"])  # verify the column names
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_to_dataframe_logs_tabledata_list(self):
+        from google.cloud.bigquery.table import Table
+
+        mock_logger = mock.create_autospec(logging.Logger)
+        api_request = mock.Mock(return_value={"rows": []})
+        row_iterator = self._make_one(
+            _mock_client(), api_request, table=Table("debug-proj.debug_dset.debug_tbl")
+        )
+
+        with mock.patch("google.cloud.bigquery.table._LOGGER", mock_logger):
+            row_iterator.to_dataframe()
+
+        mock_logger.debug.assert_any_call(
+            "Started reading table 'debug-proj.debug_dset.debug_tbl' with tabledata.list."
+        )
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     def test_to_dataframe_w_various_types_nullable(self):
         import datetime
@@ -2191,23 +2221,13 @@ def test_to_dataframe_w_bqstorage_no_streams(self):
             bigquery_storage_v1beta1.BigQueryStorageClient
         )
         session = bigquery_storage_v1beta1.types.ReadSession()
-        session.avro_schema.schema = json.dumps(
-            {
-                "fields": [
-                    {"name": "colA"},
-                    # Not alphabetical to test column order.
-                    {"name": "colC"},
-                    {"name": "colB"},
-                ]
-            }
-        )
         bqstorage_client.create_read_session.return_value = session
 
         row_iterator = mut.RowIterator(
             _mock_client(),
-            None,  # api_request: ignored
-            None,  # path: ignored
-            [
+            api_request=None,
+            path=None,
+            schema=[
                 schema.SchemaField("colA", "IGNORED"),
                 schema.SchemaField("colC", "IGNORED"),
                 schema.SchemaField("colB", "IGNORED"),

Review comment (Contributor), on the keyword-argument lines above: Passing the args as keyword arguments makes this more readable. 👍
@@ -2220,6 +2240,33 @@ def test_to_dataframe_w_bqstorage_no_streams(self):
         self.assertEqual(list(got), column_names)
         self.assertTrue(got.empty)
 
+    @unittest.skipIf(
+        bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"
+    )
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_to_dataframe_w_bqstorage_logs_session(self):
+        from google.cloud.bigquery.table import Table
+
+        bqstorage_client = mock.create_autospec(
+            bigquery_storage_v1beta1.BigQueryStorageClient
+        )
+        session = bigquery_storage_v1beta1.types.ReadSession()
+        session.name = "projects/test-proj/locations/us/sessions/SOMESESSION"
+        bqstorage_client.create_read_session.return_value = session
+        mock_logger = mock.create_autospec(logging.Logger)
+        row_iterator = self._make_one(
+            _mock_client(), table=Table("debug-proj.debug_dset.debug_tbl")
+        )
+
+        with mock.patch("google.cloud.bigquery._pandas_helpers._LOGGER", mock_logger):
+            row_iterator.to_dataframe(bqstorage_client=bqstorage_client)
+
+        mock_logger.debug.assert_any_call(
+            "Started reading table 'debug-proj.debug_dset.debug_tbl' "
+            "with BQ Storage API session 'projects/test-proj/locations/us/sessions/SOMESESSION'."
+        )
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(
         bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"