From 7e36fda0e4c76bb38f2839b40d2bfb10b55ec3cd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Jan 2020 01:31:47 -0600 Subject: [PATCH] deprecate(bigquery): deprecate `client.dataset()` in favor of `DatasetReference` (#7753) * deprecate(bigquery): deprecate `client.dataset()` in favor of DatasetReference Now that all client methods that take a `DatasetReference` or `TableReference` also take a string, the `client.dataset()` method is unnecessary and confusing. * fix: remove unnecessary textwrap * Update bigquery/google/cloud/bigquery/client.py Co-Authored-By: Peter Lamut * docs: use sphinx deprecation directive Co-authored-by: Peter Lamut --- bigquery/google/cloud/bigquery/client.py | 26 ++++- bigquery/google/cloud/bigquery/magics.py | 3 +- bigquery/tests/unit/test_client.py | 126 ++++++++++++++--------- 3 files changed, 104 insertions(+), 51 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 83e6bf8045ed..5da12990b390 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -354,7 +354,20 @@ def list_datasets( ) def dataset(self, dataset_id, project=None): - """Construct a reference to a dataset. + """Deprecated: Construct a reference to a dataset. + + .. deprecated:: 1.24.0 + Construct a + :class:`~google.cloud.bigquery.dataset.DatasetReference` using its + constructor or use a string where previously a reference object + was used. + + As of ``google-cloud-bigquery`` version 1.7.0, all client methods + that take a + :class:`~google.cloud.bigquery.dataset.DatasetReference` or + :class:`~google.cloud.bigquery.table.TableReference` also take a + string in standard SQL format, e.g. ``project.dataset_id`` or + ``project.dataset_id.table_id``. Args: dataset_id (str): ID of the dataset. @@ -370,6 +383,13 @@ def dataset(self, dataset_id, project=None): if project is None: project = self.project + warnings.warn( + "Client.dataset is deprecated and will be removed in a future version. 
" + "Use a string like 'my_project.my_dataset' or a " + "cloud.google.bigquery.DatasetReference object, instead.", + PendingDeprecationWarning, + stacklevel=2, + ) return DatasetReference(project, dataset_id) def _create_bqstorage_client(self): @@ -419,7 +439,7 @@ def create_dataset( >>> from google.cloud import bigquery >>> client = bigquery.Client() - >>> dataset = bigquery.Dataset(client.dataset('my_dataset')) + >>> dataset = bigquery.Dataset('my_project.my_dataset') >>> dataset = client.create_dataset(dataset) """ @@ -2584,7 +2604,7 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): ) as guard: meta_table = self.get_table( TableReference( - self.dataset(table.dataset_id, project=table.project), + DatasetReference(table.project, table.dataset_id), "%s$__PARTITIONS_SUMMARY__" % table.table_id, ), retry=retry, diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 5ca6817a99c6..39608b19fcde 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -153,6 +153,7 @@ from google.api_core.exceptions import NotFound import google.auth from google.cloud import bigquery +import google.cloud.bigquery.dataset from google.cloud.bigquery.dbapi import _helpers import six @@ -534,7 +535,7 @@ def _cell_magic(line, query): ) dataset_id, table_id = split job_config.allow_large_results = True - dataset_ref = client.dataset(dataset_id) + dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id) destination_table_ref = dataset_ref.table(table_id) job_config.destination = destination_table_ref job_config.create_disposition = "CREATE_IF_NEEDED" diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 6b40d8a020a4..952c876dff39 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -554,7 +554,19 @@ def test_dataset_with_specified_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - dataset = client.dataset(self.DS_ID, self.PROJECT) + catch_warnings = warnings.catch_warnings(record=True) + + with catch_warnings as warned: + dataset = client.dataset(self.DS_ID, self.PROJECT) + + matches = [ + warning + for warning in warned + if warning.category in (DeprecationWarning, PendingDeprecationWarning) + and "Client.dataset" in str(warning) + and "my_project.my_dataset" in str(warning) + ] + assert matches, "A Client.dataset deprecation warning was not raised." self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -565,7 +577,19 @@ def test_dataset_with_default_project(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - dataset = client.dataset(self.DS_ID) + catch_warnings = warnings.catch_warnings(record=True) + + with catch_warnings as warned: + dataset = client.dataset(self.DS_ID) + + matches = [ + warning + for warning in warned + if warning.category in (DeprecationWarning, PendingDeprecationWarning) + and "Client.dataset" in str(warning) + and "my_project.my_dataset" in str(warning) + ] + assert matches, "A Client.dataset deprecation warning was not raised." 
self.assertIsInstance(dataset, DatasetReference) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -582,7 +606,7 @@ def test_get_dataset(self): "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, } conn = client._connection = make_connection(resource) - dataset_ref = client.dataset(self.DS_ID) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) dataset = client.get_dataset(dataset_ref, timeout=7.5) @@ -660,7 +684,7 @@ def test_create_dataset_minimal(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(RESOURCE) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) after = client.create_dataset(before, timeout=7.5) @@ -716,7 +740,7 @@ def test_create_dataset_w_attrs(self): AccessEntry(None, "view", VIEW), ] - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before.access_entries = entries before.description = DESCRIPTION @@ -772,7 +796,7 @@ def test_create_dataset_w_custom_property(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before._properties["newAlphaProperty"] = "unreleased property" @@ -812,7 +836,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(self): ) conn = client._connection = make_connection(RESOURCE) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) after = client.create_dataset(before) @@ -854,7 +878,7 @@ def test_create_dataset_w_client_location_w_dataset_location(self): ) conn = client._connection = make_connection(RESOURCE) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) before = Dataset(ds_ref) before.location = OTHER_LOCATION @@ -894,7 +918,7 @@ def test_create_dataset_w_reference(self): ) conn = client._connection = make_connection(resource) - dataset = client.create_dataset(client.dataset(self.DS_ID)) + dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, self.PROJECT) @@ -1567,7 +1591,7 @@ def test_get_model(self): } conn = client._connection = make_connection(resource) - model_ref = client.dataset(self.DS_ID).model(self.MODEL_ID) + model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) got = client.get_model(model_ref, timeout=7.5) conn.api_request.assert_called_once_with( @@ -1705,7 +1729,9 @@ def test_update_dataset_w_invalid_field(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(ValueError): - client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) + client.update_dataset( + Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] + ) def test_update_dataset(self): from google.cloud.bigquery.dataset import Dataset, AccessEntry @@ -1730,7 +1756,7 @@ def test_update_dataset(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(RESOURCE, RESOURCE) - ds = Dataset(client.dataset(self.DS_ID)) + ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) ds.description = DESCRIPTION 
ds.friendly_name = FRIENDLY_NAME ds.location = LOCATION @@ -1780,7 +1806,7 @@ def test_update_dataset_w_custom_property(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - dataset = Dataset(client.dataset(self.DS_ID)) + dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) dataset._properties["newAlphaProperty"] = "unreleased property" dataset = client.update_dataset(dataset, ["newAlphaProperty"]) @@ -2216,7 +2242,7 @@ def test_list_tables_empty_w_timeout(self): client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables(dataset, timeout=7.5) self.assertIs(iterator.dataset, dataset) page = six.next(iterator.pages) @@ -2277,7 +2303,7 @@ def test_list_models_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(DATA) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_models(dataset) self.assertIs(iterator.dataset, dataset) @@ -2299,7 +2325,7 @@ def test_list_models_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_models(client.dataset(self.DS_ID).model("foo")) + client.list_models(DatasetReference(self.PROJECT, self.DS_ID).model("foo")) def test_list_routines_empty_w_timeout(self): creds = _make_credentials() @@ -2352,7 +2378,7 @@ def test_list_routines_defaults(self): creds = _make_credentials() client = self._make_one(project=project_id, credentials=creds) conn = client._connection = make_connection(resource) - dataset = client.dataset(dataset_id) + dataset = DatasetReference(client.project, dataset_id) iterator = client.list_routines(dataset) self.assertIs(iterator.dataset, dataset) @@ -2376,7 +2402,9 @@ def test_list_routines_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_routines(client.dataset(self.DS_ID).table("foo")) + client.list_routines( + DatasetReference(self.PROJECT, self.DS_ID).table("foo") + ) def test_list_tables_defaults(self): from google.cloud.bigquery.table import TableListItem @@ -2414,7 +2442,7 @@ def test_list_tables_defaults(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(DATA) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables(dataset) self.assertIs(iterator.dataset, dataset) @@ -2468,7 +2496,7 @@ def test_list_tables_explicit(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(DATA) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) iterator = client.list_tables( # Test with string for dataset ID. 
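Note that `PendingDeprecationWarning` is hidden by Python's default warning filters; the tests above observe it only because `warnings.catch_warnings(record=True)` installs an `"always"` filter on entry. A sketch of how downstream code might surface or temporarily mute the warning, using only the standard `warnings` API (nothing here is part of the patch):

    import warnings

    # Surface the warning while auditing a code base (the default
    # filters ignore PendingDeprecationWarning):
    warnings.simplefilter("always", PendingDeprecationWarning)

    # Or mute this one message during an incremental migration; the
    # message argument is a regex matched against the warning text:
    warnings.filterwarnings(
        "ignore",
        message="Client.dataset is deprecated",
        category=PendingDeprecationWarning,
    )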
@@ -2499,7 +2527,7 @@ def test_list_tables_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.list_tables(client.dataset(self.DS_ID).table("foo")) + client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo")) def test_delete_dataset(self): from google.cloud.bigquery.dataset import Dataset @@ -2524,7 +2552,7 @@ def test_delete_dataset_delete_contents(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection({}, {}) - ds_ref = client.dataset(self.DS_ID) + ds_ref = DatasetReference(self.PROJECT, self.DS_ID) for arg in (ds_ref, Dataset(ds_ref)): client.delete_dataset(arg, delete_contents=True) conn.api_request.assert_called_with( @@ -2538,7 +2566,9 @@ def test_delete_dataset_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_dataset(client.dataset(self.DS_ID).table("foo")) + client.delete_dataset( + DatasetReference(self.PROJECT, self.DS_ID).table("foo") + ) def test_delete_dataset_w_not_found_ok_false(self): path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) @@ -2585,7 +2615,7 @@ def test_delete_model(self): model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) models = ( model_id, - client.dataset(self.DS_ID).model(self.MODEL_ID), + DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID), Model(model_id), ) conn = client._connection = make_connection(*([{}] * len(models))) @@ -2600,7 +2630,7 @@ def test_delete_model_w_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_model(client.dataset(self.DS_ID)) + client.delete_model(DatasetReference(self.PROJECT, self.DS_ID)) def test_delete_model_w_not_found_ok_false(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2662,7 +2692,7 @@ def test_delete_routine_w_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_routine(client.dataset(self.DS_ID)) + client.delete_routine(DatasetReference(self.PROJECT, self.DS_ID)) def test_delete_routine_w_not_found_ok_false(self): creds = _make_credentials() @@ -2731,7 +2761,7 @@ def test_delete_table_w_wrong_type(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with self.assertRaises(TypeError): - client.delete_table(client.dataset(self.DS_ID)) + client.delete_table(DatasetReference(self.PROJECT, self.DS_ID)) def test_delete_table_w_not_found_ok_false(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -3237,7 +3267,7 @@ def test_load_table_from_uri(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - destination = client.dataset(self.DS_ID).table(DESTINATION) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) job = client.load_table_from_uri( SOURCE_URI, destination, job_id=JOB, job_config=job_config, timeout=7.5 @@ -3295,7 +3325,7 @@ def test_load_table_from_uri_w_explicit_project(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(resource) - destination = 
client.dataset(self.DS_ID).table(destination_id) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(destination_id) client.load_table_from_uri( source_uri, @@ -3368,7 +3398,7 @@ def test_load_table_from_uri_w_invalid_job_config(self): http = object() job_config = job.CopyJobConfig() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - destination = client.dataset(self.DS_ID).table(DESTINATION) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) with self.assertRaises(TypeError) as exc: client.load_table_from_uri( @@ -3551,7 +3581,7 @@ def test_copy_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -3611,7 +3641,7 @@ def test_copy_table_w_explicit_project(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(resource) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(source_id) destination = dataset.table(destination_id) @@ -3689,23 +3719,25 @@ def test_copy_table_w_source_strings(self): sources = [ "dataset_wo_proj.some_table", "other_project.other_dataset.other_table", - client.dataset("dataset_from_ref").table("table_from_ref"), + DatasetReference(client.project, "dataset_from_ref").table( + "table_from_ref" + ), ] destination = "some_project.some_dataset.destination_table" job = client.copy_table(sources, destination) expected_sources = [ - client.dataset("dataset_wo_proj").table("some_table"), - client.dataset("other_dataset", project="other_project").table( - "other_table" + DatasetReference(client.project, "dataset_wo_proj").table("some_table"), + DatasetReference("other_project", "other_dataset").table("other_table"), + DatasetReference(client.project, "dataset_from_ref").table( + "table_from_ref" ), - client.dataset("dataset_from_ref").table("table_from_ref"), ] self.assertEqual(list(job.sources), expected_sources) - expected_destination = client.dataset( - "some_dataset", project="some_project" - ).table("destination_table") + expected_destination = DatasetReference("some_project", "some_dataset").table( + "destination_table" + ) self.assertEqual(job.destination, expected_destination) def test_copy_table_w_invalid_job_config(self): @@ -3719,7 +3751,7 @@ def test_copy_table_w_invalid_job_config(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) job_config = job.ExtractJobConfig() - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) with self.assertRaises(TypeError) as exc: @@ -3756,7 +3788,7 @@ def test_copy_table_w_valid_job_config(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) @@ -3799,7 +3831,7 @@ def test_extract_table(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = 
make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, DESTINATION, job_id=JOB, timeout=7.5) @@ -3826,7 +3858,7 @@ def test_extract_table_w_invalid_job_config(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job_config = job.LoadJobConfig() with self.assertRaises(TypeError) as exc: @@ -3859,7 +3891,7 @@ def test_extract_table_w_explicit_project(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(resource) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(source_id) client.extract_table( @@ -3948,7 +3980,7 @@ def test_extract_table_generated_job_id(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job_config = ExtractJobConfig() job_config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON @@ -3997,7 +4029,7 @@ def test_extract_table_w_destination_uris(self): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) conn = client._connection = make_connection(RESOURCE) - dataset = client.dataset(self.DS_ID) + dataset = DatasetReference(self.PROJECT, self.DS_ID) source = dataset.table(SOURCE) job = client.extract_table(source, [DESTINATION1, DESTINATION2], job_id=JOB)
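As the updated tests illustrate, table references now hang off a `DatasetReference` (or a string) rather than off the deprecated `client.dataset()` helper. A short sketch with placeholder names:

    from google.cloud.bigquery import DatasetReference

    dataset_ref = DatasetReference("my_project", "my_dataset")
    table_ref = dataset_ref.table("my_table")  # yields a TableReference

    # Equivalent string form accepted by client methods such as
    # load_table_from_uri, copy_table, and extract_table:
    # "my_project.my_dataset.my_table"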