From d5a0f5f532f2a3739748db8e297cbebf75e1597d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 4 Sep 2019 19:31:22 +0200 Subject: [PATCH] Deprecate automatic schema conversion --- bigquery/google/cloud/bigquery/client.py | 10 +++++++ bigquery/tests/unit/test_client.py | 35 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index c33e119cbc74..bcc93c0b7273 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -1572,6 +1572,16 @@ def load_table_from_dataframe( dataframe, job_config.schema ) + if not job_config.schema: + # the schema could not be fully detected + warnings.warn( + "Schema could not be detected for all columns. Loading from a " + "dataframe without a schema will be deprecated in the future, " + "please provide a schema.", + PendingDeprecationWarning, + stacklevel=2, + ) + tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) os.close(tmpfd) diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index da3cee11e5d0..3ee45f52405c 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -5579,6 +5579,41 @@ def test_load_table_from_dataframe_unknown_table(self): job_config=mock.ANY, ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_no_schema_warning(self): + client = self._make_client() + + # Pick at least one column type that translates to Pandas dtype + # "object". A string column matches that. + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + catch_warnings = warnings.catch_warnings(record=True) + + with get_table_patch, load_patch, pyarrow_patch, catch_warnings as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + matches = [ + warning + for warning in warned + if warning.category in (DeprecationWarning, PendingDeprecationWarning) + and "could not be detected" in str(warning) + and "please provide a schema" in str(warning) + ] + assert matches, "A missing schema deprecation warning was not raised." + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields_error(self):