diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 86e131438f32..bb6a375975f2 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -23,6 +23,8 @@ import functools import gzip +import io +import json import os import tempfile import uuid @@ -50,6 +52,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _table_arg_to_table from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import Table @@ -1929,6 +1932,50 @@ def list_rows( ) return row_iterator + def _schema_from_json_file_object(self, file_obj): + """Helper function for schema_from_json that takes a + file object that describes a table schema. + + Returns: + List of schema field objects. + """ + json_data = json.load(file_obj) + return [SchemaField.from_api_repr(field) for field in json_data] + + def _schema_to_json_file_object(self, schema_list, file_obj): + """Helper function for schema_to_json that takes a schema list and file + object and writes the schema list to the file object with json.dump + """ + json.dump(schema_list, file_obj, indent=2, sort_keys=True) + + def schema_from_json(self, file_or_path): + """Takes a file object or file path that contains json that describes + a table schema. + + Returns: + List of schema field objects. + """ + if isinstance(file_or_path, io.IOBase): + return self._schema_from_json_file_object(file_or_path) + + with open(file_or_path) as file_obj: + return self._schema_from_json_file_object(file_obj) + + def schema_to_json(self, schema_list, destination): + """Takes a list of schema field objects. + + Serializes the list of schema field objects as json to a file. + + Destination is a file path or a file object. 
+ """ + json_schema_list = [f.to_api_repr() for f in schema_list] + + if isinstance(destination, io.IOBase): + return self._schema_to_json_file_object(json_schema_list, destination) + + with open(destination, mode="w") as file_obj: + return self._schema_to_json_file_object(json_schema_list, file_obj) + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 46734079a03d..45e80f1a37a3 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -5161,3 +5161,183 @@ def test__do_multipart_upload_wrong_size(self): with pytest.raises(ValueError): client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) + + def test_schema_from_json_with_file_path(self): + from google.cloud.bigquery.schema import SchemaField + + file_content = """[ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING" + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING" + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT" + } + ]""" + + expected = [ + SchemaField("qtr", "STRING", "REQUIRED", "quarter"), + SchemaField("rep", "STRING", "NULLABLE", "sales representative"), + SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + ] + + client = self._make_client() + mock_file_path = "/mocked/file.json" + + if six.PY2: + open_patch = mock.patch( + "__builtin__.open", mock.mock_open(read_data=file_content) + ) + else: + open_patch = mock.patch( + "builtins.open", new=mock.mock_open(read_data=file_content) + ) + + with open_patch as _mock_file: + actual = client.schema_from_json(mock_file_path) + _mock_file.assert_called_once_with(mock_file_path) + # This assert is to make sure __exit__ is called in the context + # manager that opens the file in the function + _mock_file().__exit__.assert_called_once() + + assert expected 
def test_schema_from_json_with_file_object(self):
    """schema_from_json accepts an already-open file-like object."""
    from google.cloud.bigquery.schema import SchemaField

    file_content = """[
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING"
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING"
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT"
        }
    ]"""

    expected = [
        SchemaField(name, field_type, mode, description)
        for name, field_type, mode, description in [
            ("qtr", "STRING", "REQUIRED", "quarter"),
            ("rep", "STRING", "NULLABLE", "sales representative"),
            ("sales", "FLOAT", "NULLABLE", "total sales"),
        ]
    ]

    client = self._make_client()

    # Python 2 file content is bytes, Python 3 is text.
    fake_file = io.BytesIO(file_content) if six.PY2 else io.StringIO(file_content)

    assert client.schema_from_json(fake_file) == expected

def test_schema_to_json_with_file_path(self):
    """schema_to_json writes the serialized schema to a filesystem path."""
    from google.cloud.bigquery.schema import SchemaField

    file_content = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]

    schema_list = [
        SchemaField(name, field_type, mode, description)
        for name, field_type, mode, description in [
            ("qtr", "STRING", "REQUIRED", "quarter"),
            ("rep", "STRING", "NULLABLE", "sales representative"),
            ("sales", "FLOAT", "NULLABLE", "total sales"),
        ]
    ]

    client = self._make_client()
    mock_file_path = "/mocked/file.json"

    open_target = "__builtin__.open" if six.PY2 else "builtins.open"
    open_patch = mock.patch(open_target, mock.mock_open())

    with open_patch as mocked_open, mock.patch("json.dump") as mocked_dump:
        client.schema_to_json(schema_list, mock_file_path)
        mocked_open.assert_called_once_with(mock_file_path, mode="w")
        # Verify the context manager that opened the file inside
        # schema_to_json was properly exited.
        mocked_open().__exit__.assert_called_once()
        mocked_dump.assert_called_with(
            file_content, mocked_open.return_value, indent=2, sort_keys=True
        )
def test_schema_to_json_with_file_object(self):
    """schema_to_json writes valid JSON to an already-open file-like object."""
    from google.cloud.bigquery.schema import SchemaField

    file_content = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]

    schema_list = [
        SchemaField(name, field_type, mode, description)
        for name, field_type, mode, description in [
            ("qtr", "STRING", "REQUIRED", "quarter"),
            ("rep", "STRING", "NULLABLE", "sales representative"),
            ("sales", "FLOAT", "NULLABLE", "total sales"),
        ]
    ]

    # Python 2 writes bytes, Python 3 writes text.
    fake_file = io.BytesIO() if six.PY2 else io.StringIO()

    client = self._make_client()
    client.schema_to_json(schema_list, fake_file)

    # Round-trip through json.loads: written output must parse back to
    # the expected API representation.
    assert file_content == json.loads(fake_file.getvalue())