-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Phase 1 for storing schemas for later use. #7761
Changes from 6 commits
9d3198b
894bb26
136cfcb
092e121
79e965b
22bf1ab
bb2ca79
5451852
f4ae0c1
5791049
fa331c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
|
||
import functools | ||
import gzip | ||
import io | ||
import json | ||
import os | ||
import tempfile | ||
import uuid | ||
|
@@ -50,6 +52,7 @@ | |
from google.cloud.bigquery.model import ModelReference | ||
from google.cloud.bigquery.query import _QueryResults | ||
from google.cloud.bigquery.retry import DEFAULT_RETRY | ||
from google.cloud.bigquery.schema import SchemaField | ||
from google.cloud.bigquery.table import _table_arg_to_table | ||
from google.cloud.bigquery.table import _table_arg_to_table_ref | ||
from google.cloud.bigquery.table import Table | ||
|
@@ -71,6 +74,9 @@ | |
_READ_LESS_THAN_SIZE = ( | ||
"Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." | ||
) | ||
_NEED_JSON_FILE_ARGUMENT = ( | ||
"The JSON file argument should be a file object or a file path" | ||
) | ||
_NEED_TABLE_ARGUMENT = ( | ||
"The table argument should be a table ID string, Table, or TableReference" | ||
) | ||
|
@@ -1929,6 +1935,56 @@ def list_rows( | |
) | ||
return row_iterator | ||
|
||
def _schema_from_json_file_object(self, file_obj): | ||
"""Helper function for schema_from_json that takes a | ||
file object that describes a table schema. | ||
|
||
Returns: | ||
List of schema field objects. | ||
""" | ||
json_data = json.load(file_obj) | ||
return [SchemaField.from_api_repr(f) for f in json_data] | ||
|
||
def _schema_to_json_file_object(self, schema_list, file_obj): | ||
"""Helper function for schema_to_json that takes a schema list and file | ||
object and writes the schema list to the file object with json.dump | ||
""" | ||
json.dump(schema_list, file_obj, indent=2, sort_keys=True) | ||
|
||
def schema_from_json(self, file_or_path): | ||
"""Takes a file object or file path that contains json that describes | ||
a table schema. | ||
|
||
Returns: | ||
List of schema field objects. | ||
""" | ||
if isinstance(file_or_path, io.IOBase): | ||
return self._schema_from_json_file_object(file_or_path) | ||
|
||
try: | ||
with open(file_or_path) as file_obj: | ||
return self._schema_from_json_file_object(file_obj) | ||
except OSError: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, coverage is failing on this line and the similar line in the other function. We'd need a test where Honestly, I'd be okay removing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed the |
||
raise ValueError(_NEED_JSON_FILE_ARGUMENT) | ||
|
||
def schema_to_json(self, schema_list, destination): | ||
"""Takes a list of schema field objects. | ||
|
||
Serializes the list of schema field objects as json to a file. | ||
|
||
Destination is a file path or a file object. | ||
""" | ||
json_schema_list = [f.to_api_repr() for f in schema_list] | ||
|
||
if isinstance(destination, io.IOBase): | ||
return self._schema_to_json_file_object(json_schema_list, destination) | ||
|
||
try: | ||
with open(destination, mode="w") as file_obj: | ||
return self._schema_to_json_file_object(json_schema_list, file_obj) | ||
except OSError: | ||
raise ValueError(_NEED_JSON_FILE_ARGUMENT) | ||
|
||
|
||
# pylint: disable=unused-argument | ||
def _item_to_project(iterator, resource): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5161,3 +5161,166 @@ def test__do_multipart_upload_wrong_size(self): | |
|
||
with pytest.raises(ValueError): | ||
client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) | ||
|
||
def test_schema_from_json_with_file_path(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
|
||
expected = [ | ||
SchemaField("qtr", "STRING", "REQUIRED", "quarter"), | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative"), | ||
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), | ||
] | ||
|
||
client = self._make_client() | ||
mock_file_path = "/mocked/file.json" | ||
tswast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
open_patch = mock.patch( | ||
"builtins.open", new=mock.mock_open(read_data=file_content) | ||
) | ||
|
||
with open_patch as _mock_file: | ||
actual = client.schema_from_json(mock_file_path) | ||
_mock_file.assert_called_once_with(mock_file_path) | ||
tswast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# This assert is to make sure __exit__ is called in the context | ||
# manager that opens the file in the function | ||
_mock_file().__exit__.assert_called_once() | ||
|
||
assert expected == actual | ||
|
||
def test_schema_from_json_with_file_object(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
|
||
expected = [ | ||
SchemaField("qtr", "STRING", "REQUIRED", "quarter"), | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative"), | ||
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), | ||
] | ||
|
||
client = self._make_client() | ||
|
||
fake_file = io.StringIO(file_content) | ||
actual = client.schema_from_json(fake_file) | ||
|
||
assert expected == actual | ||
|
||
def test_schema_to_json_with_file_path(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = [ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING", | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING", | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT", | ||
}, | ||
] | ||
|
||
schema_list = [ | ||
SchemaField("qtr", "STRING", "REQUIRED", "quarter"), | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative"), | ||
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), | ||
] | ||
|
||
client = self._make_client() | ||
mock_file_path = "/mocked/file.json" | ||
|
||
open_patch = mock.patch("builtins.open", mock.mock_open()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the test logs, it looks like this is tripping up Python 2. https://stackoverflow.com/a/34677735/101923
Remember you can run against Python 2 by using We could check the Python version with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed all the issues and the code is compatible with python 2 now. |
||
with open_patch as _mock_file: | ||
with mock.patch("json.dump") as _mock_dump: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like your thinking here. It's definitely nicer to compare lists than it is to compare JSON strings. Nit: Let's write both Nit: No need for leading underscore, that's usually to indicate a "private" variable, which isn't relevant inside a test.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Replace existing |
||
client.schema_to_json(schema_list, mock_file_path) | ||
_mock_file.assert_called_once_with(mock_file_path, mode="w") | ||
# This assert is to make sure __exit__ is called in the context | ||
# manager that opens the file in the function | ||
_mock_file().__exit__.assert_called_once() | ||
_mock_dump.assert_called_with( | ||
file_content, _mock_file.return_value, indent=2, sort_keys=True | ||
) | ||
|
||
def test_schema_to_json_with_file_object(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
schema_list = [ | ||
SchemaField("qtr", "STRING", "REQUIRED", "quarter"), | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative"), | ||
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), | ||
] | ||
|
||
fake_file = io.StringIO() | ||
client = self._make_client() | ||
|
||
client.schema_to_json(schema_list, fake_file) | ||
assert file_content == fake_file.getvalue() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The test has been updated with the list of dictionaries and updated assert. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wonderful!
One nit-pick, though. Style-wise in our client libraries and samples, we avoid single-letter variable names, even in list comprehensions. Let's rename
f
tofield
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Variable name has been updated!