-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Phase 1 for storing schemas for later use. #7761
Changes from 2 commits
9d3198b
894bb26
136cfcb
092e121
79e965b
22bf1ab
bb2ca79
5451852
f4ae0c1
5791049
fa331c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
|
||
import functools | ||
import gzip | ||
import io | ||
import json | ||
import os | ||
import tempfile | ||
import uuid | ||
|
@@ -50,6 +52,7 @@ | |
from google.cloud.bigquery.model import ModelReference | ||
from google.cloud.bigquery.query import _QueryResults | ||
from google.cloud.bigquery.retry import DEFAULT_RETRY | ||
from google.cloud.bigquery.schema import SchemaField | ||
from google.cloud.bigquery.table import _table_arg_to_table | ||
from google.cloud.bigquery.table import _table_arg_to_table_ref | ||
from google.cloud.bigquery.table import Table | ||
|
@@ -71,6 +74,9 @@ | |
_READ_LESS_THAN_SIZE = ( | ||
"Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." | ||
) | ||
_NEED_JSON_FILE_ARGUMENT = ( | ||
"The JSON file argument should be a file object or a file path" | ||
) | ||
_NEED_TABLE_ARGUMENT = ( | ||
"The table argument should be a table ID string, Table, or TableReference" | ||
) | ||
|
@@ -1929,6 +1935,60 @@ def list_rows( | |
) | ||
return row_iterator | ||
|
||
def schema_from_json(self, file_or_path): | ||
"""Takes a file object or file path that contains json that describes | ||
a table schema. | ||
|
||
Returns: | ||
List of schema field objects. | ||
""" | ||
file_obj = None | ||
json_data = None | ||
schema_field_list = list() | ||
|
||
if isinstance(file_or_path, io.IOBase): | ||
file_obj = file_or_path | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Since the above line There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed the else as recommended. |
||
try: | ||
file_obj = open(file_or_path) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An important difference with this path is that we must close I recommend using a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added helper method per recommendation along with the statement to close the file. |
||
except OSError: | ||
raise TypeError(_NEED_JSON_FILE_ARGUMENT) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general we use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated with correct exception. |
||
|
||
try: | ||
json_data = json.load(file_obj) | ||
except JSONDecodeError: | ||
raise TypeError(_NEED_JSON_FILE_ARGUMENT) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd prefer if we let this raise (don't catch it). That way the user doesn't lose context about what is wrong with the input file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Took this piece out. |
||
|
||
for field in json_data: | ||
schema_field = SchemaField.from_api_repr(field) | ||
schema_field_list.append(schema_field) | ||
|
||
return schema_field_list | ||
|
||
def schema_to_json(self, schema_list, destination): | ||
"""Takes a list of schema field objects. | ||
|
||
Serializes the list of schema field objects as json to a file. | ||
|
||
Destination is a file path or a file object. | ||
""" | ||
file_obj = None | ||
json_schema_list = list() | ||
|
||
if isinstance(destination, io.IOBase): | ||
file_obj = destination | ||
else: | ||
try: | ||
file_obj = open(destination, mode="w") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just as in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated with |
||
except OSError: | ||
raise TypeError(_NEED_JSON_FILE_ARGUMENT) | ||
|
||
for field in schema_list: | ||
schema_field = field.to_api_repr() | ||
json_schema_list.append(schema_field) | ||
|
||
file_obj.write(json.dumps(json_schema_list, indent=2, sort_keys=True)) | ||
|
||
|
||
# pylint: disable=unused-argument | ||
def _item_to_project(iterator, resource): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5161,3 +5161,84 @@ def test__do_multipart_upload_wrong_size(self): | |
|
||
with pytest.raises(ValueError): | ||
client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) | ||
|
||
def test__schema_from_json(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: since the function name is Ditto for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated test names appropriately. |
||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
expected = list() | ||
json_data = json.loads(file_content) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're basically repeating the function definition here. That's not ideal. You've basically written a change-detector test. I'd prefer to see actual I do like that you've mocked out There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made changes to include schema.SchemaField as requested. |
||
|
||
for field in json_data: | ||
schema_field = SchemaField.from_api_repr(field) | ||
expected.append(schema_field) | ||
|
||
client = self._make_client() | ||
mock_file_path = "/mocked/file.json" | ||
tswast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
open_patch = mock.patch( | ||
"builtins.open", new=mock.mock_open(read_data=file_content) | ||
) | ||
with open_patch as _mock_file: | ||
actual = client.schema_from_json(mock_file_path) | ||
_mock_file.assert_called_once_with(mock_file_path) | ||
tswast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
assert expected == actual | ||
|
||
def test__schema_to_json(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
schema_list = list() | ||
json_data = json.loads(file_content) | ||
|
||
for field in json_data: | ||
schema_field = SchemaField.from_api_repr(field) | ||
schema_list.append(schema_field) | ||
|
||
client = self._make_client() | ||
mock_file_path = "/mocked/file.json" | ||
|
||
open_patch = mock.patch("builtins.open", mock.mock_open()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the test logs, it looks like this is tripping up Python 2. https://stackoverflow.com/a/34677735/101923
Remember you can run against Python 2 by using We could check the Python version with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed all the issues and the code is compatible with python 2 now. |
||
with open_patch as _mock_file: | ||
actual = client.schema_to_json(schema_list, mock_file_path) | ||
_mock_file.assert_called_once_with(mock_file_path, mode="w") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might need |
||
_mock_file().write.assert_called_once_with(file_content) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: This and the other docstrings (except for the first line) look a like they are indented 4 spaces too many.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed indentations.