-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Phase 1 for storing schemas for later use. #7761
Changes from 3 commits
9d3198b
894bb26
136cfcb
092e121
79e965b
22bf1ab
bb2ca79
5451852
f4ae0c1
5791049
fa331c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
|
||
import functools | ||
import gzip | ||
import io | ||
import json | ||
import os | ||
import tempfile | ||
import uuid | ||
|
@@ -50,6 +52,7 @@ | |
from google.cloud.bigquery.model import ModelReference | ||
from google.cloud.bigquery.query import _QueryResults | ||
from google.cloud.bigquery.retry import DEFAULT_RETRY | ||
from google.cloud.bigquery.schema import SchemaField | ||
from google.cloud.bigquery.table import _table_arg_to_table | ||
from google.cloud.bigquery.table import _table_arg_to_table_ref | ||
from google.cloud.bigquery.table import Table | ||
|
@@ -71,6 +74,9 @@ | |
_READ_LESS_THAN_SIZE = ( | ||
"Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." | ||
) | ||
_NEED_JSON_FILE_ARGUMENT = ( | ||
"The JSON file argument should be a file object or a file path" | ||
) | ||
_NEED_TABLE_ARGUMENT = ( | ||
"The table argument should be a table ID string, Table, or TableReference" | ||
) | ||
|
@@ -1929,6 +1935,61 @@ def list_rows( | |
) | ||
return row_iterator | ||
|
||
def _schema_from_json_file_object(self, file): | ||
"""Helper function for schema_from_json that takes a | ||
file object that describes a table schema. | ||
|
||
Returns: | ||
List of schema field objects. | ||
""" | ||
schema_field_list = list() | ||
json_data = json.load(file) | ||
|
||
for field in json_data: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI: This loop could be replaced with a Python "list comprehension". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed to be a list comprehension. |
||
schema_field = SchemaField.from_api_repr(field) | ||
schema_field_list.append(schema_field) | ||
|
||
return schema_field_list | ||
|
||
def schema_from_json(self, file_or_path): | ||
"""Takes a file object or file path that contains json that describes | ||
a table schema. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: This and the other docstrings (except for the first line) look a like they are indented 4 spaces too many. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed indentations. |
||
|
||
Returns: | ||
List of schema field objects. | ||
""" | ||
if isinstance(file_or_path, io.IOBase): | ||
return self._schema_from_json_file_object(file_or_path) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Since the above line There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed the else as recommended. |
||
try: | ||
with open(file_or_path) as file: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated name accordingly. |
||
return self._schema_from_json_file_object(file) | ||
except OSError: | ||
raise ValueError(_NEED_JSON_FILE_ARGUMENT) | ||
|
||
def schema_to_json(self, schema_list, destination): | ||
"""Takes a list of schema field objects. | ||
|
||
Serializes the list of schema field objects as json to a file. | ||
|
||
Destination is a file path or a file object. | ||
""" | ||
file_obj = None | ||
json_schema_list = list() | ||
|
||
for field in schema_list: | ||
schema_field = field.to_api_repr() | ||
json_schema_list.append(schema_field) | ||
|
||
if isinstance(destination, io.IOBase): | ||
destination.write(json.dumps(json_schema_list, indent=2, sort_keys=True)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use I'd prefer if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Refactored into a helper function because I couldn't get it to work with the context manager below just by removing the else. |
||
else: | ||
try: | ||
with open(destination, mode="w") as file: | ||
file.write(json.dumps(json_schema_list, indent=2, sort_keys=True)) | ||
except OSError: | ||
raise ValueError(_NEED_JSON_FILE_ARGUMENT) | ||
|
||
|
||
# pylint: disable=unused-argument | ||
def _item_to_project(iterator, resource): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5161,3 +5161,166 @@ def test__do_multipart_upload_wrong_size(self): | |
|
||
with pytest.raises(ValueError): | ||
client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) | ||
|
||
def test_schema_from_json_with_file_path(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
|
||
expected = list() | ||
expected.append(SchemaField("qtr", "STRING", "REQUIRED", "quarter")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Switched to suggested method. |
||
expected.append( | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative") | ||
) | ||
expected.append(SchemaField("sales", "FLOAT", "NULLABLE", "total sales")) | ||
|
||
client = self._make_client() | ||
mock_file_path = "/mocked/file.json" | ||
tswast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
open_patch = mock.patch( | ||
"builtins.open", new=mock.mock_open(read_data=file_content) | ||
) | ||
|
||
with open_patch as _mock_file: | ||
actual = client.schema_from_json(mock_file_path) | ||
_mock_file.assert_called_once_with(mock_file_path) | ||
tswast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# This assert is to make sure __exit__ is called in the context | ||
# manager that opens the file in the function | ||
_mock_file().__exit__.assert_called_once_with(None, None, None) | ||
|
||
assert expected == actual | ||
|
||
def test_schema_from_json_with_file_object(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
|
||
expected = list() | ||
expected.append(SchemaField("qtr", "STRING", "REQUIRED", "quarter")) | ||
expected.append( | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative") | ||
) | ||
expected.append(SchemaField("sales", "FLOAT", "NULLABLE", "total sales")) | ||
|
||
client = self._make_client() | ||
|
||
fake_file = io.StringIO(file_content) | ||
actual = client.schema_from_json(fake_file) | ||
|
||
assert expected == actual | ||
|
||
def test_schema_to_json_with_file_path(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
schema_list = list() | ||
schema_list.append(SchemaField("qtr", "STRING", "REQUIRED", "quarter")) | ||
schema_list.append( | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative") | ||
) | ||
schema_list.append(SchemaField("sales", "FLOAT", "NULLABLE", "total sales")) | ||
|
||
client = self._make_client() | ||
mock_file_path = "/mocked/file.json" | ||
|
||
open_patch = mock.patch("builtins.open", mock.mock_open()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the test logs, it looks like this is tripping up Python 2. https://stackoverflow.com/a/34677735/101923
Remember you can run against Python 2 by using We could check the Python version with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed all the issues and the code is compatible with python 2 now. |
||
with open_patch as _mock_file: | ||
actual = client.schema_to_json(schema_list, mock_file_path) | ||
_mock_file.assert_called_once_with(mock_file_path, mode="w") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might need |
||
_mock_file().write.assert_called_once_with(file_content) | ||
# This assert is to make sure __exit__ is called in the context | ||
# manager that opens the file in the function | ||
_mock_file().__exit__.assert_called_once_with(None, None, None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't care about the actual arguments, so just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed the arguments from the assert. |
||
|
||
def test_schema_to_json_with_file_object(self): | ||
from google.cloud.bigquery.schema import SchemaField | ||
|
||
file_content = """[ | ||
{ | ||
"description": "quarter", | ||
"mode": "REQUIRED", | ||
"name": "qtr", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "sales representative", | ||
"mode": "NULLABLE", | ||
"name": "rep", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"description": "total sales", | ||
"mode": "NULLABLE", | ||
"name": "sales", | ||
"type": "FLOAT" | ||
} | ||
]""" | ||
schema_list = list() | ||
schema_list.append(SchemaField("qtr", "STRING", "REQUIRED", "quarter")) | ||
schema_list.append( | ||
SchemaField("rep", "STRING", "NULLABLE", "sales representative") | ||
) | ||
schema_list.append(SchemaField("sales", "FLOAT", "NULLABLE", "total sales")) | ||
|
||
fake_file = io.StringIO() | ||
client = self._make_client() | ||
|
||
client.schema_to_json(schema_list, fake_file) | ||
assert file_content == fake_file.getvalue() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The test has been updated with the list of dictionaries and updated assert. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: since
file
is a built-in, usefile_
orfile_obj
as the name.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed name as directed.