Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phase 1 for storing schemas for later use. #7761

Merged
merged 11 commits into from
Apr 25, 2019
56 changes: 56 additions & 0 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import functools
import gzip
import io
import json
import os
import tempfile
import uuid
Expand Down Expand Up @@ -50,6 +52,7 @@
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.query import _QueryResults
from google.cloud.bigquery.retry import DEFAULT_RETRY
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import _table_arg_to_table
from google.cloud.bigquery.table import _table_arg_to_table_ref
from google.cloud.bigquery.table import Table
Expand All @@ -71,6 +74,9 @@
_READ_LESS_THAN_SIZE = (
"Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
)
_NEED_JSON_FILE_ARGUMENT = (
"The JSON file argument should be a file object or a file path"
)
_NEED_TABLE_ARGUMENT = (
"The table argument should be a table ID string, Table, or TableReference"
)
Expand Down Expand Up @@ -1929,6 +1935,56 @@ def list_rows(
)
return row_iterator

def _schema_from_json_file_object(self, file_obj):
"""Helper function for schema_from_json that takes a
file object that describes a table schema.

Returns:
List of schema field objects.
"""
json_data = json.load(file_obj)
return [SchemaField.from_api_repr(f) for f in json_data]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonderful!

One nit-pick, though. Style-wise in our client libraries and samples, we avoid single-letter variable names, even in list comprehensions. Let's rename f to field.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Variable name has been updated!


def _schema_to_json_file_object(self, schema_list, file_obj):
"""Helper function for schema_to_json that takes a schema list and file
object and writes the schema list to the file object with json.dump
"""
json.dump(schema_list, file_obj, indent=2, sort_keys=True)

def schema_from_json(self, file_or_path):
"""Takes a file object or file path that contains json that describes
a table schema.

Returns:
List of schema field objects.
"""
if isinstance(file_or_path, io.IOBase):
return self._schema_from_json_file_object(file_or_path)

try:
with open(file_or_path) as file_obj:
return self._schema_from_json_file_object(file_obj)
except OSError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, coverage is failing on this line and the similar line in the other function. We'd need a test where open() fails.

Honestly, I'd be okay removing the try block and letting these errors just raise, too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the try block as suggested for both functions.

raise ValueError(_NEED_JSON_FILE_ARGUMENT)

def schema_to_json(self, schema_list, destination):
"""Takes a list of schema field objects.

Serializes the list of schema field objects as json to a file.

Destination is a file path or a file object.
"""
json_schema_list = [f.to_api_repr() for f in schema_list]

if isinstance(destination, io.IOBase):
return self._schema_to_json_file_object(json_schema_list, destination)

try:
with open(destination, mode="w") as file_obj:
return self._schema_to_json_file_object(json_schema_list, file_obj)
except OSError:
raise ValueError(_NEED_JSON_FILE_ARGUMENT)


# pylint: disable=unused-argument
def _item_to_project(iterator, resource):
Expand Down
163 changes: 163 additions & 0 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5161,3 +5161,166 @@ def test__do_multipart_upload_wrong_size(self):

with pytest.raises(ValueError):
client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None)

def test_schema_from_json_with_file_path(self):
from google.cloud.bigquery.schema import SchemaField

file_content = """[
{
"description": "quarter",
"mode": "REQUIRED",
"name": "qtr",
"type": "STRING"
},
{
"description": "sales representative",
"mode": "NULLABLE",
"name": "rep",
"type": "STRING"
},
{
"description": "total sales",
"mode": "NULLABLE",
"name": "sales",
"type": "FLOAT"
}
]"""

expected = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
]

client = self._make_client()
mock_file_path = "/mocked/file.json"
tswast marked this conversation as resolved.
Show resolved Hide resolved

open_patch = mock.patch(
"builtins.open", new=mock.mock_open(read_data=file_content)
)

with open_patch as _mock_file:
actual = client.schema_from_json(mock_file_path)
_mock_file.assert_called_once_with(mock_file_path)
tswast marked this conversation as resolved.
Show resolved Hide resolved
# This assert is to make sure __exit__ is called in the context
# manager that opens the file in the function
_mock_file().__exit__.assert_called_once()

assert expected == actual

def test_schema_from_json_with_file_object(self):
from google.cloud.bigquery.schema import SchemaField

file_content = """[
{
"description": "quarter",
"mode": "REQUIRED",
"name": "qtr",
"type": "STRING"
},
{
"description": "sales representative",
"mode": "NULLABLE",
"name": "rep",
"type": "STRING"
},
{
"description": "total sales",
"mode": "NULLABLE",
"name": "sales",
"type": "FLOAT"
}
]"""

expected = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
]

client = self._make_client()

fake_file = io.StringIO(file_content)
actual = client.schema_from_json(fake_file)

assert expected == actual

def test_schema_to_json_with_file_path(self):
from google.cloud.bigquery.schema import SchemaField

file_content = [
{
"description": "quarter",
"mode": "REQUIRED",
"name": "qtr",
"type": "STRING",
},
{
"description": "sales representative",
"mode": "NULLABLE",
"name": "rep",
"type": "STRING",
},
{
"description": "total sales",
"mode": "NULLABLE",
"name": "sales",
"type": "FLOAT",
},
]

schema_list = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
]

client = self._make_client()
mock_file_path = "/mocked/file.json"

open_patch = mock.patch("builtins.open", mock.mock_open())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the test logs, it looks like this is tripping up Python 2. https://stackoverflow.com/a/34677735/101923

=================================== FAILURES ===================================
____________ TestClientUpload.test_schema_from_json_with_file_path _____________
self = <tests.unit.test_client.TestClientUpload object at 0x7f47e2dce4d0>
    def test_schema_from_json_with_file_path(self):
        from google.cloud.bigquery.schema import SchemaField
        file_content = """[
          {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING"
          },
          {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING"
          },
          {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT"
          }
        ]"""
        expected = [
            SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
            SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
            SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
        ]
        client = self._make_client()
        mock_file_path = "/mocked/file.json"
        open_patch = mock.patch(
            "builtins.open", new=mock.mock_open(read_data=file_content)
        )
>       with open_patch as _mock_file:
tests/unit/test_client.py:5202:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.nox/unit-2-7/lib/python2.7/site-packages/mock/mock.py:1353: in __enter__
    self.target = self.getter()
.nox/unit-2-7/lib/python2.7/site-packages/mock/mock.py:1523: in <lambda>
    getter = lambda: _importer(target)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
target = 'builtins'
    def _importer(target):
        components = target.split('.')
        import_path = components.pop(0)
>       thing = __import__(import_path)
E       ImportError: No module named builtins

Remember you can run against Python 2 by using nox, locally.

We could check the Python version with sys.version_info.major and change what object you are patching.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually six.PY2 is probably a better way to detect. https://pythonhosted.org/six/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed all the issues and the code is compatible with python 2 now.

with open_patch as _mock_file:
with mock.patch("json.dump") as _mock_dump:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like your thinking here. It's definitely nicer to compare lists than it is to compare JSON strings.

Nit: Let's write both with statement on one line. https://stackoverflow.com/a/1073814/101923

Nit: No need for leading underscore, that's usually to indicate a "private" variable, which isn't relevant inside a test.

with open_patch as mock_file, mock.patch("json.dump") as mock_dump:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace existing with statements with suggested line.

client.schema_to_json(schema_list, mock_file_path)
_mock_file.assert_called_once_with(mock_file_path, mode="w")
# This assert is to make sure __exit__ is called in the context
# manager that opens the file in the function
_mock_file().__exit__.assert_called_once()
_mock_dump.assert_called_with(
file_content, _mock_file.return_value, indent=2, sort_keys=True
)

def test_schema_to_json_with_file_object(self):
from google.cloud.bigquery.schema import SchemaField

file_content = """[
{
"description": "quarter",
"mode": "REQUIRED",
"name": "qtr",
"type": "STRING"
},
{
"description": "sales representative",
"mode": "NULLABLE",
"name": "rep",
"type": "STRING"
},
{
"description": "total sales",
"mode": "NULLABLE",
"name": "sales",
"type": "FLOAT"
}
]"""
schema_list = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
]

fake_file = io.StringIO()
client = self._make_client()

client.schema_to_json(schema_list, fake_file)
assert file_content == fake_file.getvalue()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's call json.loads(fake_file.getvalue()) and compare to a list of dictionaries like you do in the test of the path version.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test has been updated with the list of dictionaries and updated assert.