Skip to content

Commit

Permalink
Add support for ARRAY type in to_standard_sql()
Browse files Browse the repository at this point in the history
  • Loading branch information
plamut committed Aug 2, 2019
1 parent 3f03054 commit 27a4fc3
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 6 deletions.
28 changes: 22 additions & 6 deletions bigquery/google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"DATE": types.StandardSqlDataType.DATE,
"TIME": types.StandardSqlDataType.TIME,
"DATETIME": types.StandardSqlDataType.DATETIME,
# No legacy type name maps directly to ARRAY; arrays are expressed via mode="REPEATED".
}
"""String names of the legacy SQL types to integer codes of Standard SQL types."""

Expand Down Expand Up @@ -179,13 +180,28 @@ def to_standard_sql(self):
An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
"""
sql_type = types.StandardSqlDataType()
sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
)

# NOTE: No need to also handle the "ARRAY" composed type, the latter
# does not exist in legacy SQL types.
if sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721
if self.mode == "REPEATED":
sql_type.type_kind = types.StandardSqlDataType.ARRAY
else:
sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
)

if sql_type.type_kind == types.StandardSqlDataType.ARRAY: # noqa: E721
array_element_type = LEGACY_TO_STANDARD_TYPES.get(
self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
)
sql_type.array_element_type.type_kind = array_element_type

# ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
if array_element_type == types.StandardSqlDataType.STRUCT: # noqa: E721
sql_type.array_element_type.struct_type.fields.extend(
field.to_standard_sql() for field in self.fields
)

elif sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721
sql_type.struct_type.fields.extend(
field.to_standard_sql() for field in self.fields
)
Expand Down
53 changes: 53 additions & 0 deletions bigquery/tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,59 @@ def test_to_standard_sql_struct_type(self):
standard_field = schema_field.to_standard_sql()
self.assertEqual(standard_field, expected_result)

def test_to_standard_sql_array_type_simple(self):
    """Check that an INT64 field with mode REPEATED converts to ARRAY<INT64>."""
    from google.cloud.bigquery_v2 import types

    type_cls = self._get_standard_sql_data_type_class()

    # Expected outcome: an ARRAY type whose element type is INT64.
    array_of_int64 = type_cls(
        type_kind=type_cls.ARRAY,
        array_element_type=type_cls(type_kind=type_cls.INT64),
    )
    expected = types.StandardSqlField(name="valid_numbers", type=array_of_int64)

    # The schema field under test expresses the array through mode="REPEATED".
    repeated_field = self._make_one("valid_numbers", "INT64", mode="REPEATED")

    self.assertEqual(repeated_field.to_standard_sql(), expected)

def test_to_standard_sql_array_type_struct(self):
    """Check that a RECORD field with mode REPEATED converts to ARRAY<STRUCT>."""
    from google.cloud.bigquery_v2 import types

    type_cls = self._get_standard_sql_data_type_class()

    # Expected element type: a STRUCT holding a STRING "name" and an INT64 "age".
    person_struct = types.StandardSqlField(
        name="person_info", type=type_cls(type_kind=type_cls.STRUCT)
    )
    person_struct.type.struct_type.fields.extend(
        [
            types.StandardSqlField(
                name="name", type=type_cls(type_kind=type_cls.STRING)
            ),
            types.StandardSqlField(
                name="age", type=type_cls(type_kind=type_cls.INT64)
            ),
        ]
    )

    # Expected outcome: an ARRAY whose elements are the person STRUCT above.
    expected = types.StandardSqlField(
        name="known_people",
        type=type_cls(
            type_kind=type_cls.ARRAY, array_element_type=person_struct.type
        ),
    )

    # Schema field under test: a repeated RECORD with two nested sub-fields.
    nested_fields = tuple(
        self._make_one(field_name, legacy_type)
        for field_name, legacy_type in (("name", "STRING"), ("age", "INTEGER"))
    )
    repeated_record = self._make_one(
        "known_people", "RECORD", fields=nested_fields, mode="REPEATED"
    )

    self.assertEqual(repeated_record.to_standard_sql(), expected)

def test_to_standard_sql_unknown_type(self):
sql_type = self._get_standard_sql_data_type_class()
field = self._make_one("weird_field", "TROOLEAN")
Expand Down

0 comments on commit 27a4fc3

Please sign in to comment.