From 27a4fc36cf62d2530468a7d87f9346cbc6ff8830 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 2 Aug 2019 17:43:54 +0200 Subject: [PATCH] Add support for ARRAY type in to_standard_sql() --- bigquery/google/cloud/bigquery/schema.py | 28 ++++++++++--- bigquery/tests/unit/test_schema.py | 53 ++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py index 522ce208aa17..61bc0bcedfd6 100644 --- a/bigquery/google/cloud/bigquery/schema.py +++ b/bigquery/google/cloud/bigquery/schema.py @@ -37,6 +37,7 @@ "DATE": types.StandardSqlDataType.DATE, "TIME": types.StandardSqlDataType.TIME, "DATETIME": types.StandardSqlDataType.DATETIME, + # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL types.""" @@ -179,13 +180,28 @@ def to_standard_sql(self): An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. """ sql_type = types.StandardSqlDataType() - sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED - ) - # NOTE: No need to also handle the "ARRAY" composed type, the latter - # does not exist in legacy SQL types. - if sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721 + if self.mode == "REPEATED": + sql_type.type_kind = types.StandardSqlDataType.ARRAY + else: + sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( + self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + ) + + if sql_type.type_kind == types.StandardSqlDataType.ARRAY: # noqa: E721 + array_element_type = LEGACY_TO_STANDARD_TYPES.get( + self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + ) + sql_type.array_element_type.type_kind = array_element_type + + # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type + if array_element_type == types.StandardSqlDataType.STRUCT: # noqa: E721 + sql_type.array_element_type.struct_type.fields.extend( + field.to_standard_sql() for field in self.fields + ) + + elif sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721 sql_type.struct_type.fields.extend( field.to_standard_sql() for field in self.fields ) diff --git a/bigquery/tests/unit/test_schema.py b/bigquery/tests/unit/test_schema.py index b813c9091acf..682e45895852 100644 --- a/bigquery/tests/unit/test_schema.py +++ b/bigquery/tests/unit/test_schema.py @@ -254,6 +254,59 @@ def test_to_standard_sql_struct_type(self): standard_field = schema_field.to_standard_sql() self.assertEqual(standard_field, expected_result) + def test_to_standard_sql_array_type_simple(self): + from google.cloud.bigquery_v2 import types + + sql_type = self._get_standard_sql_data_type_class() + + # construct expected result object + expected_sql_type = sql_type(type_kind=sql_type.ARRAY) + expected_sql_type.array_element_type.type_kind = sql_type.INT64 + expected_result = types.StandardSqlField( + name="valid_numbers", type=expected_sql_type + ) + + # construct "repeated" SchemaField object and convert to standard SQL + schema_field = self._make_one("valid_numbers", "INT64", mode="REPEATED") + standard_field = schema_field.to_standard_sql() + + self.assertEqual(standard_field, expected_result) + + def test_to_standard_sql_array_type_struct(self): + from google.cloud.bigquery_v2 import types + + sql_type = self._get_standard_sql_data_type_class() + + # define person STRUCT + name_field = types.StandardSqlField( + name="name", type=sql_type(type_kind=sql_type.STRING) + ) + age_field = types.StandardSqlField( + name="age", type=sql_type(type_kind=sql_type.INT64) + ) + person_struct = types.StandardSqlField( + name="person_info", type=sql_type(type_kind=sql_type.STRUCT) + ) + person_struct.type.struct_type.fields.extend([name_field, age_field]) + + # define expected result - an ARRAY of person structs + expected_sql_type = sql_type( + type_kind=sql_type.ARRAY, array_element_type=person_struct.type + ) + expected_result = types.StandardSqlField( + name="known_people", type=expected_sql_type + ) + + # construct legacy repeated SchemaField object + sub_field1 = self._make_one("name", "STRING") + sub_field2 = self._make_one("age", "INTEGER") + schema_field = self._make_one( + "known_people", "RECORD", fields=(sub_field1, sub_field2), mode="REPEATED" + ) + + standard_field = schema_field.to_standard_sql() + self.assertEqual(standard_field, expected_result) + def test_to_standard_sql_unknown_type(self): sql_type = self._get_standard_sql_data_type_class() field = self._make_one("weird_field", "TROOLEAN")