googleapis · HemangChothani · Nov 4, 2020 · tswast · Nov 4, 2020 · tswast
@@ -0,0 +1,40 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def load_table_dataframe_array_contains(table_id):
+    """Load a DataFrame with one list-valued column "A" into ``table_id``.
+
+    Prints a one-line summary and returns the table fetched after the load.
+    """
+    # [START bigquery_load_table_dataframe_array_contains]
+    from google.cloud import bigquery
+    import pandas
+
+    # Create the client used for the API calls below.
+    client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the table to create.
+    # table_id = "your-project.your_dataset.your_table_name"
+
+    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]  # Each cell of "A" is a list.
+    frame = pandas.DataFrame({"A": rows})
+    load_job = client.load_table_from_dataframe(frame, table_id)  # API request.
+    load_job.result()  # Block until the load job finishes.
+
+    table = client.get_table(table_id)  # API request.
+    summary = "Loaded {} rows and {} columns to {}"
+    print(summary.format(table.num_rows, len(table.schema), table_id))
+    # [END bigquery_load_table_dataframe_array_contains]
+    return table
@@ -6,5 +6,5 @@ ipython==7.16.1; python_version < '3.7'
 ipython==7.17.0; python_version >= '3.7'
 matplotlib==3.3.2
 pandas==1.1.4
-pyarrow==1.0.1
+pyarrow==2.0.0
 pytz==2020.1
@@ -0,0 +1,34 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from .. import load_table_dataframe_array_contains
+
+
+pandas = pytest.importorskip("pandas")
+pyarrow = pytest.importorskip("pyarrow", minversion="2.0.0")
+
+
+def test_load_table_dataframe_array_contains(capsys, random_table_id):
+    """Run the sample and check its printed summary and resulting columns."""
+    sample = load_table_dataframe_array_contains.load_table_dataframe_array_contains
+    loaded_table = sample(random_table_id)
+    captured_stdout = capsys.readouterr()[0]
+
+    wanted_columns = ["A"]
+    summary_prefix = "Loaded 3 rows and {} columns".format(len(wanted_columns))
+    assert summary_prefix in captured_stdout
+    # The schema must expose exactly the expected field names, in order.
+    assert [col.name for col in loaded_table.schema] == wanted_columns
@@ -46,12 +46,12 @@
         # grpc.Channel.close() method isn't added until 1.32.0.
         # https://github.com/grpc/grpc/pull/15254
         "grpcio >= 1.32.0, < 2.0dev",
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 2.0.0, < 3.0dev",
     ],
     "pandas": [
         "pandas>=0.23.0",
-        # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword.
-        "pyarrow >= 1.0.0, < 2.0dev",
+        # pyarrow 2.0.0 is required to load DataFrames containing array columns.
+        "pyarrow >= 2.0.0, < 3.0dev",
     ],
     "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
     "opentelemetry": [

@@ -129,7 +129,7 @@
 )
 
 PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")
-PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0")
+PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("2.0.0")
 
 if pandas:
     PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
@@ -1086,9 +1086,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self):
 
     @unittest.skipIf(
         pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION,
-        "Only `pyarrow version >=0.17.0` is supported",
+        "Only `pyarrow version >=2.0.0` is supported",
     )
-    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pandas is None, "Requires " "`pandas`")
     def test_load_table_from_dataframe_w_struct_datatype(self):
         """Test that a DataFrame with struct datatype can be uploaded if a
         BigQuery schema is specified.
@@ -1126,6 +1126,62 @@ def test_load_table_from_dataframe_w_struct_datatype(self):
         self.assertEqual(table.schema, table_schema)
         self.assertEqual(table.num_rows, 3)
 
+    @unittest.skipIf(
+        pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION,
+        "Only `pyarrow version >=2.0.0` is supported",
+    )
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_load_table_from_dataframe_w_array_datatype(self):
+        """Test that a DataFrame containing arrays can be uploaded without
+        specifying a BigQuery schema.
+
+        https://github.com/googleapis/python-bigquery/issues/19
+        """
+        item_field = bigquery.SchemaField(
+            "item",
+            "INTEGER",
+            mode="NULLABLE",
+            description=None,
+            fields=(),
+            policy_tags=None,
+        )
+        list_field = bigquery.SchemaField(
+            "list",
+            "RECORD",
+            mode="REPEATED",
+            description=None,
+            fields=(item_field,),
+            policy_tags=None,
+        )
+        expected_schema = [
+            bigquery.SchemaField(
+                "A",
+                "RECORD",
+                mode="NULLABLE",
+                description=None,
+                fields=(list_field,),
+                policy_tags=None,
+            )
+        ]
+
+        dataset_id = _make_dataset_id("bq_load_test")
+        self.temp_dataset(dataset_id)
+        id_template = "{}.{}.load_table_from_dataframe_w_array_datatype"
+        table_id = id_template.format(Config.CLIENT.project, dataset_id)
+
+        # Create the destination table without a schema; the load uses autodetect.
+        create_table = retry_403(Config.CLIENT.create_table)
+        self.to_delete.insert(0, create_table(Table(table_id)))
+
+        frame = pandas.DataFrame({"A": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]})
+        Config.CLIENT.load_table_from_dataframe(
+            frame, table_id, job_config=bigquery.LoadJobConfig(autodetect=True)
+        ).result()
+
+        loaded = Config.CLIENT.get_table(table_id)
+        self.assertEqual(loaded.schema, expected_schema)
+        self.assertEqual(loaded.num_rows, 3)
+
     def test_load_table_from_json_basic_use(self):
         table_schema = (
             bigquery.SchemaField("name", "STRING", mode="REQUIRED"),