adds to_dataframe() helper to QueryJob

googleapis · Nov 18, 2017 · e9a8479 · e9a8479
1 parent 89e78a7
commit e9a8479
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 1 deletion.
diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
@@ -1929,7 +1929,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
         :type retry: :class:`google.api_core.retry.Retry`
         :param retry: (Optional) How to retry the call that retrieves rows.
 
-        :rtype: :class:`~google.api_core.page_iterator.Iterator`
+        :rtype: :class:`~google.cloud.bigquery.table.RowIterator`
         :returns:
             Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s.
             During each page, the iterator will have the ``total_rows``
@@ -1949,6 +1949,19 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
         return self._client.list_rows(dest_table, selected_fields=schema,
                                       retry=retry)
 
+    def to_dataframe(self):
+        """Return the results of this query job as a pandas DataFrame.
+
+        Returns:
+            A :class:`~pandas.DataFrame` populated with row data and column
+            headers from the query results. The column headers are derived
+            from the destination table's schema.
+
+        Raises:
+            ValueError: If the `pandas` library cannot be imported.
+        """
+        return self.result().to_dataframe()
+
     def __iter__(self):
         return iter(self.result())
 

diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
@@ -2724,6 +2724,41 @@ def test_reload_w_alternate_client(self):
         self.assertEqual(req['path'], PATH)
         self._verifyResourceProperties(job, RESOURCE)
 
+    @unittest.skipIf(pandas is None, 'Requires `pandas`')
+    def test_to_dataframe(self):
+        begun_resource = self._make_resource()
+        query_resource = {
+            'jobComplete': True,
+            'jobReference': {
+                'projectId': self.PROJECT,
+                'jobId': self.JOB_ID,
+            },
+            'schema': {
+                'fields': [
+                    {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
+                    {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+                ],
+            },
+            'rows': [
+                {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
+                {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
+                {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]},
+                {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]},
+            ],
+        }
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource['status'] = {'state': 'DONE'}
+        connection = _Connection(
+            begun_resource, query_resource, done_resource, query_resource)
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+
+        df = job.to_dataframe()
+
+        self.assertIsInstance(df, pandas.DataFrame)
+        self.assertEqual(len(df), 4)  # verify the number of rows
+        self.assertEqual(list(df), ['name', 'age'])  # verify the column names
+
     def test_iter(self):
         import types