googleapis · tswast · Mar 28, 2019 · Mar 23, 2019 · Mar 23, 2019 · Mar 25, 2019
diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py
@@ -30,6 +30,11 @@
 except ImportError:  # pragma: NO COVER
     pandas = None
 
+try:
+    import tqdm
+except ImportError:  # pragma: NO COVER
+    tqdm = None
+
 from google.api_core.page_iterator import HTTPIterator
 
 import google.cloud._helpers
@@ -1334,8 +1339,35 @@ def _to_dataframe_tabledata_list(self, dtypes):
         """Use (slower, but free) tabledata.list to construct a DataFrame."""
         column_names = [field.name for field in self.schema]
         frames = []
+
+        # Report download progress, but only if tqdm is installed.
+        progress_bar = None
+        if tqdm is not None:
+            try:
+                progress_bar = tqdm.tqdm(
+                    desc="Downloading", total=self.total_rows, unit="rows"
+                )
+            except (KeyError, TypeError):
+                # Protect ourselves from any tqdm errors. In case of
+                # unexpected tqdm behavior, just fall back to showing
+                # no progress bar.
+                pass
+
         for page in iter(self.pages):
-            frames.append(self._to_dataframe_dtypes(page, column_names, dtypes))
+            current_frame = self._to_dataframe_dtypes(page, column_names, dtypes)
+            frames.append(current_frame)
+
+            if progress_bar is not None:
+                # In some cases, the number of total rows is not populated
+                # until the first page of rows is fetched. Update the
+                # progress bar's total to keep an accurate count.
+                progress_bar.total = progress_bar.total or self.total_rows
+                progress_bar.update(len(current_frame))
+
+        if progress_bar is not None:
+            # Finalize the bar so it does not stay open after the download.
+            progress_bar.close()
+
         return pandas.concat(frames)
 
     def _to_dataframe_bqstorage(self, bqstorage_client, dtypes):

diff --git a/bigquery/setup.py b/bigquery/setup.py
@@ -39,6 +39,7 @@
     # Exclude PyArrow dependency from Windows Python 2.7.
     'pyarrow: platform_system != "Windows" or python_version >= "3.4"':
         'pyarrow>=0.4.1',
+    'tqdm': 'tqdm >= 4.31.1',
     'fastparquet': ['fastparquet', 'python-snappy'],
 }
 

diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py
@@ -29,6 +29,11 @@
 except (ImportError, AttributeError):  # pragma: NO COVER
     pandas = None
 
+try:
+    from tqdm import tqdm
+except (ImportError, AttributeError):  # pragma: NO COVER
+    tqdm = None
+
 from google.cloud.bigquery.dataset import DatasetReference
 
 
@@ -1413,6 +1418,78 @@ def test_to_dataframe(self):
         self.assertEqual(df.name.dtype.name, "object")
         self.assertEqual(df.age.dtype.name, "int64")
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(tqdm is None, "Requires `tqdm`")
+    @mock.patch("tqdm.tqdm")  # replace the progress bar class with a mock
+    def test_to_dataframe_progress_bar(self, tqdm_mock):
+        from google.cloud.bigquery.table import RowIterator
+        from google.cloud.bigquery.table import SchemaField
+
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        rows = [
+            {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+            {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
+            {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
+        ]
+        path = "/foo"
+        api_request = mock.Mock(return_value={"rows": rows})
+        row_iterator = RowIterator(_mock_client(), api_request, path, schema)
+        row_iterator.to_dataframe()  # result unused; only tqdm calls matter
+
+        tqdm_mock.assert_called()  # a progress bar was constructed
+        tqdm_mock().update.assert_called()  # and advanced at least once
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @mock.patch("google.cloud.bigquery.table.tqdm", new=None)
+    def test_to_dataframe_no_tqdm(self):
+        from google.cloud.bigquery.table import RowIterator
+        from google.cloud.bigquery.table import SchemaField
+
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        rows = [
+            {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+            {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
+            {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
+        ]
+        path = "/foo"
+        api_request = mock.Mock(return_value={"rows": rows})
+        row_iterator = RowIterator(_mock_client(), api_request, path, schema)
+        df = row_iterator.to_dataframe()
+
+        self.assertEqual(len(df), 4)  # all 4 rows returned without tqdm
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(tqdm is None, "Requires `tqdm`")
+    @mock.patch("tqdm.tqdm", new=None)  # calling None raises TypeError
+    def test_to_dataframe_tqdm_error(self):
+        from google.cloud.bigquery.table import RowIterator
+        from google.cloud.bigquery.table import SchemaField
+
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        rows = [
+            {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+            {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
+            {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
+        ]
+        path = "/foo"
+        api_request = mock.Mock(return_value={"rows": rows})
+        row_iterator = RowIterator(_mock_client(), api_request, path, schema)
+        df = row_iterator.to_dataframe()
+
+        self.assertEqual(len(df), 4)  # all 4 rows returned despite the error
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     def test_to_dataframe_w_empty_results(self):
         from google.cloud.bigquery.table import RowIterator