From b9df6beafd58b2aeb913878842895e9981d0b828 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 26 Feb 2020 15:25:12 +0000 Subject: [PATCH] Issue a warning in table.to_dataframe() w/o pyarrow --- google/cloud/bigquery/table.py | 9 +++++++++ tests/unit/test_table.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 1da0617207..72d17b6f7a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -54,6 +54,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -1737,6 +1738,14 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) return df + else: + warnings.warn( + "Converting to a dataframe without pyarrow installed is " + "often slower and will become unsupported in the future. " + "Please install the pyarrow package.", + PyarrowMissingWarning, + stacklevel=2, + ) # The bqstorage_client is only used if pyarrow is available, so the # rest of this method only needs to account for tabledata.list. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index c1611c0848..5bcd60986d 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -2239,6 +2239,43 @@ def test_to_dataframe(self):
         self.assertEqual(df.name.dtype.name, "object")
         self.assertEqual(df.age.dtype.name, "int64")
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_to_dataframe_warning_wo_pyarrow(self):
+        """to_dataframe() without pyarrow still works but emits a warning."""
+        from google.cloud.bigquery.exceptions import PyarrowMissingWarning
+        from google.cloud.bigquery.schema import SchemaField
+
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        rows = [
+            {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+            {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+        ]
+        path = "/foo"
+        api_request = mock.Mock(return_value={"rows": rows})
+        row_iterator = self._make_one(_mock_client(), api_request, path, schema)
+
+        # Simulate an environment where pyarrow is not installed.
+        no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None)
+        catch_warnings = warnings.catch_warnings(record=True)
+
+        with no_pyarrow_patch, catch_warnings as warned:
+            # Record every warning regardless of the ambient warning filters.
+            warnings.simplefilter("always")
+            df = row_iterator.to_dataframe()
+
+        # The conversion must still succeed without pyarrow.
+        self.assertIsInstance(df, pandas.DataFrame)
+        self.assertEqual(len(df), 2)
+        matches = [
+            warning for warning in warned if warning.category is PyarrowMissingWarning
+        ]
+        self.assertTrue(
+            matches, msg="A missing pyarrow deprecation warning was not raised."
+        )
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(tqdm is None, "Requires `tqdm`")
     @mock.patch("tqdm.tqdm_gui")