Skip to content

Commit

Permalink
Issue a warning in table.to_dataframe() w/o pyarrow
Browse files Browse the repository at this point in the history
  • Loading branch information
plamut committed Feb 26, 2020
1 parent ddaade7 commit b9df6be
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
9 changes: 9 additions & 0 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery.exceptions import PyarrowMissingWarning
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration

Expand Down Expand Up @@ -1737,6 +1738,14 @@ def to_dataframe(
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column])
return df
else:
warnings.warn(
"Converting to a dataframe without pyarrow installed is "
"often slower and will become unsupported in the future. "
"Please install the pyarrow package.",
PyarrowMissingWarning,
stacklevel=2,
)

# The bqstorage_client is only used if pyarrow is available, so the
# rest of this method only needs to account for tabledata.list.
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,6 +2239,38 @@ def test_to_dataframe(self):
self.assertEqual(df.name.dtype.name, "object")
self.assertEqual(df.age.dtype.name, "int64")

@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_warning_wo_pyarrow(self):
    """Check that ``to_dataframe()`` warns when pyarrow is unavailable.

    The row iterator is built over two mocked API rows, the ``pyarrow``
    name in ``google.cloud.bigquery.table`` is patched to ``None``, and the
    test asserts that a two-row ``pandas.DataFrame`` is still returned and
    that at least one ``PyarrowMissingWarning`` was recorded.
    """
    # Import the warning class from the same module the code under test
    # imports it from, rather than relying on a re-export elsewhere.
    from google.cloud.bigquery.exceptions import PyarrowMissingWarning
    from google.cloud.bigquery.schema import SchemaField

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
    ]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = self._make_one(_mock_client(), api_request, path, schema)

    no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None)
    catch_warnings = warnings.catch_warnings(record=True)

    with no_pyarrow_patch, catch_warnings as warned:
        # Record every warning regardless of the ambient filters, so the
        # assertion below does not depend on how the test runner was
        # invoked or on whether the warning was already emitted earlier.
        warnings.simplefilter("always")
        df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 2)
    matches = [
        warning for warning in warned if warning.category is PyarrowMissingWarning
    ]
    self.assertTrue(
        matches, msg="A missing pyarrow deprecation warning was not raised."
    )

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(tqdm is None, "Requires `tqdm`")
@mock.patch("tqdm.tqdm_gui")
Expand Down

0 comments on commit b9df6be

Please sign in to comment.