Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: deprecate pandas code paths that don't use pyarrow #48

Merged
merged 6 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Issue a warning in table.to dataframe() w/o pyarrow
  • Loading branch information
plamut committed Feb 26, 2020
commit ab3d3b00c4ab4366b91e1436aa3089ad99027418
9 changes: 9 additions & 0 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery.exceptions import PyarrowMissingWarning
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration

Expand Down Expand Up @@ -1737,6 +1738,14 @@ def to_dataframe(
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column])
return df
else:
warnings.warn(
"Converting to a dataframe without pyarrow installed is "
"often slower and will become unsupported in the future. "
"Please install the pyarrow package.",
PyarrowMissingWarning,
stacklevel=2,
)

# The bqstorage_client is only used if pyarrow is available, so the
# rest of this method only needs to account for tabledata.list.
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,6 +2239,38 @@ def test_to_dataframe(self):
self.assertEqual(df.name.dtype.name, "object")
self.assertEqual(df.age.dtype.name, "int64")

@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_warning_wo_pyarrow(self):
from google.cloud.bigquery.client import PyarrowMissingWarning
from google.cloud.bigquery.schema import SchemaField

schema = [
SchemaField("name", "STRING", mode="REQUIRED"),
SchemaField("age", "INTEGER", mode="REQUIRED"),
]
rows = [
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
]
path = "/foo"
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None)
catch_warnings = warnings.catch_warnings(record=True)

with no_pyarrow_patch, catch_warnings as warned:
df = row_iterator.to_dataframe()

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 2)
matches = [
warning for warning in warned if warning.category is PyarrowMissingWarning
]
self.assertTrue(
matches, msg="A missing pyarrow deprecation warning was not raised."
)

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(tqdm is None, "Requires `tqdm`")
@mock.patch("tqdm.tqdm_gui")
Expand Down