Skip to content

Commit

Permalink
chore: deprecate pandas code paths that don't use pyarrow (#48)
Browse files Browse the repository at this point in the history
* feat: deprecate code paths without pyarrow

* Emit warning if loading dataframe data w/o pyarrow

* Issue a warning in table.to dataframe() w/o pyarrow
  • Loading branch information
plamut authored Mar 17, 2020
1 parent a17be5f commit be5c8b1
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 1 deletion.
3 changes: 3 additions & 0 deletions google/cloud/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import StandardSqlDataTypes
from google.cloud.bigquery.exceptions import PyarrowMissingWarning
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import BigtableOptions
from google.cloud.bigquery.external_config import BigtableColumnFamily
Expand Down Expand Up @@ -142,6 +143,8 @@
"WriteDisposition",
# EncryptionConfiguration
"EncryptionConfiguration",
# Errors and warnings
"PyarrowMissingWarning",
]


Expand Down
10 changes: 10 additions & 0 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.exceptions import PyarrowMissingWarning
from google.cloud.bigquery import job
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
Expand Down Expand Up @@ -1848,6 +1849,15 @@ def load_table_from_dataframe(
parquet_compression=parquet_compression,
)
else:
if not pyarrow:
warnings.warn(
"Loading dataframe data without pyarrow installed is "
"deprecated and will become unsupported in the future. "
"Please install the pyarrow package.",
PyarrowMissingWarning,
stacklevel=2,
)

if job_config.schema:
warnings.warn(
"job_config.schema is set, but not used to assist in "
Expand Down
17 changes: 17 additions & 0 deletions google/cloud/bigquery/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class PyarrowMissingWarning(DeprecationWarning):
pass
9 changes: 9 additions & 0 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery.exceptions import PyarrowMissingWarning
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration

Expand Down Expand Up @@ -1739,6 +1740,14 @@ def to_dataframe(
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column])
return df
else:
warnings.warn(
"Converting to a dataframe without pyarrow installed is "
"often slower and will become unsupported in the future. "
"Please install the pyarrow package.",
PyarrowMissingWarning,
stacklevel=2,
)

# The bqstorage_client is only used if pyarrow is available, so the
# rest of this method only needs to account for tabledata.list.
Expand Down
40 changes: 39 additions & 1 deletion tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6580,6 +6580,42 @@ def test_load_table_from_dataframe_unknown_table(self):
job_config=mock.ANY,
)

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(fastparquet is None, "Requires `fastparquet`")
def test_load_table_from_dataframe_no_pyarrow_warning(self):
from google.cloud.bigquery.client import PyarrowMissingWarning

client = self._make_client()

# Pick at least one column type that translates to Pandas dtype
# "object". A string column matches that.
records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
dataframe = pandas.DataFrame(records)

get_table_patch = mock.patch(
"google.cloud.bigquery.client.Client.get_table",
autospec=True,
side_effect=google.api_core.exceptions.NotFound("Table not found"),
)
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None)
pyarrow_patch_helpers = mock.patch(
"google.cloud.bigquery._pandas_helpers.pyarrow", None
)
catch_warnings = warnings.catch_warnings(record=True)

with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned:
client.load_table_from_dataframe(
dataframe, self.TABLE_REF, location=self.LOCATION
)

matches = [
warning for warning in warned if warning.category is PyarrowMissingWarning
]
assert matches, "A missing pyarrow deprecation warning was not raised."

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(fastparquet is None, "Requires `fastparquet`")
def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self):
Expand Down Expand Up @@ -6854,7 +6890,9 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
assert warned # there should be at least one warning
for warning in warned:
assert "pyarrow" in str(warning)
assert warning.category in (DeprecationWarning, PendingDeprecationWarning)
assert issubclass(
warning.category, (DeprecationWarning, PendingDeprecationWarning)
)

load_table_from_file.assert_called_once_with(
client,
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,6 +2239,38 @@ def test_to_dataframe(self):
self.assertEqual(df.name.dtype.name, "object")
self.assertEqual(df.age.dtype.name, "int64")

@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_warning_wo_pyarrow(self):
from google.cloud.bigquery.client import PyarrowMissingWarning
from google.cloud.bigquery.schema import SchemaField

schema = [
SchemaField("name", "STRING", mode="REQUIRED"),
SchemaField("age", "INTEGER", mode="REQUIRED"),
]
rows = [
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
]
path = "/foo"
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None)
catch_warnings = warnings.catch_warnings(record=True)

with no_pyarrow_patch, catch_warnings as warned:
df = row_iterator.to_dataframe()

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 2)
matches = [
warning for warning in warned if warning.category is PyarrowMissingWarning
]
self.assertTrue(
matches, msg="A missing pyarrow deprecation warning was not raised."
)

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(tqdm is None, "Requires `tqdm`")
@mock.patch("tqdm.tqdm_gui")
Expand Down

0 comments on commit be5c8b1

Please sign in to comment.