added FutureWarning to empty Series without dtype and adjusted the tests so that no unnecessary warnings are thrown
SaturnFromTitan committed Nov 17, 2019
1 parent debaf9a commit 7e2f6ad
Showing 77 changed files with 359 additions and 237 deletions.
4 changes: 2 additions & 2 deletions doc/source/user_guide/missing_data.rst
@@ -189,15 +189,15 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0.
pd.Series([np.nan]).sum()
pd.Series([]).sum()
pd.Series([], dtype="float64").sum()
The product of an empty or all-NA Series or column of a DataFrame is 1.

.. ipython:: python
pd.Series([np.nan]).prod()
pd.Series([]).prod()
pd.Series([], dtype="float64").prod()
NA values in GroupBy
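Concretely, the identities documented in the hunk above look like this once the dtype is spelled out (a minimal sketch; the printed values assume the current float64 default):

import numpy as np
import pandas as pd

# the sum of an empty or all-NA Series is 0, the product is 1
print(pd.Series([np.nan]).sum())              # 0.0
print(pd.Series([], dtype="float64").sum())   # 0.0
print(pd.Series([np.nan]).prod())             # 1.0
print(pd.Series([], dtype="float64").prod())  # 1.0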
5 changes: 4 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
@@ -201,6 +201,9 @@ Other API changes
See :ref:`units registration <whatsnew_1000.matplotlib_units>` for more.
- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
- Initialising an empty :class:`pandas.core.series.Series` without specifying a dtype now raises a ``FutureWarning``.
The default dtype will change from ``float64`` to ``object`` in future releases so that it is consistent with the
behaviour of :class:`pandas.core.frame.DataFrame` and :class:`pandas.core.indexes.base.Index`.
-


@@ -243,7 +246,7 @@ Removal of prior version deprecations/changes

Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`).
This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using
matplotlib directly rather than rather than :meth:`~DataFrame.plot`.
matplotlib directly rather than :meth:`~DataFrame.plot`.

To use pandas formatters with a matplotlib plot, specify

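To illustrate the deprecation described in the new whatsnew entry above (a sketch, not part of the diff): constructing an empty Series without a dtype now emits the warning, while an explicit dtype keeps code silent and stable across versions.

import warnings
import pandas as pd

# bare construction: the default dtype of an empty Series will change
# from float64 to object, so a FutureWarning is raised
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    s = pd.Series()
assert any(issubclass(w.category, FutureWarning) for w in caught)
assert s.dtype == "float64"  # still the old default for now

# explicit dtype: no warning
pd.Series(dtype="float64")
pd.Series(dtype=object)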
2 changes: 1 addition & 1 deletion pandas/compat/pickle_compat.py
@@ -64,7 +64,7 @@ def __new__(cls) -> "Series": # type: ignore
stacklevel=6,
)

return Series()
return Series(dtype=object)


class _LoadSparseFrame:
15 changes: 11 additions & 4 deletions pandas/core/apply.py
@@ -1,6 +1,6 @@
import abc
import inspect
from typing import TYPE_CHECKING, Any, Dict, Iterator, Tuple, Type, Union
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Type, Union

import numpy as np

@@ -202,15 +202,15 @@ def apply_empty_result(self):

if not should_reduce:
try:
r = self.f(Series([]))
r = self.f(Series([], dtype=np.float64))
except Exception:
pass
else:
should_reduce = not isinstance(r, Series)

if should_reduce:
if len(self.agg_axis):
r = self.f(Series([]))
r = self.f(Series([], dtype=np.float64))
else:
r = np.nan

@@ -343,14 +343,21 @@ def apply_series_generator(self) -> Tuple[ResType, "Index"]:
def wrap_results(
self, results: ResType, res_index: "Index"
) -> Union["Series", "DataFrame"]:
from pandas import Series

# see if we can infer the results
if len(results) > 0 and 0 in results and is_sequence(results[0]):

return self.wrap_results_for_axis(results, res_index)

# dict of scalars
result = self.obj._constructor_sliced(results)
# TODO: Remove if/else block when default dtype of Series is changed to object
constructor_sliced = self.obj._constructor_sliced
is_empty = isinstance(results, (list, tuple, dict)) and not results
if constructor_sliced is Series and is_empty:
result = constructor_sliced(results, dtype=np.float64)
else:
result = constructor_sliced(results)
result.index = res_index

return result
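For context on the apply.py hunks above: when a reducing function is applied to a frame with no rows, the function is probed against a Series built with an explicit float64 dtype, so the new warning does not leak out of DataFrame.apply. A rough sketch of the user-visible path (the output follows from the current float64 default):

import pandas as pd

# a frame with columns but no rows exercises apply_empty_result
df = pd.DataFrame(columns=["a", "b"])

# the reducer is probed against Series([], dtype=np.float64) internally,
# so no FutureWarning should surface here
result = df.apply(lambda col: col.sum())
print(result)        # 0.0 for each column (empty sum is 0)
print(result.dtype)  # float64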
5 changes: 4 additions & 1 deletion pandas/core/base.py
@@ -1256,7 +1256,10 @@ def _map_values(self, mapper, na_action=None):
# possibility that they are tuples
from pandas import Series

mapper = Series(mapper)
if not mapper:
mapper = Series(mapper, dtype=np.float64)
else:
mapper = Series(mapper)

if isinstance(mapper, ABCSeries):
# Since values were input this means we came from either
3 changes: 2 additions & 1 deletion pandas/core/frame.py
@@ -8116,7 +8116,8 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
cols = Index([], name=self.columns.name)
if is_list_like(q):
return self._constructor([], index=q, columns=cols)
return self._constructor_sliced([], index=cols, name=q)

return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)

result = data._data.quantile(
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
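The quantile hunk above handles the corner case where no numeric columns survive the numeric_only filter; the empty result is now built with an explicit float64 dtype. A hedged sketch of that corner case:

import pandas as pd

# no numeric columns, so quantile has nothing to compute
df = pd.DataFrame({"label": ["x", "y", "z"]})

res = df.quantile(0.5)
print(res.empty, res.name, res.dtype)  # True 0.5 float64

# list-like q returns an empty frame indexed by the requested quantiles
print(df.quantile([0.25, 0.75]))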
10 changes: 7 additions & 3 deletions pandas/core/generic.py
@@ -6281,6 +6281,8 @@ def fillna(
2 NaN 1.0 NaN 5
3 NaN 3.0 NaN 4
"""
from pandas import Series

inplace = validate_bool_kwarg(inplace, "inplace")
value, method = validate_fillna_kwargs(value, method)

@@ -6317,8 +6319,10 @@
return self

if self.ndim == 1:
if isinstance(value, (dict, ABCSeries)):
from pandas import Series
if isinstance(value, dict):
dtype = object if not value else None
value = Series(value, dtype=dtype)
elif isinstance(value, ABCSeries):

value = Series(value)
elif not is_list_like(value):
@@ -7263,7 +7267,7 @@ def asof(self, where, subset=None):
if not is_series:
from pandas import Series

return Series(index=self.columns, name=where)
return Series(index=self.columns, name=where, dtype=np.float64)
return np.nan

# It's always much faster to use a *while* loop here for
24 changes: 19 additions & 5 deletions pandas/core/groupby/generic.py
@@ -251,7 +251,13 @@ def aggregate(self, func=None, *args, **kwargs):
result = self._aggregate_named(func, *args, **kwargs)

index = Index(sorted(result), name=self.grouper.names[0])
ret = Series(result, index=index)

# TODO: if/else can be removed as soon as default dtype
# for empty series is changed to object
if result:
ret = Series(result, index=index)
else:
ret = Series(result, index=index, dtype=object)

if not self.as_index: # pragma: no cover
print("Warning, ignoring as_index=True")
@@ -348,7 +354,7 @@ def _wrap_transformed_output(self, output, names=None):
def _wrap_applied_output(self, keys, values, not_indexed_same=False):
if len(keys) == 0:
# GH #6265
return Series([], name=self._selection_name, index=keys)
return Series([], name=self._selection_name, index=keys, dtype=np.float64)

def _get_index() -> Index:
if self.grouper.nkeys > 1:
@@ -430,7 +436,7 @@ def transform(self, func, *args, **kwargs):

result = concat(results).sort_index()
else:
result = Series()
result = Series(dtype=np.float64)

# we will only try to coerce the result type if
# we have a numeric dtype, as these are *always* udfs
@@ -1164,9 +1170,17 @@ def first_not_none(values):
if v is None:
return DataFrame()
elif isinstance(v, NDFrame):

# this is to silence a FutureWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = v._construct_axes_dict()
if v._constructor is Series:
is_empty = "data" not in kwargs or not kwargs["data"]
if "dtype" not in kwargs and is_empty:
kwargs["dtype"] = object

values = [
x if x is not None else v._constructor(**v._construct_axes_dict())
for x in values
x if (x is not None) else v._constructor(**kwargs) for x in values
]

v = values[0]
19 changes: 18 additions & 1 deletion pandas/core/series.py
@@ -202,6 +202,19 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
def __init__(
self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
):
no_data = data is None or (isinstance(data, (list, tuple, dict)) and not data)
if no_data and dtype is None:
# Empty Series should have dtype object to be consistent
# with the behaviour of DataFrame and Index
warnings.warn(
"The default dtype for empty Series will be 'object' instead"
" of 'float64' in the next version. Specify a dtype explicitly"
" to silence this warning.",
FutureWarning,
stacklevel=2,
)
# uncomment the line below when removing the FutureWarning
# dtype = np.dtype(object)

# we are called internally, so short-circuit
if fastpath:
@@ -357,7 +370,11 @@ def _init_dict(self, data, index=None, dtype=None):
keys, values = [], []

# Input is now list-like, so rely on "standard" construction:
s = Series(values, index=keys, dtype=dtype)
# TODO: warning filter can be removed when default dtype for Series
# is changed to object.
with warnings.catch_warnings():
warnings.simplefilter(action="ignore", category=FutureWarning)
s = Series(values, index=keys, dtype=dtype)

# Now we just make sure the order is respected, if any
if data and index is not None:
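Pulling the series.py hunks together: the warning fires only when the data argument is missing or an empty list/tuple/dict and no dtype is given, and _init_dict suppresses the duplicate warning from its internal re-construction. A small illustration (not a test from the diff):

import warnings
import pandas as pd

# each of these counts as "no data" and, without a dtype, warns once
for empty in (None, [], (), {}):
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        pd.Series(empty)
    assert any(issubclass(w.category, FutureWarning) for w in caught)

# passing any dtype opts out of the warning
pd.Series({}, dtype=object)
pd.Series(dtype="float64")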
3 changes: 2 additions & 1 deletion pandas/core/tools/datetimes.py
@@ -145,7 +145,8 @@ def _maybe_cache(arg, format, cache, convert_listlike):
"""
from pandas import Series

cache_array = Series()
cache_array = Series(dtype=object)

if cache:
# Perform a quicker unique check
if not should_cache(arg):
3 changes: 2 additions & 1 deletion pandas/io/html.py
@@ -767,7 +767,8 @@ def _parse_tfoot_tr(self, table):


def _expand_elements(body):
lens = Series([len(elem) for elem in body])
dtype = None if body else object
lens = Series([len(elem) for elem in body], dtype=dtype)
lens_max = lens.max()
not_max = lens[lens != lens_max]

69 changes: 39 additions & 30 deletions pandas/io/json/_json.py
@@ -1,4 +1,5 @@
from collections import OrderedDict
import functools
from io import StringIO
from itertools import islice
import os
@@ -1005,43 +1006,51 @@ class SeriesParser(Parser):

def _parse_no_numpy(self):

json = self.json
orient = self.orient
if orient == "split":
decoded = {
str(k): v
for k, v in loads(json, precise_float=self.precise_float).items()
}
data = loads(self.json, precise_float=self.precise_float)

if self.orient == "split":
is_empty = self._is_empty(data["data"])
else:
is_empty = self._is_empty(data)
dtype = object if is_empty else None

if self.orient == "split":
decoded = {str(k): v for k, v in data.items()}
self.check_keys_split(decoded)
self.obj = Series(dtype=None, **decoded)
self.obj = Series(**decoded, dtype=dtype)
else:
self.obj = Series(loads(json, precise_float=self.precise_float), dtype=None)
self.obj = Series(data, dtype=dtype)

def _parse_numpy(self):

json = self.json
orient = self.orient
if orient == "split":
decoded = loads(
json, dtype=None, numpy=True, precise_float=self.precise_float
)
decoded = {str(k): v for k, v in decoded.items()}
kwargs = {"dtype": None, "numpy": True, "precise_float": self.precise_float}
if self.orient in ["columns", "index"]:
kwargs["labelled"] = True
loads_ = functools.partial(loads, **kwargs)
data = loads_(self.json)

# this is needed to silence a FutureWarning
# TODO: Remove this when the default dtype of empty Series is changed to object
if self.orient == "split":
is_empty = self._is_empty(data["data"])
else:
is_empty = self._is_empty(data)
dtype = object if is_empty else None

if self.orient == "split":
decoded = {str(k): v for k, v in data.items()}
self.check_keys_split(decoded)
self.obj = Series(**decoded)
elif orient == "columns" or orient == "index":
self.obj = Series(
*loads(
json,
dtype=None,
numpy=True,
labelled=True,
precise_float=self.precise_float,
)
)
self.obj = Series(**decoded, dtype=dtype)
elif self.orient in ["columns", "index"]:
self.obj = Series(*data, dtype=dtype)
else:
self.obj = Series(
loads(json, dtype=None, numpy=True, precise_float=self.precise_float)
)
self.obj = Series(data, dtype=dtype)

@staticmethod
def _is_empty(data):
is_empty_np = isinstance(data, np.ndarray) and (data.size == 0)
is_empty_reg = isinstance(data, (list, tuple, dict)) and not data
return is_empty_np or is_empty_reg

def _try_convert_types(self):
if self.obj is None:
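The _json.py rewrite funnels both parse paths through a single loads call and, via the new _is_empty helper, passes dtype=object when the decoded payload is empty, so reading an empty JSON series does not trip the new warning. A hedged sketch (the final dtype may still be adjusted by the parser's own type conversion):

import warnings
import pandas as pd

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    s = pd.read_json("{}", typ="series")  # empty payload
assert s.empty
assert not any(
    issubclass(w.category, FutureWarning) and "empty Series" in str(w.message)
    for w in caught
)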
5 changes: 3 additions & 2 deletions pandas/plotting/_matplotlib/boxplot.py
@@ -115,7 +115,7 @@ def maybe_color_bp(self, bp):

def _make_plot(self):
if self.subplots:
self._return_obj = pd.Series()
self._return_obj = pd.Series(dtype=object)

for i, (label, y) in enumerate(self._iter_data()):
ax = self._get_ax(i)
@@ -407,7 +407,8 @@ def boxplot_frame_groupby(
)
axes = _flatten(axes)

ret = pd.Series()
ret = pd.Series(dtype=object)

for (key, group), ax in zip(grouped, axes):
d = group.boxplot(
ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_algos.py
@@ -77,7 +77,7 @@ def test_replace(to_replace, value, result):
tm.assert_categorical_equal(cat, expected)


@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
s = pd.Categorical(["a", "b"])
expected = np.array([False, False], dtype=bool)
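The test adjustments across the suite follow the same pattern as the parametrize change above: bare pd.Series() calls gain an explicit dtype so they stay warning-free. A sketch of the kind of assertions this enables, using the testing helpers available at the time (the test names here are hypothetical):

import pandas as pd
import pandas.util.testing as tm

def test_empty_series_without_dtype_warns():
    # the bare constructor should emit the new deprecation warning
    with tm.assert_produces_warning(FutureWarning):
        pd.Series()

def test_empty_series_with_dtype_is_silent():
    # an explicit dtype keeps existing tests warning-free
    with tm.assert_produces_warning(None):
        pd.Series(dtype=object)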