added FutureWarning to empty Series without dtype and adjusted the tests so that no unnecessary warnings are thrown
SaturnFromTitan committed Nov 17, 2019
1 parent debaf9a commit 7e2f6ad
Showing 77 changed files with 359 additions and 237 deletions.
4 changes: 2 additions & 2 deletions doc/source/user_guide/missing_data.rst
@@ -189,15 +189,15 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0.
pd.Series([np.nan]).sum()
pd.Series([]).sum()
pd.Series([], dtype="float64").sum()
The product of an empty or all-NA Series or column of a DataFrame is 1.

.. ipython:: python
pd.Series([np.nan]).prod()
pd.Series([]).prod()
pd.Series([], dtype="float64").prod()
NA values in GroupBy
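Concretely, the identities documented in the hunk above look like this once the dtype is spelled out (a minimal sketch; the printed values assume the current float64 default):

import numpy as np
import pandas as pd

# the sum of an empty or all-NA Series is 0, the product is 1
print(pd.Series([np.nan]).sum())              # 0.0
print(pd.Series([], dtype="float64").sum())   # 0.0
print(pd.Series([np.nan]).prod())             # 1.0
print(pd.Series([], dtype="float64").prod())  # 1.0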
5 changes: 4 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
@@ -201,6 +201,9 @@ Other API changes
See :ref:`units registration <whatsnew_1000.matplotlib_units>` for more.
- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
- Initialising an empty :class:`pandas.core.series.Series` without specifying a dtype now raises a ``FutureWarning``.
The default dtype will change from ``float64`` to ``object`` in future releases so that it is consistent with the
behaviour of :class:`pandas.core.frame.DataFrame` and :class:`pandas.core.indexes.base.Index`.
-


@@ -243,7 +246,7 @@ Removal of prior version deprecations/changes

Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`).
This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using
matplotlib directly rather than rather than :meth:`~DataFrame.plot`.
matplotlib directly rather than :meth:`~DataFrame.plot`.

To use pandas formatters with a matplotlib plot, specify

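To illustrate the deprecation described in the new whatsnew entry above (a sketch, not part of the diff): constructing an empty Series without a dtype now emits the warning, while an explicit dtype keeps code silent and stable across versions.

import warnings
import pandas as pd

# bare construction: the default dtype of an empty Series will change
# from float64 to object, so a FutureWarning is raised
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    s = pd.Series()
assert any(issubclass(w.category, FutureWarning) for w in caught)
assert s.dtype == "float64"  # still the old default for now

# explicit dtype: no warning
pd.Series(dtype="float64")
pd.Series(dtype=object)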
2 changes: 1 addition & 1 deletion pandas/compat/pickle_compat.py
@@ -64,7 +64,7 @@ def __new__(cls) -> "Series": # type: ignore
stacklevel=6,
)

return Series()
return Series(dtype=object)


class _LoadSparseFrame:
15 changes: 11 additions & 4 deletions pandas/core/apply.py
@@ -1,6 +1,6 @@
import abc
import inspect
from typing import TYPE_CHECKING, Any, Dict, Iterator, Tuple, Type, Union
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Type, Union

import numpy as np

@@ -202,15 +202,15 @@ def apply_empty_result(self):

if not should_reduce:
try:
r = self.f(Series([]))
r = self.f(Series([], dtype=np.float64))
except Exception:
pass
else:
should_reduce = not isinstance(r, Series)

if should_reduce:
if len(self.agg_axis):
r = self.f(Series([]))
r = self.f(Series([], dtype=np.float64))
else:
r = np.nan

@@ -343,14 +343,21 @@ def apply_series_generator(self) -> Tuple[ResType, "Index"]:
def wrap_results(
self, results: ResType, res_index: "Index"
) -> Union["Series", "DataFrame"]:
from pandas import Series

# see if we can infer the results
if len(results) > 0 and 0 in results and is_sequence(results[0]):

return self.wrap_results_for_axis(results, res_index)

# dict of scalars
result = self.obj._constructor_sliced(results)
# TODO: Remove if/else block when default dtype of Series is changed to object
constructor_sliced = self.obj._constructor_sliced
is_empty = isinstance(results, (list, tuple, dict)) and not results
if constructor_sliced is Series and is_empty:
result = constructor_sliced(results, dtype=np.float64)
else:
result = constructor_sliced(results)
result.index = res_index

return result
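For context on the apply.py hunks above: when a reducing function is applied to a frame with no rows, the function is probed against a Series built with an explicit float64 dtype, so the new warning does not leak out of DataFrame.apply. A rough sketch of the user-visible path (the output follows from the current float64 default):

import pandas as pd

# a frame with columns but no rows exercises apply_empty_result
df = pd.DataFrame(columns=["a", "b"])

# the reducer is probed against Series([], dtype=np.float64) internally,
# so no FutureWarning should surface here
result = df.apply(lambda col: col.sum())
print(result)        # 0.0 for each column (empty sum is 0)
print(result.dtype)  # float64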
5 changes: 4 additions & 1 deletion pandas/core/base.py
@@ -1256,7 +1256,10 @@ def _map_values(self, mapper, na_action=None):
# possibility that they are tuples
from pandas import Series

mapper = Series(mapper)
if not mapper:
mapper = Series(mapper, dtype=np.float64)
else:
mapper = Series(mapper)

if isinstance(mapper, ABCSeries):
# Since values were input this means we came from either
3 changes: 2 additions & 1 deletion pandas/core/frame.py
@@ -8116,7 +8116,8 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
cols = Index([], name=self.columns.name)
if is_list_like(q):
return self._constructor([], index=q, columns=cols)
return self._constructor_sliced([], index=cols, name=q)

return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)

result = data._data.quantile(
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
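The quantile hunk above handles the corner case where no numeric columns survive the numeric_only filter; the empty result is now built with an explicit float64 dtype. A hedged sketch of that corner case:

import pandas as pd

# no numeric columns, so quantile has nothing to compute
df = pd.DataFrame({"label": ["x", "y", "z"]})

res = df.quantile(0.5)
print(res.empty, res.name, res.dtype)  # True 0.5 float64

# list-like q returns an empty frame indexed by the requested quantiles
print(df.quantile([0.25, 0.75]))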
10 changes: 7 additions & 3 deletions pandas/core/generic.py
@@ -6281,6 +6281,8 @@ def fillna(
2 NaN 1.0 NaN 5
3 NaN 3.0 NaN 4
"""
from pandas import Series

inplace = validate_bool_kwarg(inplace, "inplace")
value, method = validate_fillna_kwargs(value, method)

@@ -6317,8 +6319,10 @@
return self

if self.ndim == 1:
if isinstance(value, (dict, ABCSeries)):
from pandas import Series
if isinstance(value, dict):
dtype = object if not value else None
value = Series(value, dtype=dtype)
elif isinstance(value, ABCSeries):

value = Series(value)
elif not is_list_like(value):
@@ -7263,7 +7267,7 @@ def asof(self, where, subset=None):
if not is_series:
from pandas import Series

return Series(index=self.columns, name=where)
return Series(index=self.columns, name=where, dtype=np.float64)
return np.nan

# It's always much faster to use a *while* loop here for
24 changes: 19 additions & 5 deletions pandas/core/groupby/generic.py
@@ -251,7 +251,13 @@ def aggregate(self, func=None, *args, **kwargs):
result = self._aggregate_named(func, *args, **kwargs)

index = Index(sorted(result), name=self.grouper.names[0])
ret = Series(result, index=index)

# TODO: if/else can be removed as soon as default dtype
# for empty series is changed to object
if result:
ret = Series(result, index=index)
else:
ret = Series(result, index=index, dtype=object)

if not self.as_index: # pragma: no cover
print("Warning, ignoring as_index=True")
@@ -348,7 +354,7 @@ def _wrap_transformed_output(self, output, names=None):
def _wrap_applied_output(self, keys, values, not_indexed_same=False):
if len(keys) == 0:
# GH #6265
return Series([], name=self._selection_name, index=keys)
return Series([], name=self._selection_name, index=keys, dtype=np.float64)

def _get_index() -> Index:
if self.grouper.nkeys > 1:
@@ -430,7 +436,7 @@ def transform(self, func, *args, **kwargs):

result = concat(results).sort_index()
else:
result = Series()
result = Series(dtype=np.float64)

# we will only try to coerce the result type if
# we have a numeric dtype, as these are *always* udfs
@@ -1164,9 +1170,17 @@ def first_not_none(values):
if v is None:
return DataFrame()
elif isinstance(v, NDFrame):

# this is to silence a FutureWarning
# TODO: Remove when default dtype of empty Series is object
kwargs = v._construct_axes_dict()
if v._constructor is Series:
is_empty = "data" not in kwargs or not kwargs["data"]
if "dtype" not in kwargs and is_empty:
kwargs["dtype"] = object

values = [
x if x is not None else v._constructor(**v._construct_axes_dict())
for x in values
x if (x is not None) else v._constructor(**kwargs) for x in values
]

v = values[0]
19 changes: 18 additions & 1 deletion pandas/core/series.py
@@ -202,6 +202,19 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
def __init__(
self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
):
no_data = data is None or (isinstance(data, (list, tuple, dict)) and not data)
if no_data and dtype is None:
# Empty Series should have dtype object to be consistent
# with the behaviour of DataFrame and Index
warnings.warn(
"The default dtype for empty Series will be 'object' instead"
" of 'float64' in the next version. Specify a dtype explicitly"
" to silence this warning.",
FutureWarning,
stacklevel=2,
)
# uncomment the line below when removing the FutureWarning
# dtype = np.dtype(object)

# we are called internally, so short-circuit
if fastpath:
@@ -357,7 +370,11 @@ def _init_dict(self, data, index=None, dtype=None):
keys, values = [], []

# Input is now list-like, so rely on "standard" construction:
s = Series(values, index=keys, dtype=dtype)
# TODO: warning filter can be removed when default dtype for Series
# is changed to object.
with warnings.catch_warnings():
warnings.simplefilter(action="ignore", category=FutureWarning)
s = Series(values, index=keys, dtype=dtype)

# Now we just make sure the order is respected, if any
if data and index is not None:
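Pulling the series.py hunks together: the warning fires only when the data argument is missing or an empty list/tuple/dict and no dtype is given, and _init_dict suppresses the duplicate warning from its internal re-construction. A small illustration (not a test from the diff):

import warnings
import pandas as pd

# each of these counts as "no data" and, without a dtype, warns once
for empty in (None, [], (), {}):
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        pd.Series(empty)
    assert any(issubclass(w.category, FutureWarning) for w in caught)

# passing any dtype opts out of the warning
pd.Series({}, dtype=object)
pd.Series(dtype="float64")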
3 changes: 2 additions & 1 deletion pandas/core/tools/datetimes.py
@@ -145,7 +145,8 @@ def _maybe_cache(arg, format, cache, convert_listlike):
"""
from pandas import Series

cache_array = Series()
cache_array = Series(dtype=object)

if cache:
# Perform a quicker unique check
if not should_cache(arg):
3 changes: 2 additions & 1 deletion pandas/io/html.py
@@ -767,7 +767,8 @@ def _parse_tfoot_tr(self, table):


def _expand_elements(body):
lens = Series([len(elem) for elem in body])
dtype = None if body else object
lens = Series([len(elem) for elem in body], dtype=dtype)
lens_max = lens.max()
not_max = lens[lens != lens_max]

69 changes: 39 additions & 30 deletions pandas/io/json/_json.py
@@ -1,4 +1,5 @@
from collections import OrderedDict
import functools
from io import StringIO
from itertools import islice
import os
@@ -1005,43 +1006,51 @@ class SeriesParser(Parser):

def _parse_no_numpy(self):

json = self.json
orient = self.orient
if orient == "split":
decoded = {
str(k): v
for k, v in loads(json, precise_float=self.precise_float).items()
}
data = loads(self.json, precise_float=self.precise_float)

if self.orient == "split":
is_empty = self._is_empty(data["data"])
else:
is_empty = self._is_empty(data)
dtype = object if is_empty else None

if self.orient == "split":
decoded = {str(k): v for k, v in data.items()}
self.check_keys_split(decoded)
self.obj = Series(dtype=None, **decoded)
self.obj = Series(**decoded, dtype=dtype)
else:
self.obj = Series(loads(json, precise_float=self.precise_float), dtype=None)
self.obj = Series(data, dtype=dtype)

def _parse_numpy(self):

json = self.json
orient = self.orient
if orient == "split":
decoded = loads(
json, dtype=None, numpy=True, precise_float=self.precise_float
)
decoded = {str(k): v for k, v in decoded.items()}
kwargs = {"dtype": None, "numpy": True, "precise_float": self.precise_float}
if self.orient in ["columns", "index"]:
kwargs["labelled"] = True
loads_ = functools.partial(loads, **kwargs)
data = loads_(self.json)

# this is needed to silence a FutureWarning
# TODO: Remove this when the default dtype of empty Series is changed to object
if self.orient == "split":
is_empty = self._is_empty(data["data"])
else:
is_empty = self._is_empty(data)
dtype = object if is_empty else None

if self.orient == "split":
decoded = {str(k): v for k, v in data.items()}
self.check_keys_split(decoded)
self.obj = Series(**decoded)
elif orient == "columns" or orient == "index":
self.obj = Series(
*loads(
json,
dtype=None,
numpy=True,
labelled=True,
precise_float=self.precise_float,
)
)
self.obj = Series(**decoded, dtype=dtype)
elif self.orient in ["columns", "index"]:
self.obj = Series(*data, dtype=dtype)
else:
self.obj = Series(
loads(json, dtype=None, numpy=True, precise_float=self.precise_float)
)
self.obj = Series(data, dtype=dtype)

@staticmethod
def _is_empty(data):
is_empty_np = isinstance(data, np.ndarray) and (data.size == 0)
is_empty_reg = isinstance(data, (list, tuple, dict)) and not data
return is_empty_np or is_empty_reg

def _try_convert_types(self):
if self.obj is None:
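The _json.py rewrite funnels both parse paths through a single loads call and, via the new _is_empty helper, passes dtype=object when the decoded payload is empty, so reading an empty JSON series does not trip the new warning. A hedged sketch (the final dtype may still be adjusted by the parser's own type conversion):

import warnings
import pandas as pd

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    s = pd.read_json("{}", typ="series")  # empty payload
assert s.empty
assert not any(
    issubclass(w.category, FutureWarning) and "empty Series" in str(w.message)
    for w in caught
)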
5 changes: 3 additions & 2 deletions pandas/plotting/_matplotlib/boxplot.py
@@ -115,7 +115,7 @@ def maybe_color_bp(self, bp):

def _make_plot(self):
if self.subplots:
self._return_obj = pd.Series()
self._return_obj = pd.Series(dtype=object)

for i, (label, y) in enumerate(self._iter_data()):
ax = self._get_ax(i)
@@ -407,7 +407,8 @@ def boxplot_frame_groupby(
)
axes = _flatten(axes)

ret = pd.Series()
ret = pd.Series(dtype=object)

for (key, group), ax in zip(grouped, axes):
d = group.boxplot(
ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_algos.py
@@ -77,7 +77,7 @@ def test_replace(to_replace, value, result):
tm.assert_categorical_equal(cat, expected)


@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
s = pd.Categorical(["a", "b"])
expected = np.array([False, False], dtype=bool)
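The test adjustments across the suite follow the same pattern as the parametrize change above: bare pd.Series() calls gain an explicit dtype so they stay warning-free. A sketch of the kind of assertions this enables, using the testing helpers available at the time (the test names here are hypothetical):

import pandas as pd
import pandas.util.testing as tm

def test_empty_series_without_dtype_warns():
    # the bare constructor should emit the new deprecation warning
    with tm.assert_produces_warning(FutureWarning):
        pd.Series()

def test_empty_series_with_dtype_is_silent():
    # an explicit dtype keeps existing tests warning-free
    with tm.assert_produces_warning(None):
        pd.Series(dtype=object)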