Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: DataFrame.append with timedelta64 #39574

Merged
merged 16 commits into from
Feb 12, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
return arr.astype(dtype, copy=False)


def concat_compat(to_concat, axis: int = 0):
def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
"""
provide concatenation of an array of arrays each of which is a single
'normalized' dtypes (in that for example, if it's object, then it is a
Expand All @@ -72,6 +72,8 @@ def concat_compat(to_concat, axis: int = 0):
----------
to_concat : array of arrays
axis : axis to provide concatenation
ea_compat_axis : bool, default False
For ExtensionArray compat, behave as if axis == 1 when determining
whether to drop empty arrays.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify what this means in practice to behave "as if axis=1"? Because I assume the arrays are still concatenated with axis=0?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just matters for the dropping-empty check; will edit to clarify


Returns
-------
Expand All @@ -91,7 +93,8 @@ def is_nonempty(x) -> bool:
# marginal given that it would still require shape & dtype calculation and
# np.concatenate which has them both implemented is compiled.
non_empties = [x for x in to_concat if is_nonempty(x)]
if non_empties and axis == 0:
if non_empties and axis == 0 and not ea_compat_axis:
# ea_compat_axis see GH#39574
to_concat = non_empties

kinds = {obj.dtype.kind for obj in to_concat}
Expand Down
133 changes: 21 additions & 112 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from __future__ import annotations

from collections import defaultdict
import copy
import itertools
from typing import TYPE_CHECKING, Dict, List, Sequence, cast
from typing import TYPE_CHECKING, Dict, List, Sequence

import numpy as np

Expand All @@ -15,26 +14,21 @@
from pandas.core.dtypes.common import (
get_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_extension_array_dtype,
is_float_dtype,
is_numeric_dtype,
is_sparse,
is_timedelta64_dtype,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.missing import isna_all

import pandas.core.algorithms as algos
from pandas.core.arrays import DatetimeArray, ExtensionArray
from pandas.core.arrays import DatetimeArray, ExtensionArray, TimedeltaArray
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.blocks import make_block
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
from pandas import Index
from pandas.core.arrays.sparse.dtype import SparseDtype


def concatenate_block_managers(
Expand Down Expand Up @@ -296,6 +290,8 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
empty_arr, allow_fill=True, fill_value=fill_value
)
else:
# NB: we should never get here with empty_dtype integer or bool;
# if we did, the missing_arr.fill would cast to gibberish
missing_arr = np.empty(self.shape, dtype=empty_dtype)
missing_arr.fill(fill_value)
return missing_arr
Expand Down Expand Up @@ -363,9 +359,11 @@ def _concatenate_join_units(
# concatting with at least one EA means we are concatting a single column
# the non-EA values are 2D arrays with shape (1, n)
to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
concat_values = concat_compat(to_concat, axis=0)
if not isinstance(concat_values, ExtensionArray) or (
isinstance(concat_values, DatetimeArray) and concat_values.tz is None
concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
if (
not isinstance(concat_values, ExtensionArray)
or (isinstance(concat_values, DatetimeArray) and concat_values.tz is None)
or isinstance(concat_values, TimedeltaArray)
):
# if the result of concat is not an EA but an ndarray, reshape to
# 2D to put it a non-EA Block
Expand Down Expand Up @@ -421,107 +419,18 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:

has_none_blocks = any(unit.block is None for unit in join_units)
dtypes = [None if unit.block is None else unit.dtype for unit in join_units]

filtered_dtypes = [
unit.dtype for unit in join_units if unit.block is not None and not unit.is_na
]
if not len(filtered_dtypes):
filtered_dtypes = [unit.dtype for unit in join_units if unit.block is not None]
dtype_alt = find_common_type(filtered_dtypes)

upcast_classes = _get_upcast_classes(join_units, dtypes)

if is_extension_array_dtype(dtype_alt):
return dtype_alt
elif dtype_alt == object:
return dtype_alt

# TODO: de-duplicate with maybe_promote?
# create the result
if "extension" in upcast_classes:
return np.dtype("object")
elif "bool" in upcast_classes:
if has_none_blocks:
return np.dtype(np.object_)
else:
return np.dtype(np.bool_)
elif "datetimetz" in upcast_classes:
# GH-25014. We use NaT instead of iNaT, since this eventually
# ends up in DatetimeArray.take, which does not allow iNaT.
dtype = upcast_classes["datetimetz"]
return dtype[0]
elif "datetime" in upcast_classes:
return np.dtype("M8[ns]")
elif "timedelta" in upcast_classes:
return np.dtype("m8[ns]")
else:
try:
common_dtype = np.find_common_type(upcast_classes, [])
except TypeError:
# At least one is an ExtensionArray
return np.dtype(np.object_)
else:
if is_float_dtype(common_dtype):
return common_dtype
elif is_numeric_dtype(common_dtype):
if has_none_blocks:
return np.dtype(np.float64)
else:
return common_dtype

msg = "invalid dtype determination in get_concat_dtype"
raise AssertionError(msg)


def _get_upcast_classes(
join_units: Sequence[JoinUnit],
dtypes: Sequence[DtypeObj],
) -> Dict[str, List[DtypeObj]]:
"""Create mapping between upcast class names and lists of dtypes."""
upcast_classes: Dict[str, List[DtypeObj]] = defaultdict(list)
null_upcast_classes: Dict[str, List[DtypeObj]] = defaultdict(list)
for dtype, unit in zip(dtypes, join_units):
if dtype is None:
continue

upcast_cls = _select_upcast_cls_from_dtype(dtype)
# Null blocks should not influence upcast class selection, unless there
# are only null blocks, when same upcasting rules must be applied to
# null upcast classes.
if unit.is_na:
null_upcast_classes[upcast_cls].append(dtype)
else:
upcast_classes[upcast_cls].append(dtype)

if not upcast_classes:
upcast_classes = null_upcast_classes

return upcast_classes


def _select_upcast_cls_from_dtype(dtype: DtypeObj) -> str:
"""Select upcast class name based on dtype."""
if is_categorical_dtype(dtype):
return "extension"
elif is_datetime64tz_dtype(dtype):
return "datetimetz"
elif is_extension_array_dtype(dtype):
return "extension"
elif issubclass(dtype.type, np.bool_):
return "bool"
elif issubclass(dtype.type, np.object_):
return "object"
elif is_datetime64_dtype(dtype):
return "datetime"
elif is_timedelta64_dtype(dtype):
return "timedelta"
elif is_sparse(dtype):
dtype = cast("SparseDtype", dtype)
return dtype.subtype.name
elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
return dtype.name
else:
return "float"
dtypes = [x for x in dtypes if x is not None]

dtype = find_common_type(dtypes)
if has_none_blocks:
if not isinstance(dtype, np.dtype):
# EA dtype
pass
elif dtype.kind in ["i", "u"]:
dtype = np.dtype(np.float64)
elif dtype.kind == "b":
dtype = np.dtype(object)
return dtype


def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,23 +165,23 @@ def test_append_dtypes(self):
df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
result = df1.append(df2)
expected = DataFrame(
{"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
{"bar": Series([Timestamp("20130101"), np.nan], dtype="object")}
)
tm.assert_frame_equal(result, expected)

df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
result = df1.append(df2)
expected = DataFrame(
{"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
{"bar": Series([Timestamp("20130101"), np.nan], dtype="object")}
)
tm.assert_frame_equal(result, expected)

df1 = DataFrame({"bar": np.nan}, index=range(1))
df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
result = df1.append(df2)
expected = DataFrame(
{"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
{"bar": Series([np.nan, Timestamp("20130101")], dtype="object")}
)
tm.assert_frame_equal(result, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexing/test_partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def test_partial_setting_mixed_dtype(self):
df = DataFrame(columns=["A", "B"])
df.loc[0] = Series(1, index=["B"])

exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
# TODO: having this be float64 would not be unreasonable
exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="object")
tm.assert_frame_equal(df, exp)

# list-like must conform
Expand Down
34 changes: 27 additions & 7 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,29 +334,34 @@ def test_append_missing_column_proper_upcast(self, sort):
def test_append_empty_frame_to_series_with_dateutil_tz(self):
# GH 23682
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
s = Series({"date": date, "a": 1.0, "b": 2.0})
ser = Series({"date": date, "a": 1.0, "b": 2.0})
df = DataFrame(columns=["c", "d"])
result_a = df.append(s, ignore_index=True)
result_a = df.append(ser, ignore_index=True)
expected = DataFrame(
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
)
# These columns get cast to object after append
expected["c"] = expected["c"].astype(object)
expected["d"] = expected["d"].astype(object)
expected["date"] = expected["date"].astype(object)
# TODO: "date" might make sense to keep as dt64tz
tm.assert_frame_equal(result_a, expected)

expected = DataFrame(
[[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
)
expected["c"] = expected["c"].astype(object)
expected["d"] = expected["d"].astype(object)

result_b = result_a.append(s, ignore_index=True)
expected["date"] = expected["date"].astype(object)
# TODO: "date" might make sense to keep as dt64tz
result_b = result_a.append(ser, ignore_index=True)
tm.assert_frame_equal(result_b, expected)

# column order is different
expected = expected[["c", "d", "date", "a", "b"]]
result = df.append([s, s], ignore_index=True)
dtype = Series([date]).dtype
expected["date"] = expected["date"].astype(dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still needed? (might be a left-over from astyping it to object before)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're right, updated

result = df.append([ser, ser], ignore_index=True)
tm.assert_frame_equal(result, expected)

def test_append_empty_tz_frame_with_datetime64ns(self):
Expand All @@ -378,12 +383,27 @@ def test_append_empty_tz_frame_with_datetime64ns(self):
@pytest.mark.parametrize(
"dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
)
def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str):
@pytest.mark.parametrize("val", [1, "NaT"])
def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val):
# https://github.com/pandas-dev/pandas/issues/35460
df = DataFrame(columns=["a"]).astype(dtype_str)

other = DataFrame({"a": [np.timedelta64("NaT", "ns")]})
other = DataFrame({"a": [np.timedelta64(val, "ns")]})
result = df.append(other, ignore_index=True)

expected = other.astype(object)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
)
@pytest.mark.parametrize("val", [1, "NaT"])
def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val):
# https://github.com/pandas-dev/pandas/issues/35460
df = DataFrame({"a": pd.array([1], dtype=dtype_str)})

other = DataFrame({"a": [np.timedelta64(val, "ns")]})
result = df.append(other, ignore_index=True)

expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object)
tm.assert_frame_equal(result, expected)
1 change: 1 addition & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ def test_join_append_timedeltas(self):
"t": [timedelta(0, 22500), timedelta(0, 22500)],
}
)
expected = expected.astype(object)
tm.assert_frame_equal(result, expected)

td = np.timedelta64(300000000)
Expand Down
Loading