diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 722d0dcc10041..8c69820a54bce 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -512,6 +512,7 @@ Reshaping - Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`) - Bug in :func:`crosstab` when inputs are are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`) - Bug in :func:`concat` would fail when the ``objs`` argument all had the same index and the ``keys`` argument contained duplicates (:issue:`43595`) +- Bug in :func:`concat` where the ``sort`` parameter was ignored, so the result could still be sorted with ``sort=False`` (:issue:`43375`) Sparse ^^^^^^ diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index b88a2e4c28cfb..dd1fa0780520c 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -55,7 +55,7 @@ def is_integer_array(values: np.ndarray, skipna: bool = ...): ... def is_bool_array(values: np.ndarray, skipna: bool = ...): ... def fast_multiget(mapping: dict, keys: np.ndarray, default=...) -> np.ndarray: ... def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ... -def fast_unique_multiple_list(lists: list, sort: bool = ...) -> list: ... +def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ... def fast_unique_multiple(arrays: list, sort: bool = ...) -> list: ... 
def map_infer( arr: np.ndarray, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 2c7b052917463..e7f889ef39707 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -350,7 +350,7 @@ def fast_unique_multiple(list arrays, sort: bool = True): @cython.wraparound(False) @cython.boundscheck(False) -def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: +def fast_unique_multiple_list(lists: list, sort: bool | None = True) -> list: cdef: list buf Py_ssize_t k = len(lists) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 8efc07a2ef148..e497012f23b68 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -147,7 +147,7 @@ def _get_combined_index( for other in indexes[1:]: index = index.intersection(other) else: - index = union_indexes(indexes, sort=sort) + index = union_indexes(indexes, sort=False) index = ensure_index(index) if sort: @@ -163,7 +163,7 @@ def _get_combined_index( return index -def union_indexes(indexes, sort: bool = True) -> Index: +def union_indexes(indexes, sort: bool | None = True) -> Index: """ Return the union of indexes. 
@@ -219,7 +219,7 @@ def conv(i): return result.union_many(indexes[1:]) else: for other in indexes[1:]: - result = result.union(other) + result = result.union(other, sort=None if sort else False) return result elif kind == "array": index = indexes[0] diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index aa4b734411a58..859f5171a6f04 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2354,10 +2354,8 @@ def test_construct_with_two_categoricalindex_series(self): ) result = DataFrame([s1, s2]) expected = DataFrame( - np.array( - [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]] - ), - columns=["f", "female", "m", "male", "unknown"], + np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]), + columns=["female", "male", "unknown", "f", "m"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_sort.py b/pandas/tests/reshape/concat/test_sort.py index 865f696b7a73a..3d362ef42d276 100644 --- a/pandas/tests/reshape/concat/test_sort.py +++ b/pandas/tests/reshape/concat/test_sort.py @@ -1,3 +1,5 @@ +import numpy as np + import pandas as pd from pandas import DataFrame import pandas._testing as tm @@ -81,3 +83,12 @@ def test_concat_aligned_sort_does_not_raise(self): expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"]) result = pd.concat([df, df], ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) + + def test_concat_frame_with_sort_false(self): + # GH 43375 + result = pd.concat( + [DataFrame({i: i}, index=[i]) for i in range(2, 0, -1)], sort=False + ) + expected = DataFrame([[2, np.nan], [np.nan, 1]], index=[2, 1], columns=[2, 1]) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 4972cb34aac69..af9d6dd83bee3 100644 --- a/pandas/tests/reshape/test_melt.py +++ 
b/pandas/tests/reshape/test_melt.py @@ -746,11 +746,11 @@ def test_unbalanced(self): ) df["id"] = df.index exp_data = { - "X": ["X1", "X1", "X2", "X2"], - "A": [1.0, 3.0, 2.0, 4.0], - "B": [5.0, np.nan, 6.0, np.nan], - "id": [0, 0, 1, 1], - "year": [2010, 2011, 2010, 2011], + "X": ["X1", "X2", "X1", "X2"], + "A": [1.0, 2.0, 3.0, 4.0], + "B": [5.0, 6.0, np.nan, np.nan], + "id": [0, 1, 0, 1], + "year": [2010, 2010, 2011, 2011], } expected = DataFrame(exp_data) expected = expected.set_index(["id", "year"])[["X", "A", "B"]] @@ -993,10 +993,10 @@ def test_nonnumeric_suffix(self): ) expected = DataFrame( { - "A": ["X1", "X1", "X2", "X2"], - "colname": ["placebo", "test", "placebo", "test"], - "result": [5.0, np.nan, 6.0, np.nan], - "treatment": [1.0, 3.0, 2.0, 4.0], + "A": ["X1", "X2", "X1", "X2"], + "colname": ["placebo", "placebo", "test", "test"], + "result": [5.0, 6.0, np.nan, np.nan], + "treatment": [1.0, 2.0, 3.0, 4.0], } ) expected = expected.set_index(["A", "colname"]) @@ -1040,10 +1040,10 @@ def test_float_suffix(self): ) expected = DataFrame( { - "A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"], - "colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], - "result": [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], - "treatment": [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0], + "A": ["X1", "X2", "X1", "X2", "X1", "X2", "X1", "X2"], + "colname": [1.2, 1.2, 1.0, 1.0, 1.1, 1.1, 2.1, 2.1], + "result": [5.0, 6.0, 0.0, 9.0, np.nan, np.nan, np.nan, np.nan], + "treatment": [np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 4.0], } ) expected = expected.set_index(["A", "colname"]) diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index 48f853cfdcb10..8abbc59343e78 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -278,7 +278,11 @@ def test_str_cat_align_mixed_inputs(join): expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"]) # joint index of rhs [t, u]; u will be forced have index of s 
rhs_idx = ( - t.index.intersection(s.index) if join == "inner" else t.index.union(s.index) + t.index.intersection(s.index) + if join == "inner" + else t.index.union(s.index) + if join == "outer" + else t.index.append(s.index.difference(t.index)) ) expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]