diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index a46c63945..905ada598 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -395,7 +395,8 @@ def sort( self, by: str | Iterable[str], *more_by: str, - descending: bool | Sequence[bool] = False, + descending: bool | Sequence[bool], + nulls_last: bool, ) -> Self: flat_keys = flatten([*flatten([by]), *more_by]) df = self._native_frame @@ -408,7 +409,10 @@ def sort( (key, "descending" if is_descending else "ascending") for key, is_descending in zip(flat_keys, descending) ] - return self._from_native_frame(df.sort_by(sorting=sorting)) + + null_placement = "at_end" if nulls_last else "at_start" + + return self._from_native_frame(df.sort_by(sorting, null_placement=null_placement)) def to_pandas(self) -> Any: return self._native_frame.to_pandas() diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 916583eaa..31949cf22 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -206,7 +206,8 @@ def sort( self: Self, by: str | Iterable[str], *more_by: str, - descending: bool | Sequence[bool] = False, + descending: bool | Sequence[bool], + nulls_last: bool, ) -> Self: flat_keys = flatten([*flatten([by]), *more_by]) df = self._native_frame @@ -214,7 +215,10 @@ def sort( ascending: bool | list[bool] = not descending else: ascending = [not d for d in descending] - return self._from_native_frame(df.sort_values(flat_keys, ascending=ascending)) + na_position = "last" if nulls_last else "first" + return self._from_native_frame( + df.sort_values(flat_keys, ascending=ascending, na_position=na_position) + ) def join( self: Self, diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index e808545ae..b9788b533 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -413,7 +413,8 @@ def sort( self, by: str | Iterable[str], *more_by: str, - descending: bool | Sequence[bool] = 
False, + descending: bool | Sequence[bool], + nulls_last: bool, ) -> Self: flat_keys = flatten([*flatten([by]), *more_by]) df = self._native_frame @@ -421,7 +422,10 @@ def sort( ascending: bool | list[bool] = not descending else: ascending = [not d for d in descending] - return self._from_native_frame(df.sort_values(flat_keys, ascending=ascending)) + na_position = "last" if nulls_last else "first" + return self._from_native_frame( + df.sort_values(flat_keys, ascending=ascending, na_position=na_position) + ) # --- convert --- def collect(self) -> PandasLikeDataFrame: diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index e4627484a..e4ad31b38 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -170,9 +170,12 @@ def sort( by: str | Iterable[str], *more_by: str, descending: bool | Sequence[bool] = False, + nulls_last: bool = False, ) -> Self: return self._from_compliant_dataframe( - self._compliant_frame.sort(by, *more_by, descending=descending) + self._compliant_frame.sort( + by, *more_by, descending=descending, nulls_last=nulls_last + ) ) def join( @@ -1944,19 +1947,22 @@ def sort( by: str | Iterable[str], *more_by: str, descending: bool | Sequence[bool] = False, + nulls_last: bool = False, ) -> Self: r""" Sort the dataframe by the given columns. Arguments: by: Column(s) names to sort by. + *more_by: Additional columns to sort by, specified as positional arguments. + descending: Sort in descending order. When sorting by multiple columns, can be + specified per column by passing a sequence of booleans. + nulls_last: Place null values last. - *more_by: Additional columns to sort by, specified as positional - arguments. - - descending: Sort in descending order. When sorting by multiple - columns, can be specified per column by passing a - sequence of booleans. + Warning: + Unlike Polars, it is not possible to specify a sequence of booleans for + `nulls_last` in order to control per-column behaviour. 
Instead a single + boolean is applied for all `by` columns. Examples: >>> import narwhals as nw @@ -1996,7 +2002,7 @@ def sort( │ 2 ┆ 5.0 ┆ c │ └──────┴─────┴─────┘ """ - return super().sort(by, *more_by, descending=descending) + return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last) def join( self, @@ -3858,20 +3864,23 @@ def sort( by: str | Iterable[str], *more_by: str, descending: bool | Sequence[bool] = False, + nulls_last: bool = False, ) -> Self: r""" Sort the LazyFrame by the given columns. Arguments: - by: Column(s) to sort by. Accepts expression input. Strings are - parsed as column names. - - *more_by: Additional columns to sort by, specified as positional - arguments. + by: Column(s) names to sort by. + *more_by: Additional columns to sort by, specified as positional arguments. + descending: Sort in descending order. When sorting by multiple columns, can be + specified per column by passing a sequence of booleans. + nulls_last: Place null values last. The single boolean given here applies to + every column in `by`. - descending: Sort in descending order. When sorting by multiple - columns, can be specified per column by passing a - sequence of booleans. + Warning: + Unlike Polars, it is not possible to specify a sequence of booleans for + `nulls_last` in order to control per-column behaviour. Instead a single + boolean is applied for all `by` columns. 
Examples: >>> import narwhals as nw @@ -3911,7 +3920,7 @@ def sort( │ 2 ┆ 5.0 ┆ c │ └──────┴─────┴─────┘ """ - return super().sort(by, *more_by, descending=descending) + return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last) def join( self, diff --git a/tests/frame/sort_test.py b/tests/frame/sort_test.py index 06f5d079f..bea9177df 100644 --- a/tests/frame/sort_test.py +++ b/tests/frame/sort_test.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +import pytest + import narwhals.stable.v1 as nw from tests.utils import Constructor from tests.utils import compare_dicts @@ -20,3 +24,19 @@ def test_sort(constructor: Constructor) -> None: "z": [8.0, 9.0, 7.0], } compare_dicts(result, expected) + + +@pytest.mark.parametrize( + ("nulls_last", "expected"), + [ + (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, float("nan")]}), + (False, {"a": [-1, 0, 2, 0], "b": [float("nan"), 3, 2, 1]}), + ], +) +def test_sort_nulls( + constructor: Constructor, *, nulls_last: bool, expected: dict[str, float] +) -> None: + data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} + df = nw.from_native(constructor(data)) + result = df.sort("b", descending=True, nulls_last=nulls_last) + compare_dicts(result, expected)