Skip to content

Commit

Permalink
feat: allow format=None in str.to_datetime (narwhals-dev#1145)
Browse files Browse the repository at this point in the history
* WIP

* allow str.to_datetime with format=None
  • Loading branch information
FBruzzesi authored and akmalsoliev committed Oct 15, 2024
1 parent a699cbf commit 3ffeb7b
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 13 deletions.
2 changes: 1 addition & 1 deletion narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def slice(self, offset: int, length: int | None = None) -> ArrowExpr:
self._expr, "str", "slice", offset, length
)

def to_datetime(self, format: str | None = None) -> ArrowExpr: # noqa: A002
def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002
return reuse_series_namespace_implementation(
self._expr,
"str",
Expand Down
6 changes: 5 additions & 1 deletion narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1033,9 +1033,13 @@ def slice(self: Self, offset: int, length: int | None = None) -> ArrowSeries:
),
)

def to_datetime(self: Self, format: str | None = None) -> ArrowSeries: # noqa: A002
def to_datetime(self: Self, format: str | None) -> ArrowSeries:  # noqa: A002
    """Parse the wrapped string series into datetimes using pyarrow.

    Arguments:
        format: `strptime`-style format string. This backend does not infer
            a format, so `None` is rejected.

    Returns:
        A new series built from `pc.strptime(..., unit="us")` applied to the
        native series.

    Raises:
        ValueError: If `format` is None.
    """
    import pyarrow.compute as pc  # ignore-banned-import()

    # Fail early with an explicit message rather than passing None to pyarrow.
    if format is None:
        msg = "`format` is required for pyarrow backend."
        raise ValueError(msg)

    owner = self._arrow_series
    parsed = pc.strptime(owner._native_series, format=format, unit="us")
    return owner._from_native_series(parsed)
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ def slice(self, offset: int, length: int | None = None) -> DaskExpr:
returns_scalar=False,
)

def to_datetime(self, format: str | None = None) -> DaskExpr: # noqa: A002
def to_datetime(self: Self, format: str | None) -> DaskExpr: # noqa: A002
import dask.dataframe as dd # ignore-banned-import()

return self._expr._from_call(
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeExpr:
self._expr, "str", "slice", offset, length
)

def to_datetime(self, format: str | None = None) -> PandasLikeExpr: # noqa: A002
def to_datetime(self: Self, format: str | None) -> PandasLikeExpr: # noqa: A002
return reuse_series_namespace_implementation(
self._expr,
"str",
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeSeries:
self._pandas_series._native_series.str.slice(start=offset, stop=stop),
)

def to_datetime(self, format: str | None = None) -> PandasLikeSeries: # noqa: A002
def to_datetime(self: Self, format: str | None) -> PandasLikeSeries: # noqa: A002
return self._pandas_series._from_native_series(
to_datetime(self._pandas_series._implementation)(
self._pandas_series._native_series, format=format
Expand Down
11 changes: 7 additions & 4 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2498,7 +2498,7 @@ def tail(self, n: int = 5) -> Expr:
"""
return self._expr.__class__(lambda plx: self._expr._call(plx).str.slice(-n))

def to_datetime(self, format: str) -> Expr: # noqa: A002
def to_datetime(self: Self, format: str | None = None) -> Expr: # noqa: A002
"""
Convert to Datetime dtype.
Expand All @@ -2508,10 +2508,13 @@ def to_datetime(self, format: str) -> Expr: # noqa: A002
in pandas, with no ability to set any other one. The ability to
set the time unit in pandas, if the version permits, will arrive.
Warning:
As different backends auto-infer format in different ways, if `format=None`
there is no guarantee that the result will be equal across backends.
Arguments:
format: Format to parse strings with. Must be passed, as different
dataframe libraries have different ways of auto-inferring
formats.
format: Format to use for conversion. If set to None (default), the format is
inferred from the data.
Examples:
>>> import pandas as pd
Expand Down
11 changes: 7 additions & 4 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3083,7 +3083,7 @@ def to_lowercase(self) -> Series:
self._narwhals_series._compliant_series.str.to_lowercase()
)

def to_datetime(self, format: str) -> Series: # noqa: A002
def to_datetime(self: Self, format: str | None = None) -> Series: # noqa: A002
"""
Parse Series with strings to a Series with Datetime dtype.
Expand All @@ -3093,10 +3093,13 @@ def to_datetime(self, format: str) -> Series: # noqa: A002
in pandas, with no ability to set any other one. The ability to
set the time unit in pandas, if the version permits, will arrive.
Warning:
As different backends auto-infer format in different ways, if `format=None`
there is no guarantee that the result will be equal across backends.
Arguments:
format: Format to parse strings with. Must be passed, as different
dataframe libraries have different ways of auto-inferring
formats.
format: Format to use for conversion. If set to None (default), the format is
inferred from the data.
Examples:
>>> import pandas as pd
Expand Down
40 changes: 40 additions & 0 deletions tests/expr_and_series/str/to_datetime_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor

Expand Down Expand Up @@ -34,3 +36,41 @@ def test_to_datetime_series(constructor_eager: Any) -> None:
)
).item(0)
assert str(result) == expected


def test_to_datetime_infer_fmt(
    request: pytest.FixtureRequest, constructor: Constructor
) -> None:
    """Check `str.to_datetime()` without a format through the lazy/expression API.

    The pyarrow-table constructor is marked as an expected failure, and cuDF
    is compared against a different string rendering of the same timestamp.
    """
    backend = str(constructor)
    if "pyarrow_table" in backend:
        request.applymarker(pytest.mark.xfail)

    expected = "2020-01-01 12:34:56"
    if "cudf" in backend:  # pragma: no cover
        expected = "2020-01-01T12:34:56.000000000"

    lazy_frame = nw.from_native(constructor(data)).lazy()
    parsed = lazy_frame.select(b=nw.col("a").str.to_datetime()).collect()
    result = parsed.item(row=0, column="b")
    assert str(result) == expected


def test_to_datetime_series_infer_fmt(
    request: pytest.FixtureRequest, constructor_eager: Any
) -> None:
    """Check `Series.str.to_datetime()` without a format on eager backends.

    The pyarrow-table constructor is marked as an expected failure, and cuDF
    is compared against a different string rendering of the same timestamp.
    """
    backend = str(constructor_eager)
    if "pyarrow_table" in backend:
        request.applymarker(pytest.mark.xfail)

    expected = "2020-01-01 12:34:56"
    if "cudf" in backend:  # pragma: no cover
        expected = "2020-01-01T12:34:56.000000000"

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    result = series.str.to_datetime().item(0)
    assert str(result) == expected

0 comments on commit 3ffeb7b

Please sign in to comment.