diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 6d1001c110..c70425efe3 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -540,7 +540,7 @@ def slice(self, offset: int, length: int | None = None) -> ArrowExpr: self._expr, "str", "slice", offset, length ) - def to_datetime(self, format: str | None = None) -> ArrowExpr: # noqa: A002 + def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002 return reuse_series_namespace_implementation( self._expr, "str", diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 183cf37b7f..5070234985 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1033,9 +1033,13 @@ def slice(self: Self, offset: int, length: int | None = None) -> ArrowSeries: ), ) - def to_datetime(self: Self, format: str | None = None) -> ArrowSeries: # noqa: A002 + def to_datetime(self: Self, format: str | None) -> ArrowSeries: # noqa: A002 import pyarrow.compute as pc # ignore-banned-import() + if format is None: + msg = "`format` is required for pyarrow backend." + raise ValueError(msg) + return self._arrow_series._from_native_series( pc.strptime(self._arrow_series._native_series, format=format, unit="us") ) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index d8d86692e3..10b95bc89e 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -811,7 +811,7 @@ def slice(self, offset: int, length: int | None = None) -> DaskExpr: returns_scalar=False, ) - def to_datetime(self, format: str | None = None) -> DaskExpr: # noqa: A002 + def to_datetime(self: Self, format: str | None) -> DaskExpr: # noqa: A002 import dask.dataframe as dd # ignore-banned-import() return self._expr._from_call( diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 4e3011446c..2ebadbe163 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -486,7 +486,7 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeExpr: self._expr, "str", "slice", offset, length ) - def to_datetime(self, format: str | None = None) -> PandasLikeExpr: # noqa: A002 + def to_datetime(self: Self, format: str | None) -> PandasLikeExpr: # noqa: A002 return reuse_series_namespace_implementation( self._expr, "str", diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 74e1c492db..9cca664057 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -757,7 +757,7 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeSeries: self._pandas_series._native_series.str.slice(start=offset, stop=stop), ) - def to_datetime(self, format: str | None = None) -> PandasLikeSeries: # noqa: A002 + def to_datetime(self: Self, format: str | None) -> PandasLikeSeries: # noqa: A002 return self._pandas_series._from_native_series( to_datetime(self._pandas_series._implementation)( self._pandas_series._native_series, format=format diff --git a/narwhals/expr.py b/narwhals/expr.py index 59e1ff76c4..8446d81c38 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -2498,7 +2498,7 @@ def tail(self, n: int = 5) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).str.slice(-n)) - def to_datetime(self, format: str) -> Expr: # noqa: A002 + def to_datetime(self: Self, format: str | None = None) -> Expr: # noqa: A002 """ Convert to Datetime dtype. @@ -2508,10 +2508,13 @@ def to_datetime(self, format: str) -> Expr: # noqa: A002 in pandas, with no ability to set any other one. The ability to set the time unit in pandas, if the version permits, will arrive. + Warning: + As different backends auto-infer format in different ways, if `format=None` + there is no guarantee that the result will be equal. + Arguments: - format: Format to parse strings with. Must be passed, as different - dataframe libraries have different ways of auto-inferring - formats. + format: Format to use for conversion. If set to None (default), the format is + inferred from the data. Examples: >>> import pandas as pd diff --git a/narwhals/series.py b/narwhals/series.py index bb97090688..1753598c17 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -3083,7 +3083,7 @@ def to_lowercase(self) -> Series: self._narwhals_series._compliant_series.str.to_lowercase() ) - def to_datetime(self, format: str) -> Series: # noqa: A002 + def to_datetime(self: Self, format: str | None = None) -> Series: # noqa: A002 """ Parse Series with strings to a Series with Datetime dtype. @@ -3093,10 +3093,13 @@ def to_datetime(self, format: str) -> Series: # noqa: A002 in pandas, with no ability to set any other one. The ability to set the time unit in pandas, if the version permits, will arrive. + Warning: + As different backends auto-infer format in different ways, if `format=None` + there is no guarantee that the result will be equal. + Arguments: - format: Format to parse strings with. Must be passed, as different - dataframe libraries have different ways of auto-inferring - formats. + format: Format to use for conversion. If set to None (default), the format is + inferred from the data. Examples: >>> import pandas as pd diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index a64a3c58be..8474357e07 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -1,5 +1,7 @@ from typing import Any +import pytest + import narwhals.stable.v1 as nw from tests.utils import Constructor @@ -34,3 +36,41 @@ def test_to_datetime_series(constructor_eager: Any) -> None: ) ).item(0) assert str(result) == expected + + +def test_to_datetime_infer_fmt( + request: pytest.FixtureRequest, constructor: Constructor +) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + + if "cudf" in str(constructor): # pragma: no cover + expected = "2020-01-01T12:34:56.000000000" + else: + expected = "2020-01-01 12:34:56" + + result = ( + nw.from_native(constructor(data)) + .lazy() + .select(b=nw.col("a").str.to_datetime()) + .collect() + .item(row=0, column="b") + ) + assert str(result) == expected + + +def test_to_datetime_series_infer_fmt( + request: pytest.FixtureRequest, constructor_eager: Any +) -> None: + if "pyarrow_table" in str(constructor_eager): + request.applymarker(pytest.mark.xfail) + + if "cudf" in str(constructor_eager): # pragma: no cover + expected = "2020-01-01T12:34:56.000000000" + else: + expected = "2020-01-01 12:34:56" + + result = ( + nw.from_native(constructor_eager(data), eager_only=True)["a"].str.to_datetime() + ).item(0) + assert str(result) == expected