Skip to content

Commit

Permalink
feat: add dt.replace_time_zone (narwhals-dev#1142)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored and akmalsoliev committed Oct 15, 2024
1 parent 31c857b commit 64223b0
Show file tree
Hide file tree
Showing 14 changed files with 577 additions and 9 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=85
- name: Run doctests
if: startsWith(matrix.os, 'windows') != true
run: pytest narwhals --doctest-modules

pytest-windows:
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/expr_dt.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
handler: python
options:
members:
- convert_time_zone
- date
- year
- month
Expand All @@ -15,6 +16,7 @@
- millisecond
- microsecond
- nanosecond
- replace_time_zone
- total_minutes
- total_seconds
- total_milliseconds
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/series_dt.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
handler: python
options:
members:
- convert_time_zone
- date
- year
- month
Expand All @@ -15,6 +16,7 @@
- millisecond
- microsecond
- nanosecond
- replace_time_zone
- total_minutes
- total_seconds
- total_milliseconds
Expand Down
10 changes: 10 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,16 @@ def to_string(self: Self, format: str) -> ArrowExpr: # noqa: A002
self._expr, "dt", "to_string", format
)

def replace_time_zone(self: Self, time_zone: str | None) -> ArrowExpr:
    """Expression-level `dt.replace_time_zone`.

    Forwards to `reuse_series_namespace_implementation` with namespace
    `"dt"` and method name `"replace_time_zone"`.

    Arguments:
        time_zone: Value passed through unchanged as the method's argument
            (a time zone name, or `None`).
    """
    namespace, method = "dt", "replace_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method, time_zone
    )

def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr:
    """Expression-level `dt.convert_time_zone`.

    Forwards to `reuse_series_namespace_implementation` with namespace
    `"dt"` and method name `"convert_time_zone"`.

    Arguments:
        time_zone: Value passed through unchanged as the method's argument.
    """
    namespace, method = "dt", "convert_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method, time_zone
    )

def date(self: Self) -> ArrowExpr:
    """Expression-level `dt.date`.

    Forwards to `reuse_series_namespace_implementation` with namespace
    `"dt"` and method name `"date"`; no extra arguments are passed.
    """
    namespace, method = "dt", "date"
    return reuse_series_namespace_implementation(self._expr, namespace, method)

Expand Down
25 changes: 25 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,31 @@ def to_string(self: Self, format: str) -> ArrowSeries: # noqa: A002
pc.strftime(self._arrow_series._native_series, format)
)

def replace_time_zone(self: Self, time_zone: str | None) -> ArrowSeries:
    """Replace the time zone of the wrapped PyArrow series.

    Arguments:
        time_zone: Time zone to attach to the values, or `None` to return
            the result of `pc.local_timestamp` as-is.

    Returns:
        A new series built with `_from_native_series`.
    """
    import pyarrow.compute as pc  # ignore-banned-import()

    # Per the PyArrow docs, `local_timestamp` gives the wall-clock values
    # without time zone metadata; it is needed in both branches.
    wall_clock = pc.local_timestamp(self._arrow_series._native_series)
    if time_zone is None:
        return self._arrow_series._from_native_series(wall_clock)
    # Attach the requested zone to the wall-clock values.
    return self._arrow_series._from_native_series(
        pc.assume_timezone(wall_clock, time_zone)
    )

def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries:
    """Convert the wrapped PyArrow series to `time_zone`.

    When the series dtype has no time zone, the values are first given the
    "UTC" zone via `replace_time_zone`; the result is then cast to a
    timestamp type carrying `time_zone`.

    Arguments:
        time_zone: Time zone of the target timestamp type.

    Returns:
        A new series built with `_from_native_series`.
    """
    import pyarrow as pa  # ignore-banned-import

    series = self._arrow_series
    if series.dtype.time_zone is None:  # type: ignore[attr-defined]
        source = self.replace_time_zone("UTC")._native_series
    else:
        source = series._native_series

    # The unit is always read from the original native series.
    target_type = pa.timestamp(series._native_series.type.unit, time_zone)
    return series._from_native_series(source.cast(target_type))

def date(self: Self) -> ArrowSeries:
import pyarrow as pa # ignore-banned-import()

Expand Down
28 changes: 28 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from narwhals._dask.utils import add_row_index
from narwhals._dask.utils import maybe_evaluate
from narwhals._dask.utils import narwhals_to_native_dtype
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals.utils import generate_unique_token

if TYPE_CHECKING:
Expand Down Expand Up @@ -925,6 +926,33 @@ def to_string(self, format: str) -> DaskExpr: # noqa: A002
returns_scalar=False,
)

def replace_time_zone(self, time_zone: str | None) -> DaskExpr:
    """Replace the time zone of the expression's values.

    Any existing zone is removed with `tz_localize(None)`; if `time_zone`
    is not `None`, the result is then localized to it.

    Arguments:
        time_zone: Time zone to localize to, or `None` to leave the values
            without a time zone.
    """

    def func(
        _input: dask_expr.Series, _time_zone: str | None
    ) -> dask_expr.Series:
        # Both outcomes start from the values with the zone removed.
        naive = _input.dt.tz_localize(None)
        if _time_zone is None:
            return naive
        return naive.dt.tz_localize(_time_zone)

    return self._expr._from_call(
        func,
        "tz_localize",
        time_zone,
        returns_scalar=False,
    )

def convert_time_zone(self, time_zone: str) -> DaskExpr:
    """Convert the expression's values to `time_zone`.

    Values whose dtype has no time zone are localized to "UTC" before
    `tz_convert` is applied.

    Arguments:
        time_zone: Time zone passed to `tz_convert`.
    """

    def func(s: dask_expr.Series, time_zone: str) -> dask_expr.Series:
        narwhals_dtype = native_to_narwhals_dtype(s, self._expr._dtypes)
        is_naive = narwhals_dtype.time_zone is None  # type: ignore[attr-defined]
        aware = s.dt.tz_localize("UTC") if is_naive else s
        return aware.dt.tz_convert(time_zone)

    return self._expr._from_call(
        func,
        "tz_convert",
        time_zone,
        returns_scalar=False,
    )

def total_minutes(self) -> DaskExpr:
return self._expr._from_call(
lambda _input: _input.dt.total_seconds() // 60,
Expand Down
10 changes: 10 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,16 @@ def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002
self._expr, "dt", "to_string", format
)

def replace_time_zone(self, time_zone: str | None) -> PandasLikeExpr:
    """Expression-level `dt.replace_time_zone`.

    Forwards to `reuse_series_namespace_implementation` with namespace
    `"dt"` and method name `"replace_time_zone"`.

    Arguments:
        time_zone: Value passed through unchanged as the method's argument
            (a time zone name, or `None`).
    """
    namespace, method = "dt", "replace_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method, time_zone
    )

def convert_time_zone(self, time_zone: str) -> PandasLikeExpr:
    """Expression-level `dt.convert_time_zone`.

    Forwards to `reuse_series_namespace_implementation` with namespace
    `"dt"` and method name `"convert_time_zone"`.

    Arguments:
        time_zone: Value passed through unchanged as the method's argument.
    """
    namespace, method = "dt", "convert_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method, time_zone
    )


class PandasLikeExprNameNamespace:
def __init__(self: Self, expr: PandasLikeExpr) -> None:
Expand Down
18 changes: 18 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,3 +928,21 @@ def to_string(self, format: str) -> PandasLikeSeries: # noqa: A002
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.strftime(format)
)

def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries:
    """Replace the time zone of the wrapped pandas-like series.

    Any existing zone is removed with `tz_localize(None)`; if `time_zone`
    is not `None`, the result is then localized to it.

    Arguments:
        time_zone: Time zone to localize to, or `None` to leave the values
            without a time zone.

    Returns:
        A new series built with `_from_native_series`.
    """
    # Both outcomes start from the values with the zone removed.
    naive = self._pandas_series._native_series.dt.tz_localize(None)
    result = naive if time_zone is None else naive.dt.tz_localize(time_zone)
    return self._pandas_series._from_native_series(result)

def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
    """Convert the wrapped pandas-like series to `time_zone`.

    A series whose dtype has no time zone is localized to "UTC" before
    `tz_convert` is applied.

    Arguments:
        time_zone: Time zone passed to `tz_convert`.

    Returns:
        A new series built with `_from_native_series`.
    """
    is_naive = self._pandas_series.dtype.time_zone is None  # type: ignore[attr-defined]
    native = self._pandas_series._native_series
    aware = native.dt.tz_localize("UTC") if is_naive else native
    return self._pandas_series._from_native_series(aware.dt.tz_convert(time_zone))
20 changes: 12 additions & 8 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ def set_axis(
return obj.set_axis(index, axis=0, **kwargs) # type: ignore[attr-defined, no-any-return]


def native_to_narwhals_dtype(column: Any, dtypes: DTypes) -> DType:
dtype = str(column.dtype)
def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType:
dtype = str(native_column.dtype)

pd_datetime_rgx = (
r"^datetime64\[(?P<time_unit>s|ms|us|ns)(?:, (?P<time_zone>[a-zA-Z\/]+))?\]$"
Expand Down Expand Up @@ -282,26 +282,30 @@ def native_to_narwhals_dtype(column: Any, dtypes: DTypes) -> DType:
return dtypes.Date()
if dtype.startswith(("large_list", "list")):
return dtypes.List(
arrow_native_to_narwhals_dtype(column.dtype.pyarrow_dtype.value_type, dtypes)
arrow_native_to_narwhals_dtype(
native_column.dtype.pyarrow_dtype.value_type, dtypes
)
)
if dtype.startswith("fixed_size_list"):
return dtypes.Array(
arrow_native_to_narwhals_dtype(column.dtype.pyarrow_dtype.value_type, dtypes),
column.dtype.pyarrow_dtype.list_size,
arrow_native_to_narwhals_dtype(
native_column.dtype.pyarrow_dtype.value_type, dtypes
),
native_column.dtype.pyarrow_dtype.list_size,
)
if dtype.startswith("struct"):
return dtypes.Struct()
if dtype == "object":
if ( # pragma: no cover TODO(unassigned): why does this show as uncovered?
idx := getattr(column, "first_valid_index", lambda: None)()
) is not None and isinstance(column.loc[idx], str):
idx := getattr(native_column, "first_valid_index", lambda: None)()
) is not None and isinstance(native_column.loc[idx], str):
# Infer based on first non-missing value.
# For pandas pre 3.0, this isn't perfect.
# After pandas 3.0, pandas has a dedicated string dtype
# which is inferred by default.
return dtypes.String()
else:
df = column.to_frame()
df = native_column.to_frame()
if hasattr(df, "__dataframe__"):
from narwhals._interchange.dataframe import (
map_interchange_dtype_to_narwhals_dtype,
Expand Down
3 changes: 2 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Iterable
from typing import Iterator
from typing import Literal
from typing import NoReturn
from typing import Sequence
from typing import TypeVar
from typing import overload
Expand Down Expand Up @@ -2787,7 +2788,7 @@ def __repr__(self) -> str: # pragma: no cover
+ "┘"
)

def __getitem__(self, item: str | slice) -> NoReturn:
    """Reject item access; it is not supported on LazyFrame.

    Arguments:
        item: Ignored; accepted only so that `obj[item]` reaches this method.

    Raises:
        TypeError: Always.
    """
    # The method never returns, hence the `NoReturn` annotation.
    msg = "Slicing is not supported on LazyFrame"
    raise TypeError(msg)

Expand Down
113 changes: 113 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3501,6 +3501,119 @@ def to_string(self, format: str) -> Expr: # noqa: A002
lambda plx: self._expr._call(plx).dt.to_string(format)
)

def replace_time_zone(self, time_zone: str | None) -> Expr:
    """
    Replace time zone.

    The wall-clock values are kept and only the time zone attached to them
    changes (see the example below, where midnight UTC becomes midnight in
    the target zone).

    Arguments:
        time_zone: Target time zone.

    Returns:
        A new expression.

    Examples:
        >>> from datetime import datetime, timezone
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {
        ...     "a": [
        ...         datetime(2024, 1, 1, tzinfo=timezone.utc),
        ...         datetime(2024, 1, 2, tzinfo=timezone.utc),
        ...     ]
        ... }
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> df_pa = pa.table(data)

        Let's define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.select(nw.col("a").dt.replace_time_zone("Asia/Kathmandu"))

        We can then pass pandas / PyArrow / Polars / any other supported library:

        >>> func(df_pd)
                                  a
        0 2024-01-01 00:00:00+05:45
        1 2024-01-02 00:00:00+05:45
        >>> func(df_pl)
        shape: (2, 1)
        ┌──────────────────────────────┐
        │ a                            │
        │ ---                          │
        │ datetime[μs, Asia/Kathmandu] │
        ╞══════════════════════════════╡
        │ 2024-01-01 00:00:00 +0545    │
        │ 2024-01-02 00:00:00 +0545    │
        └──────────────────────────────┘
        >>> func(df_pa)  # doctest:+SKIP
        pyarrow.Table
        a: timestamp[us, tz=Asia/Kathmandu]
        ----
        a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]]
    """
    expr_cls = self._expr.__class__
    return expr_cls(
        lambda plx: self._expr._call(plx).dt.replace_time_zone(time_zone)
    )

def convert_time_zone(self, time_zone: str) -> Expr:
    """
    Convert to a new time zone.

    Arguments:
        time_zone: Target time zone.

    Returns:
        A new expression.

    Raises:
        TypeError: If `time_zone` is `None`.

    Examples:
        >>> from datetime import datetime, timezone
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {
        ...     "a": [
        ...         datetime(2024, 1, 1, tzinfo=timezone.utc),
        ...         datetime(2024, 1, 2, tzinfo=timezone.utc),
        ...     ]
        ... }
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> df_pa = pa.table(data)

        Let's define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.select(nw.col("a").dt.convert_time_zone("Asia/Kathmandu"))

        We can then pass pandas / PyArrow / Polars / any other supported library:

        >>> func(df_pd)
                                  a
        0 2024-01-01 05:45:00+05:45
        1 2024-01-02 05:45:00+05:45
        >>> func(df_pl)
        shape: (2, 1)
        ┌──────────────────────────────┐
        │ a                            │
        │ ---                          │
        │ datetime[μs, Asia/Kathmandu] │
        ╞══════════════════════════════╡
        │ 2024-01-01 05:45:00 +0545    │
        │ 2024-01-02 05:45:00 +0545    │
        └──────────────────────────────┘
        >>> func(df_pa)  # doctest:+SKIP
        pyarrow.Table
        a: timestamp[us, tz=Asia/Kathmandu]
        ----
        a: [[2024-01-01 00:00:00.000000Z,2024-01-02 00:00:00.000000Z]]
    """
    # Checked eagerly, before the expression is built.
    if time_zone is None:
        msg = "Target `time_zone` cannot be `None` in `convert_time_zone`. Please use `replace_time_zone(None)` if you want to remove the time zone."
        raise TypeError(msg)

    expr_cls = self._expr.__class__
    return expr_cls(
        lambda plx: self._expr._call(plx).dt.convert_time_zone(time_zone)
    )


class ExprNameNamespace:
def __init__(self: Self, expr: Expr) -> None:
Expand Down
Loading

0 comments on commit 64223b0

Please sign in to comment.