Skip to content

Commit

Permalink
feat: add dt.replace_time_zone and dt.convert_time_zone
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Oct 8, 2024
1 parent f2b7a40 commit a560aae
Show file tree
Hide file tree
Showing 20 changed files with 558 additions and 160 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=85
- name: Run doctests
if: startsWith(matrix.os, 'windows') != true
run: pytest narwhals --doctest-modules

pytest-windows:
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/expr_dt.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
handler: python
options:
members:
- convert_time_zone
- date
- year
- month
Expand All @@ -15,6 +16,7 @@
- millisecond
- microsecond
- nanosecond
- replace_time_zone
- total_minutes
- total_seconds
- total_milliseconds
Expand Down
2 changes: 1 addition & 1 deletion docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
- ends_with
- head
- len_chars
- slice
- replace
- replace_all
- slice
- starts_with
- strip_chars
- tail
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/series_dt.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
handler: python
options:
members:
- convert_time_zone
- date
- year
- month
Expand All @@ -15,6 +16,7 @@
- millisecond
- microsecond
- nanosecond
- replace_time_zone
- total_minutes
- total_seconds
- total_milliseconds
Expand Down
4 changes: 1 addition & 3 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
- slice
- starts_with
- strip_chars
- tail
- to_datetime
- to_lowercase
- to_uppercase
- tail
show_source: false
show_bases: false
12 changes: 11 additions & 1 deletion narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,16 @@ def to_string(self: Self, format: str) -> ArrowExpr: # noqa: A002
self._expr, "dt", "to_string", format
)

def replace_time_zone(self: Self, time_zone: str | None) -> ArrowExpr:
    """Build the expression for `dt.replace_time_zone`.

    The work is handed to `reuse_series_namespace_implementation`, which is
    given the `"dt"` namespace, the `"replace_time_zone"` method name and
    `time_zone` unchanged (it may be `None`).
    """
    namespace, method_name = "dt", "replace_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method_name, time_zone
    )

def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr:
    """Build the expression for `dt.convert_time_zone`.

    The work is handed to `reuse_series_namespace_implementation`, which is
    given the `"dt"` namespace, the `"convert_time_zone"` method name and
    `time_zone` unchanged.
    """
    namespace, method_name = "dt", "convert_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method_name, time_zone
    )

def date(self: Self) -> ArrowExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "date")

Expand Down Expand Up @@ -540,7 +550,7 @@ def slice(self, offset: int, length: int | None = None) -> ArrowExpr:
self._expr, "str", "slice", offset, length
)

def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002
def to_datetime(self, format: str | None = None) -> ArrowExpr: # noqa: A002
return reuse_series_namespace_implementation(
self._expr,
"str",
Expand Down
31 changes: 26 additions & 5 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,31 @@ def to_string(self: Self, format: str) -> ArrowSeries: # noqa: A002
pc.strftime(self._arrow_series._native_series, format)
)

def replace_time_zone(self: Self, time_zone: str | None) -> ArrowSeries:
    """Replace the time zone of the series, or drop it when `time_zone` is None.

    The native series is always passed through `pc.local_timestamp` first.
    When a time zone is given, that result is then passed to
    `pc.assume_timezone` together with `time_zone`.
    """
    import pyarrow.compute as pc  # ignore-banned-import()

    local = pc.local_timestamp(self._arrow_series._native_series)
    if time_zone is None:
        return self._arrow_series._from_native_series(local)
    return self._arrow_series._from_native_series(
        pc.assume_timezone(local, time_zone)
    )

def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries:
    """Cast the series to a timestamp type carrying `time_zone`.

    When the series dtype has no time zone, the values are first given the
    "UTC" zone via `replace_time_zone`. The cast keeps the unit of the
    original native type.
    """
    import pyarrow as pa  # ignore-banned-import

    if self._arrow_series.dtype.time_zone is None:  # type: ignore[attr-defined]
        source = self.replace_time_zone("UTC")._native_series
    else:
        source = self._arrow_series._native_series

    target_type = pa.timestamp(
        self._arrow_series._native_series.type.unit, time_zone
    )
    return self._arrow_series._from_native_series(source.cast(target_type))

def date(self: Self) -> ArrowSeries:
import pyarrow as pa # ignore-banned-import()

Expand Down Expand Up @@ -1033,13 +1058,9 @@ def slice(self: Self, offset: int, length: int | None = None) -> ArrowSeries:
),
)

def to_datetime(self: Self, format: str | None = None) -> ArrowSeries:  # noqa: A002
    """Parse the string series into timestamps with microsecond unit.

    Arguments:
        format: Format string handed to `pc.strptime`. It defaults to `None`
            so the signature matches the other backends, but this backend
            cannot work without it.

    Raises:
        ValueError: If `format` is `None`.
    """
    # Fail early with a clear message instead of letting `None` reach
    # `pc.strptime`, which needs an explicit format string.
    if format is None:
        msg = "`format` is required for pyarrow backend."
        raise ValueError(msg)

    import pyarrow.compute as pc  # ignore-banned-import()

    return self._arrow_series._from_native_series(
        pc.strptime(self._arrow_series._native_series, format=format, unit="us")
    )
Expand Down
30 changes: 29 additions & 1 deletion narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from narwhals._dask.utils import add_row_index
from narwhals._dask.utils import maybe_evaluate
from narwhals._dask.utils import narwhals_to_native_dtype
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals.utils import generate_unique_token

if TYPE_CHECKING:
Expand Down Expand Up @@ -811,7 +812,7 @@ def slice(self, offset: int, length: int | None = None) -> DaskExpr:
returns_scalar=False,
)

def to_datetime(self: Self, format: str | None) -> DaskExpr: # noqa: A002
def to_datetime(self, format: str | None = None) -> DaskExpr: # noqa: A002
import dask.dataframe as dd # ignore-banned-import()

return self._expr._from_call(
Expand Down Expand Up @@ -925,6 +926,33 @@ def to_string(self, format: str) -> DaskExpr: # noqa: A002
returns_scalar=False,
)

def replace_time_zone(self, time_zone: str | None) -> DaskExpr:
    """Build the expression that replaces (or drops) the time zone.

    The callable given to `_from_call` strips any existing zone with
    `tz_localize(None)`, then localizes to the requested zone unless it is
    `None`.
    """

    def func(_input, _time_zone):  # type: ignore[no-untyped-def]
        naive = _input.dt.tz_localize(None)
        if _time_zone is None:
            return naive
        return naive.dt.tz_localize(_time_zone)

    return self._expr._from_call(
        func,
        "tz_localize",
        time_zone,
        returns_scalar=False,
    )

def convert_time_zone(self, time_zone: str) -> DaskExpr:
    """Build the expression that converts the column to `time_zone`.

    Columns whose narwhals dtype has no time zone are localized to "UTC"
    before `tz_convert` is applied.
    """

    def func(s: dask_expr.Series, time_zone: str) -> dask_expr.Series:
        dtype = native_to_narwhals_dtype(s, self._expr._dtypes)
        has_zone = dtype.time_zone is not None  # type: ignore[attr-defined]
        aware = s if has_zone else s.dt.tz_localize("UTC")
        return aware.dt.tz_convert(time_zone)

    return self._expr._from_call(
        func,
        "tz_convert",
        time_zone,
        returns_scalar=False,
    )

def total_minutes(self) -> DaskExpr:
return self._expr._from_call(
lambda _input: _input.dt.total_seconds() // 60,
Expand Down
10 changes: 5 additions & 5 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def get_column(self, name: str) -> PandasLikeSeries:
from narwhals._pandas_like.series import PandasLikeSeries

return PandasLikeSeries(
self._native_frame[name],
self._native_frame.loc[:, name],
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
Expand Down Expand Up @@ -157,7 +157,7 @@ def __getitem__(
from narwhals._pandas_like.series import PandasLikeSeries

return PandasLikeSeries(
self._native_frame[item],
self._native_frame.loc[:, item],
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
Expand Down Expand Up @@ -276,7 +276,7 @@ def iter_rows(
@property
def schema(self) -> dict[str, DType]:
return {
col: native_to_narwhals_dtype(self._native_frame[col], self._dtypes)
col: native_to_narwhals_dtype(self._native_frame.loc[:, col], self._dtypes)
for col in self._native_frame.columns
}

Expand Down Expand Up @@ -382,7 +382,7 @@ def with_columns(
)
)
else:
to_concat.append(self._native_frame[name])
to_concat.append(self._native_frame.loc[:, name])
to_concat.extend(
validate_dataframe_comparand(index, new_column_name_to_new_column_map[s])
for s in new_column_name_to_new_column_map
Expand Down Expand Up @@ -637,7 +637,7 @@ def to_dict(self, *, as_series: bool = False) -> dict[str, Any]:
# TODO(Unassigned): should this return narwhals series?
return {
col: PandasLikeSeries(
self._native_frame[col],
self._native_frame.loc[:, col],
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
Expand Down
14 changes: 12 additions & 2 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def from_column_names(
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [
PandasLikeSeries(
df._native_frame[column_name],
df._native_frame.loc[:, column_name],
implementation=df._implementation,
backend_version=df._backend_version,
dtypes=df._dtypes,
Expand Down Expand Up @@ -486,7 +486,7 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeExpr:
self._expr, "str", "slice", offset, length
)

def to_datetime(self: Self, format: str | None) -> PandasLikeExpr: # noqa: A002
def to_datetime(self, format: str | None = None) -> PandasLikeExpr: # noqa: A002
return reuse_series_namespace_implementation(
self._expr,
"str",
Expand Down Expand Up @@ -572,6 +572,16 @@ def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002
self._expr, "dt", "to_string", format
)

def replace_time_zone(self, time_zone: str | None) -> PandasLikeExpr:
    """Build the expression for `dt.replace_time_zone`.

    The work is handed to `reuse_series_namespace_implementation`, which is
    given the `"dt"` namespace, the `"replace_time_zone"` method name and
    `time_zone` unchanged (it may be `None`).
    """
    namespace, method_name = "dt", "replace_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method_name, time_zone
    )

def convert_time_zone(self, time_zone: str) -> PandasLikeExpr:
    """Build the expression for `dt.convert_time_zone`.

    The work is handed to `reuse_series_namespace_implementation`, which is
    given the `"dt"` namespace, the `"convert_time_zone"` method name and
    `time_zone` unchanged.
    """
    namespace, method_name = "dt", "convert_time_zone"
    return reuse_series_namespace_implementation(
        self._expr, namespace, method_name, time_zone
    )


class PandasLikeExprNameNamespace:
def __init__(self: Self, expr: PandasLikeExpr) -> None:
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def all(self) -> PandasLikeExpr:
return PandasLikeExpr(
lambda df: [
PandasLikeSeries(
df._native_frame[column_name],
df._native_frame.loc[:, column_name],
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
Expand Down
20 changes: 19 additions & 1 deletion narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeSeries:
self._pandas_series._native_series.str.slice(start=offset, stop=stop),
)

def to_datetime(self: Self, format: str | None) -> PandasLikeSeries: # noqa: A002
def to_datetime(self, format: str | None = None) -> PandasLikeSeries: # noqa: A002
return self._pandas_series._from_native_series(
to_datetime(self._pandas_series._implementation)(
self._pandas_series._native_series, format=format
Expand Down Expand Up @@ -928,3 +928,21 @@ def to_string(self, format: str) -> PandasLikeSeries: # noqa: A002
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.strftime(format)
)

def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries:
    """Replace the time zone of the series, or drop it when `time_zone` is None.

    Any existing zone is removed with `tz_localize(None)`. If a zone was
    requested, the now-naive values are localized to it.
    """
    naive = self._pandas_series._native_series.dt.tz_localize(None)
    result = naive if time_zone is None else naive.dt.tz_localize(time_zone)
    return self._pandas_series._from_native_series(result)

def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
    """Convert the series to `time_zone`.

    A series whose dtype has no time zone is localized to "UTC" before
    `tz_convert` is applied.
    """
    is_naive = self._pandas_series.dtype.time_zone is None  # type: ignore[attr-defined]
    native = self._pandas_series._native_series
    aware = native.dt.tz_localize("UTC") if is_naive else native
    return self._pandas_series._from_native_series(aware.dt.tz_convert(time_zone))
20 changes: 12 additions & 8 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ def set_axis(
return obj.set_axis(index, axis=0, **kwargs) # type: ignore[attr-defined, no-any-return]


def native_to_narwhals_dtype(column: Any, dtypes: DTypes) -> DType:
dtype = str(column.dtype)
def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType:
dtype = str(native_column.dtype)

pd_datetime_rgx = (
r"^datetime64\[(?P<time_unit>s|ms|us|ns)(?:, (?P<time_zone>[a-zA-Z\/]+))?\]$"
Expand Down Expand Up @@ -282,26 +282,30 @@ def native_to_narwhals_dtype(column: Any, dtypes: DTypes) -> DType:
return dtypes.Date()
if dtype.startswith(("large_list", "list")):
return dtypes.List(
arrow_native_to_narwhals_dtype(column.dtype.pyarrow_dtype.value_type, dtypes)
arrow_native_to_narwhals_dtype(
native_column.dtype.pyarrow_dtype.value_type, dtypes
)
)
if dtype.startswith("fixed_size_list"):
return dtypes.Array(
arrow_native_to_narwhals_dtype(column.dtype.pyarrow_dtype.value_type, dtypes),
column.dtype.pyarrow_dtype.list_size,
arrow_native_to_narwhals_dtype(
native_column.dtype.pyarrow_dtype.value_type, dtypes
),
native_column.dtype.pyarrow_dtype.list_size,
)
if dtype.startswith("struct"):
return dtypes.Struct()
if dtype == "object":
if ( # pragma: no cover TODO(unassigned): why does this show as uncovered?
idx := getattr(column, "first_valid_index", lambda: None)()
) is not None and isinstance(column.loc[idx], str):
idx := getattr(native_column, "first_valid_index", lambda: None)()
) is not None and isinstance(native_column.loc[idx], str):
# Infer based on first non-missing value.
# For pandas pre 3.0, this isn't perfect.
# After pandas 3.0, pandas has a dedicated string dtype
# which is inferred by default.
return dtypes.String()
else:
df = column.to_frame()
df = native_column.to_frame()
if hasattr(df, "__dataframe__"):
from narwhals._interchange.dataframe import (
map_interchange_dtype_to_narwhals_dtype,
Expand Down
3 changes: 2 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Iterable
from typing import Iterator
from typing import Literal
from typing import NoReturn
from typing import Sequence
from typing import TypeVar
from typing import overload
Expand Down Expand Up @@ -2787,7 +2788,7 @@ def __repr__(self) -> str: # pragma: no cover
+ "┘"
)

def __getitem__(self, item: str | slice) -> Series | Self:
def __getitem__(self, item: str | slice) -> NoReturn:
    """Reject item access: this always raises `TypeError`."""
    error_message = "Slicing is not supported on LazyFrame"
    raise TypeError(error_message)

Expand Down
Loading

0 comments on commit a560aae

Please sign in to comment.