Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: _pandas_series to _compliant_series in PandasSeries #1517

Merged
merged 1 commit into from
Dec 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 79 additions & 78 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,29 +867,29 @@ def cat(self) -> PandasLikeSeriesCatNamespace:

# Categorical accessor namespace that keeps a reference to the wrapped
# PandasLikeSeries.
# NOTE(review): this span is a rendered diff without +/- markers; each
# `_pandas_series` line is presumably the removed side and the
# `_compliant_series` line after it the replacement (per the PR title) -- confirm.
class PandasLikeSeriesCatNamespace:
def __init__(self, series: PandasLikeSeries) -> None:
self._pandas_series = series
self._compliant_series = series

# Return the categories of the native series as a new series: the categories
# are rebuilt with the native series' own class (keeping its name) and then
# wrapped via `_from_native_series`.
def get_categories(self) -> PandasLikeSeries:
s = self._pandas_series._native_series
return self._pandas_series._from_native_series(
s = self._compliant_series._native_series
return self._compliant_series._from_native_series(
s.__class__(s.cat.categories, name=s.name)
)


# String accessor namespace; the constructor only stores the wrapped
# PandasLikeSeries.
# NOTE(review): rendered diff without +/- markers -- the `_pandas_series`
# assignment is presumably the removed line and `_compliant_series` the added
# one (per the PR title); confirm.
class PandasLikeSeriesStringNamespace:
def __init__(self, series: PandasLikeSeries) -> None:
self._pandas_series = series
self._compliant_series = series

# Per-element string length: calls `.str.len()` on the native series and
# wraps the result via `_from_native_series`.
def len_chars(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.len()
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.len()
)

# Replace matches of `pattern` with `value` using the native `.str.replace`.
#   pattern: forwarded as `pat`; treated as a regex unless `literal` is True
#            (the call passes `regex=not literal`).
#   value:   forwarded as `repl`.
#   n:       forwarded unchanged as the replacement count (default 1).
# Returns the result wrapped via `_from_native_series`.
def replace(
self, pattern: str, value: str, *, literal: bool = False, n: int = 1
) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.replace(
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.replace(
pat=pattern, repl=value, n=n, regex=not literal
),
)
Expand All @@ -900,58 +900,58 @@ def replace_all(
return self.replace(pattern, value, literal=literal, n=-1)

# Strip characters from both ends of each string via the native `.str.strip`.
# `characters` (including None) is forwarded unchanged as the only argument.
def strip_chars(self, characters: str | None) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.strip(characters),
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.strip(characters),
)

# Per-element test of whether each string starts with `prefix`, delegated to
# the native `.str.startswith` and wrapped via `_from_native_series`.
def starts_with(self, prefix: str) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.startswith(prefix),
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.startswith(prefix),
)

# Per-element test of whether each string ends with `suffix`, delegated to
# the native `.str.endswith` and wrapped via `_from_native_series`.
def ends_with(self, suffix: str) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.endswith(suffix),
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.endswith(suffix),
)

# Per-element test of whether each string contains `pattern`, delegated to the
# native `.str.contains`. The pattern is treated as a regex unless `literal`
# is True (the call passes `regex=not literal`).
def contains(self, pattern: str, *, literal: bool = False) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.contains(
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.contains(
pat=pattern, regex=not literal
)
)

# Take a substring of each element starting at `offset`, delegated to the
# native `.str.slice(start=offset, stop=stop)`.
#   offset: start position, forwarded as `start`.
#   length: number of characters; when falsy, `stop` is None (no upper bound).
# NOTE(review): `if length` is a truthiness test, so `length=0` yields
# stop=None exactly like `length=None` instead of a zero-width slice -- confirm
# this is intended (an `is not None` check would distinguish the two).
# NOTE(review): with a negative `offset`, `offset + length` can land on 0 or a
# positive value, which changes what `stop` means -- verify against callers.
def slice(self, offset: int, length: int | None = None) -> PandasLikeSeries:
stop = offset + length if length else None
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.slice(start=offset, stop=stop),
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.slice(start=offset, stop=stop),
)

# Parse the string series into datetimes. `to_datetime(<implementation>)`
# returns the parsing callable for the series' backend (`_implementation`);
# it is then called with the native series and `format` (None is forwarded
# as-is). The `noqa: A002` is there because the parameter name `format`
# shadows the builtin.
def to_datetime(self: Self, format: str | None) -> PandasLikeSeries: # noqa: A002
return self._pandas_series._from_native_series(
to_datetime(self._pandas_series._implementation)(
self._pandas_series._native_series, format=format
return self._compliant_series._from_native_series(
to_datetime(self._compliant_series._implementation)(
self._compliant_series._native_series, format=format
)
)

# Upper-case every string via the native `.str.upper()` and wrap the result
# via `_from_native_series`.
def to_uppercase(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.upper(),
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.upper(),
)

# Lower-case every string via the native `.str.lower()` and wrap the result
# via `_from_native_series`.
def to_lowercase(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.str.lower(),
return self._compliant_series._from_native_series(
self._compliant_series._native_series.str.lower(),
)


# Datetime accessor namespace; the constructor only stores the wrapped
# PandasLikeSeries.
# NOTE(review): rendered diff without +/- markers -- the `_pandas_series`
# assignment is presumably the removed line and `_compliant_series` the added
# one (per the PR title); confirm.
class PandasLikeSeriesDateTimeNamespace:
def __init__(self, series: PandasLikeSeries) -> None:
self._pandas_series = series
self._compliant_series = series

def date(self) -> PandasLikeSeries:
result = self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.date,
result = self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.date,
)
if str(result.dtype).lower() == "object":
msg = (
Expand All @@ -965,87 +965,88 @@ def date(self) -> PandasLikeSeries:
return result

# Year component: reads `.dt.year` from the native series and wraps it via
# `_from_native_series`.
def year(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.year,
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.year,
)

# Month component: reads `.dt.month` from the native series and wraps it via
# `_from_native_series`.
def month(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.month,
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.month,
)

# Day-of-month component: reads `.dt.day` from the native series and wraps it
# via `_from_native_series`.
def day(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.day,
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.day,
)

# Hour component: reads `.dt.hour` from the native series and wraps it via
# `_from_native_series`.
def hour(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.hour,
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.hour,
)

# Minute component: reads `.dt.minute` from the native series and wraps it via
# `_from_native_series`.
def minute(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.minute,
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.minute,
)

# Second component: reads `.dt.second` from the native series and wraps it via
# `_from_native_series`.
def second(self) -> PandasLikeSeries:
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.second,
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.second,
)

# Millisecond component, derived from `microsecond()` by floor-dividing by
# 1000 rather than read from the native series directly.
def millisecond(self) -> PandasLikeSeries:
return self.microsecond() // 1000

# Microsecond component of each datetime.
# Two paths:
#   * backend version < (3, 0, 0) AND the native dtype string contains
#     "pyarrow": compute the value with pyarrow.compute (see linked issue).
#   * otherwise: read `.dt.microsecond` from the native series.
# Either way the result is wrapped via `_from_native_series`.
def microsecond(self) -> PandasLikeSeries:
if self._pandas_series._backend_version < (3, 0, 0) and "pyarrow" in str(
self._pandas_series._native_series.dtype
if self._compliant_series._backend_version < (3, 0, 0) and "pyarrow" in str(
self._compliant_series._native_series.dtype
):
# crazy workaround for https://github.com/pandas-dev/pandas/issues/59154
# Imported locally so pyarrow is only needed on this branch.
import pyarrow.compute as pc # ignore-banned-import()

native_series = self._pandas_series._native_series
native_series = self._compliant_series._native_series
# Get the underlying Arrow array, then combine millisecond * 1000 with the
# microsecond field.
# NOTE(review): presumably `pc.microsecond` yields only the part below one
# millisecond, which is why milliseconds are added back -- confirm in the
# pyarrow.compute docs.
arr = native_series.array.__arrow_array__()
result_arr = pc.add(
pc.multiply(pc.millisecond(arr), 1000), pc.microsecond(arr)
)
# Rebuild with the same extension-array class and series class, keeping the
# original name.
result = native_series.__class__(
native_series.array.__class__(result_arr), name=native_series.name
)
return self._pandas_series._from_native_series(result)
return self._compliant_series._from_native_series(result)

return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.microsecond
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.microsecond
)

# Nanosecond component, built as `microsecond() * 1_000` plus the native
# `.dt.nanosecond` field.
# NOTE(review): the right-hand operand is a native series while the left is a
# PandasLikeSeries, so the result type depends on PandasLikeSeries arithmetic
# defined elsewhere (hence the `type: ignore`) -- confirm it returns a
# PandasLikeSeries.
def nanosecond(self) -> PandasLikeSeries:
return ( # type: ignore[no-any-return]
self.microsecond() * 1_000 + self._pandas_series._native_series.dt.nanosecond
self.microsecond() * 1_000
+ self._compliant_series._native_series.dt.nanosecond
)

# Day of the year (1-based) for each datetime.
# Computed with NumPy: the values truncated to whole days, minus the first day
# of their year, converted to int32, plus 1.
# The result is rebuilt with the native series' class, using dtype
# "Int64[pyarrow]" when the input dtype string contains "pyarrow" and "int32"
# otherwise, and takes its name from the `.dt.year` series.
def ordinal_day(self) -> PandasLikeSeries:
ser = self._pandas_series._native_series
ser = self._compliant_series._native_series
# Despite the name, `year_start` holds the calendar year numbers (`.dt.year`);
# `(year - 1970)` cast to datetime64[Y] below gives January 1st of that year.
year_start = ser.dt.year
result = (
ser.to_numpy().astype("datetime64[D]")
- (year_start.to_numpy() - 1970).astype("datetime64[Y]")
).astype("int32") + 1
dtype = "Int64[pyarrow]" if "pyarrow" in str(ser.dtype) else "int32"
return self._pandas_series._from_native_series(
self._pandas_series._native_series.__class__(
return self._compliant_series._from_native_series(
self._compliant_series._native_series.__class__(
result, dtype=dtype, name=year_start.name
)
)

# Total duration in seconds for each element, returned as a native object
# (not wrapped via `_from_native_series`).
# Uses the native `.dt.total_seconds()` when the accessor provides it;
# otherwise sums the components by hand:
#   days * 86400 + seconds + microseconds / 1e6 + nanoseconds / 1e9.
# The fallback branch is excluded from coverage (`pragma: no cover`).
def _get_total_seconds(self) -> Any:
if hasattr(self._pandas_series._native_series.dt, "total_seconds"):
return self._pandas_series._native_series.dt.total_seconds()
if hasattr(self._compliant_series._native_series.dt, "total_seconds"):
return self._compliant_series._native_series.dt.total_seconds()
else: # pragma: no cover
return (
self._pandas_series._native_series.dt.days * 86400
+ self._pandas_series._native_series.dt.seconds
+ (self._pandas_series._native_series.dt.microseconds / 1e6)
+ (self._pandas_series._native_series.dt.nanoseconds / 1e9)
self._compliant_series._native_series.dt.days * 86400
+ self._compliant_series._native_series.dt.seconds
+ (self._compliant_series._native_series.dt.microseconds / 1e6)
+ (self._compliant_series._native_series.dt.nanoseconds / 1e9)
)

def total_minutes(self) -> PandasLikeSeries:
Expand All @@ -1056,7 +1057,7 @@ def total_minutes(self) -> PandasLikeSeries:
s_abs = s.abs() // 60
if ~s.isna().any():
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
return self._pandas_series._from_native_series(s_abs * s_sign)
return self._compliant_series._from_native_series(s_abs * s_sign)

def total_seconds(self) -> PandasLikeSeries:
s = self._get_total_seconds()
Expand All @@ -1066,7 +1067,7 @@ def total_seconds(self) -> PandasLikeSeries:
s_abs = s.abs() // 1
if ~s.isna().any():
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
return self._pandas_series._from_native_series(s_abs * s_sign)
return self._compliant_series._from_native_series(s_abs * s_sign)

def total_milliseconds(self) -> PandasLikeSeries:
s = self._get_total_seconds() * 1e3
Expand All @@ -1076,7 +1077,7 @@ def total_milliseconds(self) -> PandasLikeSeries:
s_abs = s.abs() // 1
if ~s.isna().any():
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
return self._pandas_series._from_native_series(s_abs * s_sign)
return self._compliant_series._from_native_series(s_abs * s_sign)

def total_microseconds(self) -> PandasLikeSeries:
s = self._get_total_seconds() * 1e6
Expand All @@ -1086,7 +1087,7 @@ def total_microseconds(self) -> PandasLikeSeries:
s_abs = s.abs() // 1
if ~s.isna().any():
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
return self._pandas_series._from_native_series(s_abs * s_sign)
return self._compliant_series._from_native_series(s_abs * s_sign)

def total_nanoseconds(self) -> PandasLikeSeries:
s = self._get_total_seconds() * 1e9
Expand All @@ -1096,53 +1097,53 @@ def total_nanoseconds(self) -> PandasLikeSeries:
s_abs = s.abs() // 1
if ~s.isna().any():
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
return self._pandas_series._from_native_series(s_abs * s_sign)
return self._compliant_series._from_native_series(s_abs * s_sign)

# Format each datetime as a string with the native `.dt.strftime`.
# The format string is rewritten before the call:
#   * non-pyarrow dtype: "%S%.f" becomes "%S.%f";
#   * pyarrow dtype: both "%S.%f" and "%S%.f" collapse to "%S".
# The `noqa: A002` is there because the parameter name `format` shadows the
# builtin.
def to_string(self, format: str) -> PandasLikeSeries: # noqa: A002
# Polars' parser treats `'%.f'` as pandas does `'.%f'`
# PyArrow interprets `'%S'` as "seconds, plus fractional seconds"
# and doesn't support `%f`
if "pyarrow" not in str(self._pandas_series._native_series.dtype):
if "pyarrow" not in str(self._compliant_series._native_series.dtype):
format = format.replace("%S%.f", "%S.%f")
else:
format = format.replace("%S.%f", "%S").replace("%S%.f", "%S")
return self._pandas_series._from_native_series(
self._pandas_series._native_series.dt.strftime(format)
return self._compliant_series._from_native_series(
self._compliant_series._native_series.dt.strftime(format)
)

# Set or remove the time zone on the series.
#   time_zone is not None: first `tz_localize(None)` to drop any existing zone,
#                          then `tz_localize(time_zone)` to attach the new one.
#   time_zone is None:     only `tz_localize(None)`, leaving no time zone.
# The result is wrapped via `_from_native_series`.
def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries:
if time_zone is not None:
result = self._pandas_series._native_series.dt.tz_localize(
result = self._compliant_series._native_series.dt.tz_localize(
None
).dt.tz_localize(time_zone)
else:
result = self._pandas_series._native_series.dt.tz_localize(None)
return self._pandas_series._from_native_series(result)
result = self._compliant_series._native_series.dt.tz_localize(None)
return self._compliant_series._from_native_series(result)

# Convert the series to `time_zone`.
# When the series dtype reports no time zone (`dtype.time_zone is None`), the
# values are first localized to "UTC" and then converted; otherwise they are
# converted directly with `tz_convert`. The result is wrapped via
# `_from_native_series`.
def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
if self._pandas_series.dtype.time_zone is None: # type: ignore[attr-defined]
result = self._pandas_series._native_series.dt.tz_localize(
if self._compliant_series.dtype.time_zone is None: # type: ignore[attr-defined]
result = self._compliant_series._native_series.dt.tz_localize(
"UTC"
).dt.tz_convert(time_zone)
else:
result = self._pandas_series._native_series.dt.tz_convert(time_zone)
return self._pandas_series._from_native_series(result)
result = self._compliant_series._native_series.dt.tz_convert(time_zone)
return self._compliant_series._from_native_series(result)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeSeries:
s = self._pandas_series._native_series
dtype = self._pandas_series.dtype
is_pyarrow_dtype = "pyarrow" in str(self._pandas_series._native_series.dtype)
s = self._compliant_series._native_series
dtype = self._compliant_series.dtype
is_pyarrow_dtype = "pyarrow" in str(self._compliant_series._native_series.dtype)
mask_na = s.isna()
dtypes = import_dtypes_module(self._pandas_series._version)
dtypes = import_dtypes_module(self._compliant_series._version)
if dtype == dtypes.Date:
# Date is only supported in pandas dtypes if pyarrow-backed
s_cast = s.astype("Int32[pyarrow]")
result = calculate_timestamp_date(s_cast, time_unit)
elif dtype == dtypes.Datetime:
original_time_unit = dtype.time_unit # type: ignore[attr-defined]
if (
self._pandas_series._implementation is Implementation.PANDAS
and self._pandas_series._backend_version < (2,)
self._compliant_series._implementation is Implementation.PANDAS
and self._compliant_series._backend_version < (2,)
): # pragma: no cover
s_cast = s.view("Int64[pyarrow]") if is_pyarrow_dtype else s.view("int64")
else:
Expand All @@ -1154,4 +1155,4 @@ def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeSe
msg = "Input should be either of Date or Datetime type"
raise TypeError(msg)
result[mask_na] = None
return self._pandas_series._from_native_series(result)
return self._compliant_series._from_native_series(result)
Loading