From afec0e9c1b2856cf28071327141a0ac0e4ba8aee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 19 Mar 2022 15:32:29 -0700 Subject: [PATCH] clean import/comments (#46434) --- pandas/_libs/tslibs/conversion.pyx | 6 +- pandas/_libs/tslibs/tzconversion.pxd | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 112 +++++++++++++------------ pandas/_libs/tslibs/vectorized.pyx | 119 ++++++++++++++------------- 4 files changed, 127 insertions(+), 112 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 792b6d1c35b2f..00f83be0b51c4 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -71,7 +71,7 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.tzconversion cimport ( bisect_right_i8, - tz_convert_utc_to_tzlocal, + localize_tzinfo_api, tz_localize_to_utc_single, ) @@ -556,7 +556,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, if is_utc(tz): pass elif is_tzlocal(tz): - tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold) + localize_tzinfo_api(obj.value, tz, &obj.fold) else: trans, deltas, typ = get_dst_info(tz) @@ -725,7 +725,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - local_val = tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold) + local_val = obj.value + localize_tzinfo_api(obj.value, tz, &obj.fold) dt64_to_dtstruct(local_val, &obj.dts) else: # Adjust datetime64 timestamp, recompute datetimestruct diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 0837a5c436197..136e62985995e 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -2,7 +2,7 @@ from cpython.datetime cimport tzinfo from numpy cimport int64_t -cdef int64_t tz_convert_utc_to_tzlocal( +cdef int64_t localize_tzinfo_api( int64_t utc_val, tzinfo tz, bint* fold=* ) except? -1 cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 7efe9412e43b9..705c4cef5c05d 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -7,6 +7,7 @@ from cython import Py_ssize_t from cpython.datetime cimport ( PyDelta_Check, datetime, + datetime_new, import_datetime, timedelta, tzinfo, @@ -43,6 +44,7 @@ from pandas._libs.tslibs.timezones cimport ( is_fixed_offset, is_tzlocal, is_utc, + utc_pytz, ) @@ -61,7 +63,7 @@ cdef int64_t tz_localize_to_utc_single( return val elif is_tzlocal(tz): - return _tz_convert_tzlocal_utc(val, tz, to_utc=True) + return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True) elif is_fixed_offset(tz): # TODO: in this case we should be able to use get_utcoffset, @@ -142,7 +144,7 @@ timedelta-like} if v == NPY_NAT: result[i] = NPY_NAT else: - result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) + result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True) return result # silence false-positive compiler warning @@ -402,7 +404,7 @@ cdef ndarray[int64_t] _get_dst_hours( # ---------------------------------------------------------------------- # Timezone Conversion -cdef int64_t tz_convert_utc_to_tzlocal( +cdef int64_t localize_tzinfo_api( int64_t utc_val, tzinfo tz, bint* fold=NULL ) except? -1: """ @@ -416,12 +418,13 @@ cdef int64_t tz_convert_utc_to_tzlocal( Returns ------- - local_val : int64_t + delta : int64_t + Value to add when converting from utc. """ - return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold) + return _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold) -cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): +cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz): """ Convert the val (in i8) from UTC to tz @@ -429,7 +432,7 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): Parameters ---------- - val : int64 + utc_val : int64 tz : tzinfo Returns @@ -443,22 +446,22 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): int64_t* tdata intp_t pos - if val == NPY_NAT: - return val + if utc_val == NPY_NAT: + return utc_val if is_utc(tz): - return val + return utc_val elif is_tzlocal(tz): - return _tz_convert_tzlocal_utc(val, tz, to_utc=False) + return utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False) elif is_fixed_offset(tz): _, deltas, _ = get_dst_info(tz) delta = deltas[0] - return val + delta + return utc_val + delta else: trans, deltas, _ = get_dst_info(tz) tdata = cnp.PyArray_DATA(trans) - pos = bisect_right_i8(tdata, val, trans.shape[0]) - 1 - return val + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, trans.shape[0]) - 1 + return utc_val + deltas[pos] def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): @@ -486,13 +489,13 @@ def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): @cython.boundscheck(False) @cython.wraparound(False) -cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): +cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz): """ Convert the given values (in i8) either to UTC or from UTC. Parameters ---------- - vals : int64 ndarray + stamps : int64 ndarray tz : tzinfo Returns @@ -500,18 +503,20 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): converted : ndarray[int64_t] """ cdef: - int64_t[::1] converted, deltas - Py_ssize_t i, ntrans = -1, n = vals.shape[0] - int64_t val, delta = 0 # avoid not-initialized-warning - intp_t pos + Py_ssize_t i, ntrans = -1, n = stamps.shape[0] ndarray[int64_t] trans + int64_t[::1] deltas int64_t* tdata = NULL + intp_t pos + int64_t utc_val, local_val, delta = NPY_NAT + bint use_utc = False, use_tzlocal = False, use_fixed = False str typ - bint use_tzlocal = False, use_fixed = False, use_utc = True + + int64_t[::1] result if is_utc(tz): # Much faster than going through the "standard" pattern below - return vals.copy() + return stamps.copy() if is_utc(tz) or tz is None: use_utc = True @@ -520,59 +525,62 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): else: trans, deltas, typ = get_dst_info(tz) ntrans = trans.shape[0] - if typ not in ["pytz", "dateutil"]: - # FixedOffset, we know len(deltas) == 1 - delta = deltas[0] + # static/fixed; in this case we know that len(delta) == 1 use_fixed = True + delta = deltas[0] else: tdata = cnp.PyArray_DATA(trans) - converted = np.empty(n, dtype=np.int64) + result = np.empty(n, dtype=np.int64) for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = NPY_NAT + utc_val = stamps[i] + if utc_val == NPY_NAT: + result[i] = NPY_NAT continue # The pattern used in vectorized.pyx checks for use_utc here, # but we handle that case above. if use_tzlocal: - converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False) + local_val = utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False) elif use_fixed: - converted[i] = val + delta + local_val = utc_val + delta else: - pos = bisect_right_i8(tdata, val, ntrans) - 1 - converted[i] = val + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 + local_val = utc_val + deltas[pos] + + result[i] = local_val - return converted + return result # OSError may be thrown by tzlocal on windows at or close to 1970-01-01 # see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 -cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, - bint* fold=NULL) except? -1: +cdef int64_t _tz_localize_using_tzinfo_api( + int64_t val, tzinfo tz, bint to_utc=True, bint* fold=NULL +) except? -1: """ - Convert the i8 representation of a datetime from a tzlocal timezone to - UTC, or vice-versa. + Convert the i8 representation of a datetime from a general-cast timezone to + UTC, or vice-versa using the datetime/tzinfo API. - Private, not intended for use outside of tslibs.conversion + Private, not intended for use outside of tslibs.tzconversion. Parameters ---------- val : int64_t tz : tzinfo to_utc : bint - True if converting tzlocal _to_ UTC, False if going the other direction + True if converting _to_ UTC, False if going the other direction. fold : bint*, default NULL pointer to fold: whether datetime ends up in a fold or not - after adjustment + after adjustment. Only passed with to_utc=False. Returns ------- - result : int64_t + delta : int64_t + Value to add when converting from utc, subtract when converting to utc. Notes ----- @@ -586,23 +594,21 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, dt64_to_dtstruct(val, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us) - - # tz.utcoffset only makes sense if datetime - # is _wall time_, so if val is a UTC timestamp convert to wall time + # datetime_new is cython-optimized constructor if not to_utc: - dt = dt.replace(tzinfo=tzutc()) + # tz.utcoffset only makes sense if datetime + # is _wall time_, so if val is a UTC timestamp convert to wall time + dt = datetime_new(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, utc_pytz) dt = dt.astimezone(tz) if fold is not NULL: # NB: fold is only passed with to_utc=False fold[0] = dt.fold + else: + dt = datetime_new(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, None) td = tz.utcoffset(dt) delta = int(td.total_seconds() * 1_000_000_000) - - if to_utc: - return val - delta - else: - return val + delta + return delta diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 3f47a19563b61..9849bbce8c564 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -21,7 +21,7 @@ cnp.import_array() from .conversion cimport normalize_i8_stamp from .dtypes import Resolution - +from .ccalendar cimport DAY_NANOS from .nattype cimport ( NPY_NAT, c_NaT as NaT, @@ -40,7 +40,7 @@ from .timezones cimport ( ) from .tzconversion cimport ( bisect_right_i8, - tz_convert_utc_to_tzlocal, + localize_tzinfo_api, ) # ------------------------------------------------------------------------- @@ -83,17 +83,18 @@ def ints_to_pydatetime( ndarray[object] of type specified by box """ cdef: - Py_ssize_t i, ntrans =- 1, n = len(stamps) + Py_ssize_t i, ntrans = -1, n = stamps.shape[0] ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL intp_t pos + int64_t utc_val, local_val, delta = NPY_NAT + bint use_utc = False, use_tzlocal = False, use_fixed = False + str typ + npy_datetimestruct dts tzinfo new_tz - str typ - int64_t value, local_val, delta = NPY_NAT # dummy for delta ndarray[object] result = np.empty(n, dtype=object) - bint use_utc = False, use_tzlocal = False, use_fixed = False bint use_pytz = False bint use_date = False, use_time = False, use_ts = False, use_pydt = False @@ -127,22 +128,22 @@ def ints_to_pydatetime( use_pytz = typ == "pytz" for i in range(n): + utc_val = stamps[i] new_tz = tz - value = stamps[i] - if value == NPY_NAT: + if utc_val == NPY_NAT: result[i] = NaT continue if use_utc: - local_val = value + local_val = utc_val elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(value, tz) + local_val = utc_val + localize_tzinfo_api(utc_val, tz) elif use_fixed: - local_val = value + delta + local_val = utc_val + delta else: - pos = bisect_right_i8(tdata, value, ntrans) - 1 - local_val = value + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 + local_val = utc_val + deltas[pos] if use_pytz: # find right representation of dst etc in pytz timezone @@ -151,7 +152,7 @@ def ints_to_pydatetime( dt64_to_dtstruct(local_val, &dts) if use_ts: - result[i] = create_timestamp_from_ts(value, dts, new_tz, freq, fold) + result[i] = create_timestamp_from_ts(utc_val, dts, new_tz, freq, fold) elif use_pydt: result[i] = datetime( dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, @@ -194,15 +195,17 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): @cython.boundscheck(False) def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: cdef: - Py_ssize_t i, ntrans=-1, n = len(stamps) - npy_datetimestruct dts - int reso = RESO_DAY, curr_reso + Py_ssize_t i, ntrans = -1, n = stamps.shape[0] ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL intp_t pos - int64_t local_val, delta = NPY_NAT + int64_t utc_val, local_val, delta = NPY_NAT bint use_utc = False, use_tzlocal = False, use_fixed = False + str typ + + npy_datetimestruct dts + int reso = RESO_DAY, curr_reso if is_utc(tz) or tz is None: use_utc = True @@ -219,18 +222,19 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: tdata = cnp.PyArray_DATA(trans) for i in range(n): - if stamps[i] == NPY_NAT: + utc_val = stamps[i] + if utc_val == NPY_NAT: continue if use_utc: - local_val = stamps[i] + local_val = utc_val elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + local_val = utc_val + localize_tzinfo_api(utc_val, tz) elif use_fixed: - local_val = stamps[i] + delta + local_val = utc_val + delta else: - pos = bisect_right_i8(tdata, stamps[i], ntrans) - 1 - local_val = stamps[i] + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 + local_val = utc_val + deltas[pos] dt64_to_dtstruct(local_val, &dts) curr_reso = _reso_stamp(&dts) @@ -260,15 +264,16 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t result : int64 ndarray of converted of normalized nanosecond timestamps """ cdef: - Py_ssize_t i, ntrans =- 1, n = len(stamps) - int64_t[::1] result = np.empty(n, dtype=np.int64) + Py_ssize_t i, ntrans = -1, n = stamps.shape[0] ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL - str typ - Py_ssize_t pos - int64_t local_val, delta = NPY_NAT + intp_t pos + int64_t utc_val, local_val, delta = NPY_NAT bint use_utc = False, use_tzlocal = False, use_fixed = False + str typ + + int64_t[::1] result = np.empty(n, dtype=np.int64) if is_utc(tz) or tz is None: use_utc = True @@ -285,19 +290,20 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t tdata = cnp.PyArray_DATA(trans) for i in range(n): - if stamps[i] == NPY_NAT: + utc_val = stamps[i] + if utc_val == NPY_NAT: result[i] = NPY_NAT continue if use_utc: - local_val = stamps[i] + local_val = utc_val elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + local_val = utc_val + localize_tzinfo_api(utc_val, tz) elif use_fixed: - local_val = stamps[i] + delta + local_val = utc_val + delta else: - pos = bisect_right_i8(tdata, stamps[i], ntrans) - 1 - local_val = stamps[i] + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 + local_val = utc_val + deltas[pos] result[i] = normalize_i8_stamp(local_val) @@ -322,15 +328,14 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: is_normalized : bool True if all stamps are normalized """ cdef: - Py_ssize_t i, ntrans =- 1, n = len(stamps) + Py_ssize_t i, ntrans = -1, n = stamps.shape[0] ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL intp_t pos - int64_t local_val, delta = NPY_NAT - str typ - int64_t day_nanos = 24 * 3600 * 1_000_000_000 + int64_t utc_val, local_val, delta = NPY_NAT bint use_utc = False, use_tzlocal = False, use_fixed = False + str typ if is_utc(tz) or tz is None: use_utc = True @@ -347,17 +352,18 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: tdata = cnp.PyArray_DATA(trans) for i in range(n): + utc_val = stamps[i] if use_utc: - local_val = stamps[i] + local_val = utc_val elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + local_val = utc_val + localize_tzinfo_api(utc_val, tz) elif use_fixed: - local_val = stamps[i] + delta + local_val = utc_val + delta else: - pos = bisect_right_i8(tdata, stamps[i], ntrans) - 1 - local_val = stamps[i] + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 + local_val = utc_val + deltas[pos] - if local_val % day_nanos != 0: + if local_val % DAY_NANOS != 0: return False return True @@ -370,15 +376,17 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: @cython.boundscheck(False) def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): cdef: - Py_ssize_t i, ntrans =- 1, n = len(stamps) - int64_t[::1] result = np.empty(n, dtype=np.int64) + Py_ssize_t i, ntrans = -1, n = stamps.shape[0] ndarray[int64_t] trans int64_t[::1] deltas int64_t* tdata = NULL intp_t pos - npy_datetimestruct dts - int64_t local_val, delta = NPY_NAT + int64_t utc_val, local_val, delta = NPY_NAT bint use_utc = False, use_tzlocal = False, use_fixed = False + str typ + + npy_datetimestruct dts + int64_t[::1] result = np.empty(n, dtype=np.int64) if is_utc(tz) or tz is None: use_utc = True @@ -395,19 +403,20 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): tdata = cnp.PyArray_DATA(trans) for i in range(n): - if stamps[i] == NPY_NAT: + utc_val = stamps[i] + if utc_val == NPY_NAT: result[i] = NPY_NAT continue if use_utc: - local_val = stamps[i] + local_val = utc_val elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + local_val = utc_val + localize_tzinfo_api(utc_val, tz) elif use_fixed: - local_val = stamps[i] + delta + local_val = utc_val + delta else: - pos = bisect_right_i8(tdata, stamps[i], ntrans) - 1 - local_val = stamps[i] + deltas[pos] + pos = bisect_right_i8(tdata, utc_val, ntrans) - 1 + local_val = utc_val + deltas[pos] dt64_to_dtstruct(local_val, &dts) result[i] = get_period_ordinal(&dts, freq)