[CLN] More Misc Cleanups in _libs (pandas-dev#22287)

peterpanmj · Aug 20, 2018 · b6e35ff
1 parent a3c50a6
commit b6e35ff
Show file tree

Hide file tree

Showing 12 changed files with 118 additions and 93 deletions.
diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd
@@ -1,17 +1,20 @@
 from util cimport numeric
-from numpy cimport float64_t, double_t
+
 
 cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil
 
+
 cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
-    cdef numeric t
+    cdef:
+        numeric t
 
     # cython doesn't allow pointer dereference so use array syntax
     t = a[0]
     a[0] = b[0]
     b[0] = t
     return 0
 
+
 cdef enum TiebreakEnumType:
     TIEBREAK_AVERAGE
     TIEBREAK_MIN,

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -45,7 +45,7 @@ tiebreakers = {
 }
 
 
-cdef inline are_diff(object left, object right):
+cdef inline bint are_diff(object left, object right):
     try:
         return fabs(left - right) > FP_ERR
     except TypeError:

diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
@@ -68,12 +68,12 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
-                 limit=None):
-    cdef Py_ssize_t i, j, nleft, nright
-    cdef ndarray[int64_t, ndim=1] indexer
-    cdef {{c_type}} cur, next
-    cdef int lim, fill_count = 0
+def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
+    cdef:
+        Py_ssize_t i, j, nleft, nright
+        ndarray[int64_t, ndim=1] indexer
+        {{c_type}} cur, next
+        int lim, fill_count = 0
 
     nleft = len(old)
     nright = len(new)
@@ -135,9 +135,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
 def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
                          ndarray[uint8_t, cast=True] mask,
                          limit=None):
-    cdef Py_ssize_t i, N
-    cdef {{c_type}} val
-    cdef int lim, fill_count = 0
+    cdef:
+        Py_ssize_t i, N
+        {{c_type}} val
+        int lim, fill_count = 0
 
     N = len(values)
 
@@ -171,9 +172,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
 def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
                             ndarray[uint8_t, ndim=2] mask,
                             limit=None):
-    cdef Py_ssize_t i, j, N, K
-    cdef {{c_type}} val
-    cdef int lim, fill_count = 0
+    cdef:
+        Py_ssize_t i, j, N, K
+        {{c_type}} val
+        int lim, fill_count = 0
 
     K, N = (<object> values).shape
 
@@ -233,10 +235,11 @@ D
 @cython.wraparound(False)
 def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
                       limit=None):
-    cdef Py_ssize_t i, j, nleft, nright
-    cdef ndarray[int64_t, ndim=1] indexer
-    cdef {{c_type}} cur, prev
-    cdef int lim, fill_count = 0
+    cdef:
+        Py_ssize_t i, j, nleft, nright
+        ndarray[int64_t, ndim=1] indexer
+        {{c_type}} cur, prev
+        int lim, fill_count = 0
 
     nleft = len(old)
     nright = len(new)
@@ -299,9 +302,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
 def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
                               ndarray[uint8_t, cast=True] mask,
                               limit=None):
-    cdef Py_ssize_t i, N
-    cdef {{c_type}} val
-    cdef int lim, fill_count = 0
+    cdef:
+        Py_ssize_t i, N
+        {{c_type}} val
+        int lim, fill_count = 0
 
     N = len(values)
 
@@ -335,9 +339,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
 def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
                                  ndarray[uint8_t, ndim=2] mask,
                                  limit=None):
-    cdef Py_ssize_t i, j, N, K
-    cdef {{c_type}} val
-    cdef int lim, fill_count = 0
+    cdef:
+        Py_ssize_t i, j, N, K
+        {{c_type}} val
+        int lim, fill_count = 0
 
     K, N = (<object> values).shape
 
@@ -428,10 +433,10 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
-    cdef Py_ssize_t length = index.shape[0]
-    cdef Py_ssize_t i = 0
-
-    cdef ndarray[object] result = np.empty(length, dtype=np.object_)
+    cdef:
+        Py_ssize_t length = index.shape[0]
+        Py_ssize_t i = 0
+        ndarray[object] result = np.empty(length, dtype=np.object_)
 
     from pandas._libs.lib import maybe_convert_objects
 
@@ -535,6 +540,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
 
 cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num
 
+
 cpdef ensure_platform_int(object arr):
     # GH3033, GH1392
     # platform int is the size of the int pointer, e.g. np.intp
@@ -546,6 +552,7 @@ cpdef ensure_platform_int(object arr):
     else:
         return np.array(arr, dtype=np.intp)
 
+
 cpdef ensure_object(object arr):
     if util.is_array(arr):
         if (<ndarray> arr).descr.type_num == NPY_OBJECT:

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -27,9 +27,10 @@ cdef double nan = NaN
 
 
 cdef inline float64_t median_linear(float64_t* a, int n) nogil:
-    cdef int i, j, na_count = 0
-    cdef float64_t result
-    cdef float64_t* tmp
+    cdef:
+        int i, j, na_count = 0
+        float64_t result
+        float64_t* tmp
 
     if n == 0:
         return NaN
@@ -318,7 +319,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
 
             # If we move to the next group, reset
             # the fill_idx and counter
-            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
+            if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
                 curr_fill_idx = -1
                 filled_vals = 0
 

diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
@@ -48,9 +48,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
     k = <bytes>key.encode(encoding)
     kb = <uint8_t *>k
     if len(k) != 16:
-        raise ValueError(
-            'key should be a 16-byte string encoded, got {!r} (len {})'.format(
-                k, len(k)))
+        raise ValueError("key should be a 16-byte string encoded, "
+                         "got {key} (len {klen})".format(key=k, klen=len(k)))
 
     n = len(arr)
 
@@ -70,8 +69,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
             data = <bytes>str(val).encode(encoding)
 
         else:
-            raise TypeError("{} of type {} is not a valid type for hashing, "
-                            "must be string or null".format(val, type(val)))
+            raise TypeError("{val} of type {typ} is not a valid type "
+                            "for hashing, must be string or null"
+                            .format(val=val, typ=type(val)))
 
         l = len(data)
         lens[i] = l
@@ -134,9 +134,9 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
 
 cpdef uint64_t siphash(bytes data, bytes key) except? 0:
     if len(key) != 16:
-        raise ValueError(
-            'key should be a 16-byte bytestring, got {!r} (len {})'.format(
-                key, len(key)))
+        raise ValueError("key should be a 16-byte bytestring, "
+                         "got {key} (len {klen})"
+                         .format(key=key, klen=len(key)))
     return low_level_siphash(data, len(data), key)
 
 

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -115,7 +115,8 @@ cdef class {{name}}Vector:
 
         if needs_resize(self.data):
             if self.external_view_exists:
-                raise ValueError("external reference but Vector.resize() needed")
+                raise ValueError("external reference but "
+                                 "Vector.resize() needed")
             self.resize()
 
         append_data_{{dtype}}(self.data, x)
@@ -194,6 +195,7 @@ cdef class StringVector:
         for i in range(len(x)):
             self.append(x[i])
 
+
 cdef class ObjectVector:
 
     cdef:
@@ -215,7 +217,8 @@ cdef class ObjectVector:
     cdef inline append(self, object o):
         if self.n == self.m:
             if self.external_view_exists:
-                raise ValueError("external reference but Vector.resize() needed")
+                raise ValueError("external reference but "
+                                 "Vector.resize() needed")
             self.m = max(self.m * 2, _INIT_VEC_CAP)
             self.ao.resize(self.m, refcheck=False)
             self.data = <PyObject**> self.ao.data
@@ -405,8 +408,9 @@ cdef class {{name}}HashTable(HashTable):
                     if needs_resize(ud):
                         with gil:
                             if uniques.external_view_exists:
-                                raise ValueError("external reference to uniques held, "
-                                        "but Vector.resize() needed")
+                                raise ValueError("external reference to "
+                                                 "uniques held, but "
+                                                 "Vector.resize() needed")
                             uniques.resize()
                     append_data_{{dtype}}(ud, val)
                     labels[i] = count
@@ -742,8 +746,10 @@ cdef class StringHashTable(HashTable):
 
         return np.asarray(labels)
 
+
 na_sentinel = object
 
+
 cdef class PyObjectHashTable(HashTable):
 
     def __init__(self, size_hint=1):

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -273,7 +273,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
 {{endfor}}
 
 
-
 #----------------------------------------------------------------------
 # Mode Computations
 #----------------------------------------------------------------------

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -24,7 +24,7 @@ is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'
 cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
 
     if (util.is_array(obj) or
-            isinstance(obj, list) and len(obj) == cnt or
+            (isinstance(obj, list) and len(obj) == cnt) or
             getattr(obj, 'shape', None) == (cnt,)):
         raise ValueError('function does not reduce')
 

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -61,12 +61,14 @@ cdef inline object create_datetime_from_ts(
     return datetime(dts.year, dts.month, dts.day, dts.hour,
                     dts.min, dts.sec, dts.us, tz)
 
+
 cdef inline object create_date_from_ts(
         int64_t value, npy_datetimestruct dts,
         object tz, object freq):
     """ convenience routine to construct a datetime.date from its parts """
     return date(dts.year, dts.month, dts.day)
 
+
 cdef inline object create_time_from_ts(
         int64_t value, npy_datetimestruct dts,
         object tz, object freq):
@@ -350,8 +352,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
 
             if ((fvalues < _NS_LOWER_BOUND).any()
                     or (fvalues > _NS_UPPER_BOUND).any()):
-                raise OutOfBoundsDatetime(
-                    "cannot convert input with unit '{0}'".format(unit))
+                raise OutOfBoundsDatetime("cannot convert input with unit "
+                                          "'{unit}'".format(unit=unit))
             result = (iresult * m).astype('M8[ns]')
             iresult = result.view('i8')
             iresult[mask] = iNaT
@@ -377,8 +379,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
                     except OverflowError:
                         if is_raise:
                             raise OutOfBoundsDatetime(
-                                "cannot convert input {0} with the unit "
-                                "'{1}'".format(val, unit))
+                                "cannot convert input {val} with the unit "
+                                "'{unit}'".format(val=val, unit=unit))
                         elif is_ignore:
                             raise AssertionError
                         iresult[i] = NPY_NAT
@@ -393,16 +395,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
                     except ValueError:
                         if is_raise:
                             raise ValueError(
-                                "non convertible value {0} with the unit "
-                                "'{1}'".format(val, unit))
+                                "non convertible value {val} with the unit "
+                                "'{unit}'".format(val=val, unit=unit))
                         elif is_ignore:
                             raise AssertionError
                         iresult[i] = NPY_NAT
                     except:
                         if is_raise:
                             raise OutOfBoundsDatetime(
-                                "cannot convert input {0} with the unit "
-                                "'{1}'".format(val, unit))
+                                "cannot convert input {val} with the unit "
+                                "'{unit}'".format(val=val, unit=unit))
                         elif is_ignore:
                             raise AssertionError
                         iresult[i] = NPY_NAT
@@ -695,8 +697,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
                 if is_coerce:
                     iresult[i] = NPY_NAT
                 else:
-                    raise TypeError("{0} is not convertible to datetime"
-                                    .format(type(val)))
+                    raise TypeError("{typ} is not convertible to datetime"
+                                    .format(typ=type(val)))
 
         if seen_datetime and seen_integer:
             # we have mixed datetimes & integers

diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd
@@ -10,7 +10,7 @@ cpdef bint tz_compare(object start, object end)
 cpdef object get_timezone(object tz)
 cpdef object maybe_get_tz(object tz)
 
-cpdef get_utcoffset(tzinfo, obj)
+cdef get_utcoffset(tzinfo, obj)
 cdef bint is_fixed_offset(object tz)
 
 cdef object get_dst_info(object tz)
diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx
@@ -149,7 +149,7 @@ cdef inline object tz_cache_key(object tz):
 # UTC Offsets
 
 
-cpdef get_utcoffset(tzinfo, obj):
+cdef get_utcoffset(tzinfo, obj):
     try:
         return tzinfo._utcoffset
     except AttributeError:
@@ -186,7 +186,7 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz):
     return new_trans
 
 
-cpdef int64_t[:] unbox_utcoffsets(object transinfo):
+cdef int64_t[:] unbox_utcoffsets(object transinfo):
     cdef:
         Py_ssize_t i, sz
         int64_t[:] arr