CLN: Assorted _libs cleanups (pandas-dev#22235)

jbrockmendel authored and gfyoung committed Aug 10, 2018
1 parent 475e391 commit 7390963

Showing 31 changed files with 201 additions and 268 deletions.
2 changes: 1 addition & 1 deletion pandas/_libs/algos_common_helper.pxi.in
@@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
         Py_ssize_t i, j, k

     k = len(values)
-    for j from 0 <= j < k:
+    for j in range(k):
         i = indexer[j]
         out[i] = values[j, loc]
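
Note on the pattern in this file: `for j from 0 <= j < k:` is Cython's legacy integer-loop syntax; with a typed index variable, `for j in range(k):` compiles to the same C loop. A minimal standalone sketch (illustrative .pyx code, not part of the diff):

    def typed_sum(Py_ssize_t n):
        # `i` is typed, so Cython lowers range(n) to a plain C for-loop,
        # making the deprecated `for i from 0 <= i < n:` form unnecessary
        cdef Py_ssize_t i
        cdef long long total = 0
        for i in range(n):
            total += i
        return total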
22 changes: 11 additions & 11 deletions pandas/_libs/algos_take_helper.pxi.in
@@ -46,7 +46,7 @@ def get_dispatch(dtypes):
     fv = fill_value

     %(nogil_str)s
-    %(tab)sfor i from 0 <= i < n:
+    %(tab)sfor i in range(n):
     %(tab)s    idx = indexer[i]
     %(tab)s    if idx == -1:
     %(tab)s        out[i] = fv
@@ -74,24 +74,24 @@ def get_dispatch(dtypes):
             values.strides[1] == sizeof(%(c_type_out)s) and
             sizeof(%(c_type_out)s) * n >= 256):

-        for i from 0 <= i < n:
+        for i in range(n):
             idx = indexer[i]
             if idx == -1:
-                for j from 0 <= j < k:
+                for j in range(k):
                     out[i, j] = fv
             else:
                 v = &values[idx, 0]
                 o = &out[i, 0]
                 memmove(o, v, <size_t>(sizeof(%(c_type_out)s) * k))
         return

-    for i from 0 <= i < n:
+    for i in range(n):
         idx = indexer[i]
         if idx == -1:
-            for j from 0 <= j < k:
+            for j in range(k):
                 out[i, j] = fv
         else:
-            for j from 0 <= j < k:
+            for j in range(k):
                 out[i, j] = %(preval)svalues[idx, j]%(postval)s
 """

@@ -108,8 +108,8 @@ def get_dispatch(dtypes):

     fv = fill_value

-    for i from 0 <= i < n:
-        for j from 0 <= j < k:
+    for i in range(n):
+        for j in range(k):
             idx = indexer[j]
             if idx == -1:
                 out[i, j] = fv
@@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
     k = len(idx1)

     fv = fill_value
-    for i from 0 <= i < n:
+    for i in range(n):
         idx = idx0[i]
         if idx == -1:
-            for j from 0 <= j < k:
+            for j in range(k):
                 out[i, j] = fv
         else:
-            for j from 0 <= j < k:
+            for j in range(k):
                 if idx1[j] == -1:
                     out[i, j] = fv
                 else:
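
For context on the loops rewritten in this file: each take helper copies `values[indexer[i]]` into `out[i]`, substituting the fill value wherever the indexer holds -1. A rough untyped sketch of the 1-D case (hypothetical helper name, not the templated pandas code):

    import numpy as np

    def take_1d_sketch(values, indexer, fill_value=np.nan):
        # out[i] = values[indexer[i]]; -1 marks a missing position
        out = np.empty(len(indexer), dtype=object)
        for i in range(len(indexer)):
            idx = indexer[i]
            out[i] = fill_value if idx == -1 else values[idx]
        return out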
8 changes: 4 additions & 4 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     {{endif}}
     elif keep == 'first':
     {{if dtype == 'object'}}
-        for i from 0 <= i < n:
+        for i in range(n):
             kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
             out[i] = ret == 0
     {{else}}
         with nogil:
-            for i from 0 <= i < n:
+            for i in range(n):
                 kh_put_{{ttype}}(table, values[i], &ret)
                 out[i] = ret == 0
     {{endif}}
     else:
     {{if dtype == 'object'}}
-        for i from 0 <= i < n:
+        for i in range(n):
             value = values[i]
             k = kh_get_{{ttype}}(table, <PyObject*> value)
             if k != table.n_buckets:
@@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
                 out[i] = 0
     {{else}}
         with nogil:
-            for i from 0 <= i < n:
+            for i in range(n):
                 value = values[i]
                 k = kh_get_{{ttype}}(table, value)
                 if k != table.n_buckets:
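
The `keep='first'` branch above leans on khash's insertion return code: `kh_put_*` sets `ret == 0` when the key was already present, which marks the element as a duplicate. The same logic with a plain Python set (a sketch of the idea, not the khash-backed implementation):

    def duplicated_first_sketch(values):
        seen = set()
        out = []
        for val in values:
            out.append(val in seen)  # mirrors kh_put's ret == 0 ("already present")
            seen.add(val)
        return out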
23 changes: 12 additions & 11 deletions pandas/_libs/lib.pyx
@@ -78,29 +78,30 @@ cdef bint PY2 = sys.version_info[0] == 2
 cdef double nan = <double>np.NaN


-def values_from_object(object o):
+def values_from_object(object obj):
     """ return my values or the object if we are say an ndarray """
-    cdef f
+    cdef func  # TODO: Does declaring this without a type accomplish anything?

-    f = getattr(o, 'get_values', None)
-    if f is not None:
-        o = f()
+    func = getattr(obj, 'get_values', None)
+    if func is not None:
+        obj = func()

-    return o
+    return obj


 @cython.wraparound(False)
 @cython.boundscheck(False)
-def memory_usage_of_objects(ndarray[object, ndim=1] arr):
+def memory_usage_of_objects(object[:] arr):
     """ return the memory usage of an object array in bytes,
     does not include the actual bytes of the pointers """
-    cdef Py_ssize_t i, n
-    cdef int64_t s = 0
+    cdef:
+        Py_ssize_t i, n
+        int64_t size = 0

     n = len(arr)
     for i in range(n):
-        s += arr[i].__sizeof__()
-    return s
+        size += arr[i].__sizeof__()
+    return size


 # ----------------------------------------------------------------------
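
Two idioms adopted in lib.pyx above: a typed-memoryview signature (`object[:] arr` accepts any 1-D object buffer, not only an `ndarray`) and a single grouped `cdef:` block for locals. A self-contained sketch in the same style (illustrative only; assumes the numpy .pxd is available):

    from numpy cimport int64_t

    def count_none(object[:] arr):
        # typed locals grouped in one cdef block, as in the rewrite above
        cdef:
            Py_ssize_t i, n = len(arr)
            int64_t count = 0
        for i in range(n):
            if arr[i] is None:
                count += 1
        return count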
35 changes: 17 additions & 18 deletions pandas/_libs/ops.pyx
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-# cython: profile=False
 import operator

 from cpython cimport (PyFloat_Check, PyBool_Check,
@@ -21,7 +20,7 @@ from missing cimport checknull

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_compare(ndarray[object] values, object val, object op):
+def scalar_compare(object[:] values, object val, object op):
     """
     Compare each element of `values` array with the scalar `val`, with
     the comparison operation described by `op`.
@@ -73,7 +72,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
             else:
                 try:
                     result[i] = PyObject_RichCompareBool(x, val, flag)
-                except (TypeError):
+                except TypeError:
                     result[i] = True
     elif flag == Py_EQ:
         for i in range(n):
@@ -85,7 +84,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
             else:
                 try:
                     result[i] = PyObject_RichCompareBool(x, val, flag)
-                except (TypeError):
+                except TypeError:
                     result[i] = False

     else:
@@ -103,7 +102,7 @@ def scalar_compare(ndarray[object] values, object val, object op):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_compare(ndarray[object] left, ndarray[object] right, object op):
+def vec_compare(object[:] left, object[:] right, object op):
     """
     Compare the elements of `left` with the elements of `right` pointwise,
     with the comparison operation described by `op`.
@@ -126,8 +125,8 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):
         int flag

     if n != len(right):
-        raise ValueError('Arrays were different lengths: %d vs %d'
-                         % (n, len(right)))
+        raise ValueError('Arrays were different lengths: {n} vs {nright}'
+                         .format(n=n, nright=len(right)))

     if op is operator.lt:
         flag = Py_LT
@@ -170,7 +169,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_binop(ndarray[object] values, object val, object op):
+def scalar_binop(object[:] values, object val, object op):
     """
     Apply the given binary operator `op` between each element of the array
     `values` and the scalar `val`.
@@ -187,13 +186,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
     """
     cdef:
         Py_ssize_t i, n = len(values)
-        ndarray[object] result
+        object[:] result
         object x

     result = np.empty(n, dtype=object)
     if val is None or is_nan(val):
-        result.fill(val)
-        return result
+        result[:] = val
+        return result.base  # `.base` to access underlying np.ndarray

     for i in range(n):
         x = values[i]
@@ -202,12 +201,12 @@ def scalar_binop(ndarray[object] values, object val, object op):
         else:
             result[i] = op(x, val)

-    return maybe_convert_bool(result)
+    return maybe_convert_bool(result.base)


 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_binop(ndarray[object] left, ndarray[object] right, object op):
+def vec_binop(object[:] left, object[:] right, object op):
     """
     Apply the given binary operator `op` pointwise to the elements of
     arrays `left` and `right`.
@@ -224,11 +223,11 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
     """
     cdef:
         Py_ssize_t i, n = len(left)
-        ndarray[object] result
+        object[:] result

     if n != len(right):
-        raise ValueError('Arrays were different lengths: %d vs %d'
-                         % (n, len(right)))
+        raise ValueError('Arrays were different lengths: {n} vs {nright}'
+                         .format(n=n, nright=len(right)))

     result = np.empty(n, dtype=object)

@@ -245,7 +244,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
         else:
             raise

-    return maybe_convert_bool(result)
+    return maybe_convert_bool(result.base)  # `.base` to access np.ndarray


 def maybe_convert_bool(ndarray[object] arr,
@@ -270,7 +269,7 @@ def maybe_convert_bool(ndarray[object] arr,
     if false_values is not None:
         false_vals = false_vals | set(false_values)

-    for i from 0 <= i < n:
+    for i in range(n):
         val = arr[i]

         if PyBool_Check(val):
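
The `.base` calls introduced above are the one subtlety of the memoryview rewrite in ops.pyx: `result` is a typed view over a freshly allocated ndarray, and callers expect the ndarray itself, which stays reachable through the view's `.base` attribute. A compact sketch of the pattern (assumed function name, not a pandas API):

    import numpy as np

    def filled_object_array(object val, Py_ssize_t n):
        cdef object[:] result = np.empty(n, dtype=object)
        result[:] = val      # broadcast-assign the scalar into the view
        return result.base   # hand back the ndarray the view was built on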
25 changes: 13 additions & 12 deletions pandas/_libs/sparse.pyx
@@ -148,7 +148,7 @@ cdef class IntIndex(SparseIndex):
         new_indices = np.empty(min(
             len(xindices), len(yindices)), dtype=np.int32)

-        for xi from 0 <= xi < self.npoints:
+        for xi in range(self.npoints):
             xind = xindices[xi]

             while yi < y.npoints and yindices[yi] < xind:
@@ -292,7 +292,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices):

     # TODO: two-pass algorithm faster?
     prev = block = indices[0]
-    for i from 1 <= i < npoints:
+    for i in range(1, npoints):
         cur = indices[i]
         if cur - prev > 1:
             # new block
@@ -383,21 +383,22 @@ cdef class BlockIndex(SparseIndex):
         if len(blocs) != len(blengths):
             raise ValueError('block bound arrays must be same length')

-        for i from 0 <= i < self.nblocks:
+        for i in range(self.nblocks):
             if i > 0:
                 if blocs[i] <= blocs[i - 1]:
                     raise ValueError('Locations not in ascending order')

             if i < self.nblocks - 1:
                 if blocs[i] + blengths[i] > blocs[i + 1]:
-                    raise ValueError('Block %d overlaps' % i)
+                    raise ValueError('Block {idx} overlaps'.format(idx=i))
             else:
                 if blocs[i] + blengths[i] > self.length:
-                    raise ValueError('Block %d extends beyond end' % i)
+                    raise ValueError('Block {idx} extends beyond end'
+                                     .format(idx=i))

             # no zero-length blocks
             if blengths[i] == 0:
-                raise ValueError('Zero-length block %d' % i)
+                raise ValueError('Zero-length block {idx}'.format(idx=i))

     def equals(self, other):
         if not isinstance(other, BlockIndex):
Expand All @@ -422,10 +423,10 @@ cdef class BlockIndex(SparseIndex):

indices = np.empty(self.npoints, dtype=np.int32)

for b from 0 <= b < self.nblocks:
for b in range(self.nblocks):
offset = self.locbuf[b]

for j from 0 <= j < self.lenbuf[b]:
for j in range(self.lenbuf[b]):
indices[i] = offset + j
i += 1

@@ -551,7 +552,7 @@ cdef class BlockIndex(SparseIndex):
             return -1

         cum_len = 0
-        for i from 0 <= i < self.nblocks:
+        for i in range(self.nblocks):
             if index >= locs[i] and index < locs[i] + lens[i]:
                 return cum_len + index - locs[i]
             cum_len += lens[i]
@@ -579,11 +580,11 @@ cdef class BlockIndex(SparseIndex):
         if self.npoints == 0:
             return results

-        for i from 0 <= i < n:
+        for i in range(n):
             ind_val = indexer[i]
             if not (ind_val < 0 or self.length <= ind_val):
                 cum_len = 0
-                for j from 0 <= j < self.nblocks:
+                for j in range(self.nblocks):
                     if ind_val >= locs[j] and ind_val < locs[j] + lens[j]:
                         results[i] = cum_len + ind_val - locs[j]
                     cum_len += lens[j]
@@ -824,7 +825,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map):

     # out = np.empty(length, dtype=np.float64)

-    # for i from 0 <= i < length:
+    # for i in range(length):
     #     if indexer[i] == -1:
     #         pass

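
The BlockIndex constructor loop rewritten above enforces three invariants on (blocs, blengths): block starts strictly ascend, blocks neither overlap nor run past the index length, and no block is empty. Restated as a standalone check (a sketch, not the BlockIndex class itself):

    def check_blocks(blocs, blengths, length):
        nblocks = len(blocs)
        for i in range(nblocks):
            if i > 0 and blocs[i] <= blocs[i - 1]:
                raise ValueError('Locations not in ascending order')
            if i < nblocks - 1:
                if blocs[i] + blengths[i] > blocs[i + 1]:
                    raise ValueError('Block {idx} overlaps'.format(idx=i))
            elif blocs[i] + blengths[i] > length:
                raise ValueError('Block {idx} extends beyond end'.format(idx=i))
            if blengths[i] == 0:
                raise ValueError('Zero-length block {idx}'.format(idx=i))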
4 changes: 2 additions & 2 deletions pandas/_libs/sparse_op_helper.pxi.in
@@ -190,7 +190,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_,
     # Wow, what a hack job. Need to do something about this

     # walk the two SparseVectors, adding matched locations...
-    for out_i from 0 <= out_i < out_index.npoints:
+    for out_i in range(out_index.npoints):
         if yblock == yindex.nblocks:
             # use y fill value
             out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
@@ -286,7 +286,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex,
     out_indices = out_index.indices

     # walk the two SparseVectors, adding matched locations...
-    for out_i from 0 <= out_i < out_index.npoints:
+    for out_i in range(out_index.npoints):
         if xi == xindex.npoints:
             # use x fill value
             out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
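
The templated loops above walk two sparse vectors pointer-by-pointer over the merged output index, substituting a side's fill value wherever its own index lacks the output location. A dict-based restatement of the same result (a sketch of the semantics, not the two-pointer walk itself):

    def sparse_binop_sketch(op, x, xinds, xfill, y, yinds, yfill):
        out_inds = sorted(set(xinds) | set(yinds))
        xmap = dict(zip(xinds, x))
        ymap = dict(zip(yinds, y))
        # each output slot takes the stored value if present, else the fill
        return out_inds, [op(xmap.get(i, xfill), ymap.get(i, yfill))
                          for i in out_inds]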
2 changes: 1 addition & 1 deletion pandas/_libs/src/compat_helper.h
@@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software.
 #define PANDAS__LIBS_SRC_COMPAT_HELPER_H_

 #include "Python.h"
-#include "helper.h"
+#include "inline_helper.h"

 /*
 PySlice_GetIndicesEx changes signature in PY3