CLN: Assorted _libs cleanups (pandas-dev#22235)

jbrockmendel authored and gfyoung committed Aug 10, 2018
1 parent 475e391 commit 7390963

Showing 31 changed files with 201 additions and 268 deletions.
2 changes: 1 addition & 1 deletion pandas/_libs/algos_common_helper.pxi.in
@@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
         Py_ssize_t i, j, k

     k = len(values)
-    for j from 0 <= j < k:
+    for j in range(k):
         i = indexer[j]
         out[i] = values[j, loc]
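
Note on the pattern in this file: `for j from 0 <= j < k:` is Cython's legacy integer-loop syntax; with a typed index variable, `for j in range(k):` compiles to the same C loop. A minimal standalone sketch (illustrative .pyx code, not part of the diff):

    def typed_sum(Py_ssize_t n):
        # `i` is typed, so Cython lowers range(n) to a plain C for-loop,
        # making the deprecated `for i from 0 <= i < n:` form unnecessary
        cdef Py_ssize_t i
        cdef long long total = 0
        for i in range(n):
            total += i
        return total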
22 changes: 11 additions & 11 deletions pandas/_libs/algos_take_helper.pxi.in
@@ -46,7 +46,7 @@ def get_dispatch(dtypes):
     fv = fill_value

     %(nogil_str)s
-    %(tab)sfor i from 0 <= i < n:
+    %(tab)sfor i in range(n):
     %(tab)s    idx = indexer[i]
     %(tab)s    if idx == -1:
     %(tab)s        out[i] = fv
@@ -74,24 +74,24 @@ def get_dispatch(dtypes):
             values.strides[1] == sizeof(%(c_type_out)s) and
             sizeof(%(c_type_out)s) * n >= 256):

-        for i from 0 <= i < n:
+        for i in range(n):
             idx = indexer[i]
             if idx == -1:
-                for j from 0 <= j < k:
+                for j in range(k):
                     out[i, j] = fv
             else:
                 v = &values[idx, 0]
                 o = &out[i, 0]
                 memmove(o, v, <size_t>(sizeof(%(c_type_out)s) * k))
         return

-    for i from 0 <= i < n:
+    for i in range(n):
         idx = indexer[i]
         if idx == -1:
-            for j from 0 <= j < k:
+            for j in range(k):
                 out[i, j] = fv
         else:
-            for j from 0 <= j < k:
+            for j in range(k):
                 out[i, j] = %(preval)svalues[idx, j]%(postval)s
 """

@@ -108,8 +108,8 @@ def get_dispatch(dtypes):

     fv = fill_value

-    for i from 0 <= i < n:
-        for j from 0 <= j < k:
+    for i in range(n):
+        for j in range(k):
             idx = indexer[j]
             if idx == -1:
                 out[i, j] = fv
@@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
     k = len(idx1)

     fv = fill_value
-    for i from 0 <= i < n:
+    for i in range(n):
         idx = idx0[i]
         if idx == -1:
-            for j from 0 <= j < k:
+            for j in range(k):
                 out[i, j] = fv
         else:
-            for j from 0 <= j < k:
+            for j in range(k):
                 if idx1[j] == -1:
                     out[i, j] = fv
                 else:
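
For context on the loops rewritten in this file: each take helper copies `values[indexer[i]]` into `out[i]`, substituting the fill value wherever the indexer holds -1. A rough untyped sketch of the 1-D case (hypothetical helper name, not the templated pandas code):

    import numpy as np

    def take_1d_sketch(values, indexer, fill_value=np.nan):
        # out[i] = values[indexer[i]]; -1 marks a missing position
        out = np.empty(len(indexer), dtype=object)
        for i in range(len(indexer)):
            idx = indexer[i]
            out[i] = fill_value if idx == -1 else values[idx]
        return out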
8 changes: 4 additions & 4 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     {{endif}}
     elif keep == 'first':
     {{if dtype == 'object'}}
-        for i from 0 <= i < n:
+        for i in range(n):
             kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
             out[i] = ret == 0
     {{else}}
         with nogil:
-            for i from 0 <= i < n:
+            for i in range(n):
                 kh_put_{{ttype}}(table, values[i], &ret)
                 out[i] = ret == 0
     {{endif}}
     else:
     {{if dtype == 'object'}}
-        for i from 0 <= i < n:
+        for i in range(n):
             value = values[i]
             k = kh_get_{{ttype}}(table, <PyObject*> value)
             if k != table.n_buckets:
@@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
                 out[i] = 0
     {{else}}
         with nogil:
-            for i from 0 <= i < n:
+            for i in range(n):
                 value = values[i]
                 k = kh_get_{{ttype}}(table, value)
                 if k != table.n_buckets:
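
The `keep='first'` branch above leans on khash's insertion return code: `kh_put_*` sets `ret == 0` when the key was already present, which marks the element as a duplicate. The same logic with a plain Python set (a sketch of the idea, not the khash-backed implementation):

    def duplicated_first_sketch(values):
        seen = set()
        out = []
        for val in values:
            out.append(val in seen)  # mirrors kh_put's ret == 0 ("already present")
            seen.add(val)
        return out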
23 changes: 12 additions & 11 deletions pandas/_libs/lib.pyx
@@ -78,29 +78,30 @@ cdef bint PY2 = sys.version_info[0] == 2
 cdef double nan = <double>np.NaN


-def values_from_object(object o):
+def values_from_object(object obj):
     """ return my values or the object if we are say an ndarray """
-    cdef f
+    cdef func  # TODO: Does declaring this without a type accomplish anything?

-    f = getattr(o, 'get_values', None)
-    if f is not None:
-        o = f()
+    func = getattr(obj, 'get_values', None)
+    if func is not None:
+        obj = func()

-    return o
+    return obj


 @cython.wraparound(False)
 @cython.boundscheck(False)
-def memory_usage_of_objects(ndarray[object, ndim=1] arr):
+def memory_usage_of_objects(object[:] arr):
     """ return the memory usage of an object array in bytes,
     does not include the actual bytes of the pointers """
-    cdef Py_ssize_t i, n
-    cdef int64_t s = 0
+    cdef:
+        Py_ssize_t i, n
+        int64_t size = 0

     n = len(arr)
     for i in range(n):
-        s += arr[i].__sizeof__()
-    return s
+        size += arr[i].__sizeof__()
+    return size


 # ----------------------------------------------------------------------
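
Two idioms adopted in lib.pyx above: a typed-memoryview signature (`object[:] arr` accepts any 1-D object buffer, not only an `ndarray`) and a single grouped `cdef:` block for locals. A self-contained sketch in the same style (illustrative only; assumes the numpy .pxd is available):

    from numpy cimport int64_t

    def count_none(object[:] arr):
        # typed locals grouped in one cdef block, as in the rewrite above
        cdef:
            Py_ssize_t i, n = len(arr)
            int64_t count = 0
        for i in range(n):
            if arr[i] is None:
                count += 1
        return count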
35 changes: 17 additions & 18 deletions pandas/_libs/ops.pyx
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-# cython: profile=False
 import operator

 from cpython cimport (PyFloat_Check, PyBool_Check,
@@ -21,7 +20,7 @@ from missing cimport checknull

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_compare(ndarray[object] values, object val, object op):
+def scalar_compare(object[:] values, object val, object op):
     """
     Compare each element of `values` array with the scalar `val`, with
     the comparison operation described by `op`.
@@ -73,7 +72,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
             else:
                 try:
                     result[i] = PyObject_RichCompareBool(x, val, flag)
-                except (TypeError):
+                except TypeError:
                     result[i] = True
     elif flag == Py_EQ:
         for i in range(n):
@@ -85,7 +84,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
             else:
                 try:
                     result[i] = PyObject_RichCompareBool(x, val, flag)
-                except (TypeError):
+                except TypeError:
                     result[i] = False

     else:
@@ -103,7 +102,7 @@ def scalar_compare(ndarray[object] values, object val, object op):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_compare(ndarray[object] left, ndarray[object] right, object op):
+def vec_compare(object[:] left, object[:] right, object op):
     """
     Compare the elements of `left` with the elements of `right` pointwise,
     with the comparison operation described by `op`.
@@ -126,8 +125,8 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):
         int flag

     if n != len(right):
-        raise ValueError('Arrays were different lengths: %d vs %d'
-                         % (n, len(right)))
+        raise ValueError('Arrays were different lengths: {n} vs {nright}'
+                         .format(n=n, nright=len(right)))

     if op is operator.lt:
         flag = Py_LT
@@ -170,7 +169,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_binop(ndarray[object] values, object val, object op):
+def scalar_binop(object[:] values, object val, object op):
     """
     Apply the given binary operator `op` between each element of the array
     `values` and the scalar `val`.
@@ -187,13 +186,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
     """
     cdef:
         Py_ssize_t i, n = len(values)
-        ndarray[object] result
+        object[:] result
         object x

     result = np.empty(n, dtype=object)
     if val is None or is_nan(val):
-        result.fill(val)
-        return result
+        result[:] = val
+        return result.base  # `.base` to access underlying np.ndarray

     for i in range(n):
         x = values[i]
@@ -202,12 +201,12 @@ def scalar_binop(ndarray[object] values, object val, object op):
         else:
             result[i] = op(x, val)

-    return maybe_convert_bool(result)
+    return maybe_convert_bool(result.base)


 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_binop(ndarray[object] left, ndarray[object] right, object op):
+def vec_binop(object[:] left, object[:] right, object op):
     """
     Apply the given binary operator `op` pointwise to the elements of
     arrays `left` and `right`.
@@ -224,11 +223,11 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
     """
     cdef:
         Py_ssize_t i, n = len(left)
-        ndarray[object] result
+        object[:] result

     if n != len(right):
-        raise ValueError('Arrays were different lengths: %d vs %d'
-                         % (n, len(right)))
+        raise ValueError('Arrays were different lengths: {n} vs {nright}'
+                         .format(n=n, nright=len(right)))

     result = np.empty(n, dtype=object)

@@ -245,7 +244,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
         else:
             raise

-    return maybe_convert_bool(result)
+    return maybe_convert_bool(result.base)  # `.base` to access np.ndarray


 def maybe_convert_bool(ndarray[object] arr,
@@ -270,7 +269,7 @@ def maybe_convert_bool(ndarray[object] arr,
     if false_values is not None:
         false_vals = false_vals | set(false_values)

-    for i from 0 <= i < n:
+    for i in range(n):
         val = arr[i]

         if PyBool_Check(val):
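
The `.base` calls introduced above are the one subtlety of the memoryview rewrite in ops.pyx: `result` is a typed view over a freshly allocated ndarray, and callers expect the ndarray itself, which stays reachable through the view's `.base` attribute. A compact sketch of the pattern (assumed function name, not a pandas API):

    import numpy as np

    def filled_object_array(object val, Py_ssize_t n):
        cdef object[:] result = np.empty(n, dtype=object)
        result[:] = val      # broadcast-assign the scalar into the view
        return result.base   # hand back the ndarray the view was built on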
25 changes: 13 additions & 12 deletions pandas/_libs/sparse.pyx
@@ -148,7 +148,7 @@ cdef class IntIndex(SparseIndex):
         new_indices = np.empty(min(
             len(xindices), len(yindices)), dtype=np.int32)

-        for xi from 0 <= xi < self.npoints:
+        for xi in range(self.npoints):
             xind = xindices[xi]

             while yi < y.npoints and yindices[yi] < xind:
@@ -292,7 +292,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices):

     # TODO: two-pass algorithm faster?
     prev = block = indices[0]
-    for i from 1 <= i < npoints:
+    for i in range(1, npoints):
         cur = indices[i]
         if cur - prev > 1:
             # new block
@@ -383,21 +383,22 @@ cdef class BlockIndex(SparseIndex):
         if len(blocs) != len(blengths):
             raise ValueError('block bound arrays must be same length')

-        for i from 0 <= i < self.nblocks:
+        for i in range(self.nblocks):
             if i > 0:
                 if blocs[i] <= blocs[i - 1]:
                     raise ValueError('Locations not in ascending order')

             if i < self.nblocks - 1:
                 if blocs[i] + blengths[i] > blocs[i + 1]:
-                    raise ValueError('Block %d overlaps' % i)
+                    raise ValueError('Block {idx} overlaps'.format(idx=i))
             else:
                 if blocs[i] + blengths[i] > self.length:
-                    raise ValueError('Block %d extends beyond end' % i)
+                    raise ValueError('Block {idx} extends beyond end'
+                                     .format(idx=i))

             # no zero-length blocks
             if blengths[i] == 0:
-                raise ValueError('Zero-length block %d' % i)
+                raise ValueError('Zero-length block {idx}'.format(idx=i))

     def equals(self, other):
         if not isinstance(other, BlockIndex):
Expand All @@ -422,10 +423,10 @@ cdef class BlockIndex(SparseIndex):

indices = np.empty(self.npoints, dtype=np.int32)

for b from 0 <= b < self.nblocks:
for b in range(self.nblocks):
offset = self.locbuf[b]

for j from 0 <= j < self.lenbuf[b]:
for j in range(self.lenbuf[b]):
indices[i] = offset + j
i += 1

@@ -551,7 +552,7 @@ cdef class BlockIndex(SparseIndex):
             return -1

         cum_len = 0
-        for i from 0 <= i < self.nblocks:
+        for i in range(self.nblocks):
             if index >= locs[i] and index < locs[i] + lens[i]:
                 return cum_len + index - locs[i]
             cum_len += lens[i]
@@ -579,11 +580,11 @@ cdef class BlockIndex(SparseIndex):
         if self.npoints == 0:
             return results

-        for i from 0 <= i < n:
+        for i in range(n):
             ind_val = indexer[i]
             if not (ind_val < 0 or self.length <= ind_val):
                 cum_len = 0
-                for j from 0 <= j < self.nblocks:
+                for j in range(self.nblocks):
                     if ind_val >= locs[j] and ind_val < locs[j] + lens[j]:
                         results[i] = cum_len + ind_val - locs[j]
                     cum_len += lens[j]
@@ -824,7 +825,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map):

     # out = np.empty(length, dtype=np.float64)

-    # for i from 0 <= i < length:
+    # for i in range(length):
     #     if indexer[i] == -1:
     #         pass

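
The BlockIndex constructor loop rewritten above enforces three invariants on (blocs, blengths): block starts strictly ascend, blocks neither overlap nor run past the index length, and no block is empty. Restated as a standalone check (a sketch, not the BlockIndex class itself):

    def check_blocks(blocs, blengths, length):
        nblocks = len(blocs)
        for i in range(nblocks):
            if i > 0 and blocs[i] <= blocs[i - 1]:
                raise ValueError('Locations not in ascending order')
            if i < nblocks - 1:
                if blocs[i] + blengths[i] > blocs[i + 1]:
                    raise ValueError('Block {idx} overlaps'.format(idx=i))
            elif blocs[i] + blengths[i] > length:
                raise ValueError('Block {idx} extends beyond end'.format(idx=i))
            if blengths[i] == 0:
                raise ValueError('Zero-length block {idx}'.format(idx=i))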
4 changes: 2 additions & 2 deletions pandas/_libs/sparse_op_helper.pxi.in
@@ -190,7 +190,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_,
     # Wow, what a hack job. Need to do something about this

     # walk the two SparseVectors, adding matched locations...
-    for out_i from 0 <= out_i < out_index.npoints:
+    for out_i in range(out_index.npoints):
         if yblock == yindex.nblocks:
             # use y fill value
             out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
@@ -286,7 +286,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex,
     out_indices = out_index.indices

     # walk the two SparseVectors, adding matched locations...
-    for out_i from 0 <= out_i < out_index.npoints:
+    for out_i in range(out_index.npoints):
         if xi == xindex.npoints:
             # use x fill value
             out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
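
The templated loops above walk two sparse vectors pointer-by-pointer over the merged output index, substituting a side's fill value wherever its own index lacks the output location. A dict-based restatement of the same result (a sketch of the semantics, not the two-pointer walk itself):

    def sparse_binop_sketch(op, x, xinds, xfill, y, yinds, yfill):
        out_inds = sorted(set(xinds) | set(yinds))
        xmap = dict(zip(xinds, x))
        ymap = dict(zip(yinds, y))
        # each output slot takes the stored value if present, else the fill
        return out_inds, [op(xmap.get(i, xfill), ymap.get(i, yfill))
                          for i in out_inds]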
2 changes: 1 addition & 1 deletion pandas/_libs/src/compat_helper.h
@@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software.
 #define PANDAS__LIBS_SRC_COMPAT_HELPER_H_

 #include "Python.h"
-#include "helper.h"
+#include "inline_helper.h"

 /*
 PySlice_GetIndicesEx changes signature in PY3