Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Assorted _libs cleanups #22235

Merged
merged 20 commits into the base branch from the contributor's branch
Aug 10, 2018
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
Py_ssize_t i, j, k

k = len(values)
for j from 0 <= j < k:
for j in range(k):
i = indexer[j]
out[i] = values[j, loc]

Expand Down
22 changes: 11 additions & 11 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def get_dispatch(dtypes):
fv = fill_value

%(nogil_str)s
%(tab)sfor i from 0 <= i < n:
%(tab)sfor i in range(n):
%(tab)s idx = indexer[i]
%(tab)s if idx == -1:
%(tab)s out[i] = fv
Expand Down Expand Up @@ -74,24 +74,24 @@ def get_dispatch(dtypes):
values.strides[1] == sizeof(%(c_type_out)s) and
sizeof(%(c_type_out)s) * n >= 256):

for i from 0 <= i < n:
for i in range(n):
idx = indexer[i]
if idx == -1:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = fv
else:
v = &values[idx, 0]
o = &out[i, 0]
memmove(o, v, <size_t>(sizeof(%(c_type_out)s) * k))
return

for i from 0 <= i < n:
for i in range(n):
idx = indexer[i]
if idx == -1:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = fv
else:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = %(preval)svalues[idx, j]%(postval)s
"""

Expand All @@ -108,8 +108,8 @@ def get_dispatch(dtypes):

fv = fill_value

for i from 0 <= i < n:
for j from 0 <= j < k:
for i in range(n):
for j in range(k):
idx = indexer[j]
if idx == -1:
out[i, j] = fv
Expand Down Expand Up @@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
k = len(idx1)

fv = fill_value
for i from 0 <= i < n:
for i in range(n):
idx = idx0[i]
if idx == -1:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = fv
else:
for j from 0 <= j < k:
for j in range(k):
if idx1[j] == -1:
out[i, j] = fv
else:
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
{{endif}}
elif keep == 'first':
{{if dtype == 'object'}}
for i from 0 <= i < n:
for i in range(n):
kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
out[i] = ret == 0
{{else}}
with nogil:
for i from 0 <= i < n:
for i in range(n):
kh_put_{{ttype}}(table, values[i], &ret)
out[i] = ret == 0
{{endif}}
else:
{{if dtype == 'object'}}
for i from 0 <= i < n:
for i in range(n):
value = values[i]
k = kh_get_{{ttype}}(table, <PyObject*> value)
if k != table.n_buckets:
Expand All @@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
out[i] = 0
{{else}}
with nogil:
for i from 0 <= i < n:
for i in range(n):
value = values[i]
k = kh_get_{{ttype}}(table, value)
if k != table.n_buckets:
Expand Down
43 changes: 22 additions & 21 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,30 @@ cdef int64_t NPY_NAT = util.get_nat()
from util cimport is_array, is_nan


def values_from_object(object o):
def values_from_object(object obj):
""" return my values or the object if we are say an ndarray """
cdef f
cdef func # TODO: Does declaring this without a type accomplish anything?

f = getattr(o, 'get_values', None)
if f is not None:
o = f()
func = getattr(obj, 'get_values', None)
if func is not None:
obj = func()

return o
return obj


@cython.wraparound(False)
@cython.boundscheck(False)
def memory_usage_of_objects(ndarray[object, ndim=1] arr):
def memory_usage_of_objects(object[:] arr):
""" return the memory usage of an object array in bytes,
does not include the actual bytes of the pointers """
cdef Py_ssize_t i, n
cdef int64_t s = 0
cdef:
Py_ssize_t i, n
int64_t size = 0

n = len(arr)
for i from 0 <= i < n:
s += arr[i].__sizeof__()
return s
for i in range(n):
size += arr[i].__sizeof__()
return size


# ----------------------------------------------------------------------
Expand Down Expand Up @@ -138,10 +139,10 @@ def fast_unique_multiple(list arrays):
dict table = {}
object val, stub = 0

for i from 0 <= i < k:
for i in range(k):
buf = arrays[i]
n = len(buf)
for j from 0 <= j < n:
for j in range(n):
val = buf[j]
if val not in table:
table[val] = stub
Expand All @@ -165,10 +166,10 @@ def fast_unique_multiple_list(list lists, bint sort=True):
dict table = {}
object val, stub = 0

for i from 0 <= i < k:
for i in range(k):
buf = lists[i]
n = len(buf)
for j from 0 <= j < n:
for j in range(n):
val = buf[j]
if val not in table:
table[val] = stub
Expand Down Expand Up @@ -208,7 +209,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True):

for buf in gen:
n = len(buf)
for j from 0 <= j < n:
for j in range(n):
val = buf[j]
if val not in table:
table[val] = stub
Expand Down Expand Up @@ -669,15 +670,15 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
if axis == 0:
counts = np.zeros((max_bin, k), dtype='i8')
with nogil:
for i from 0 <= i < n:
for j from 0 <= j < k:
for i in range(n):
for j in range(k):
counts[labels[i], j] += mask[i, j]

else: # axis == 1
counts = np.zeros((n, max_bin), dtype='i8')
with nogil:
for i from 0 <= i < n:
for j from 0 <= j < k:
for i in range(n):
for j in range(k):
counts[i, labels[j]] += mask[i, j]

return counts
Expand Down
35 changes: 17 additions & 18 deletions pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
# cython: profile=False
import operator

from cpython cimport (PyFloat_Check, PyBool_Check,
Expand All @@ -21,7 +20,7 @@ from missing cimport checknull

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_compare(ndarray[object] values, object val, object op):
def scalar_compare(object[:] values, object val, object op):
"""
Compare each element of `values` array with the scalar `val`, with
the comparison operation described by `op`.
Expand Down Expand Up @@ -73,7 +72,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
else:
try:
result[i] = PyObject_RichCompareBool(x, val, flag)
except (TypeError):
except TypeError:
result[i] = True
elif flag == Py_EQ:
for i in range(n):
Expand All @@ -85,7 +84,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
else:
try:
result[i] = PyObject_RichCompareBool(x, val, flag)
except (TypeError):
except TypeError:
result[i] = False

else:
Expand All @@ -103,7 +102,7 @@ def scalar_compare(ndarray[object] values, object val, object op):

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_compare(ndarray[object] left, ndarray[object] right, object op):
def vec_compare(object[:] left, object[:] right, object op):
"""
Compare the elements of `left` with the elements of `right` pointwise,
with the comparison operation described by `op`.
Expand All @@ -126,8 +125,8 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):
int flag

if n != len(right):
raise ValueError('Arrays were different lengths: %d vs %d'
% (n, len(right)))
raise ValueError('Arrays were different lengths: {n} vs {nright}'
.format(n=n, nright=len(right)))

if op is operator.lt:
flag = Py_LT
Expand Down Expand Up @@ -170,7 +169,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_binop(ndarray[object] values, object val, object op):
def scalar_binop(object[:] values, object val, object op):
"""
Apply the given binary operator `op` between each element of the array
`values` and the scalar `val`.
Expand All @@ -187,13 +186,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
"""
cdef:
Py_ssize_t i, n = len(values)
ndarray[object] result
object[:] result
object x

result = np.empty(n, dtype=object)
if val is None or is_nan(val):
result.fill(val)
return result
result[:] = val
return result.base # `.base` to access underlying np.ndarray

for i in range(n):
x = values[i]
Expand All @@ -202,12 +201,12 @@ def scalar_binop(ndarray[object] values, object val, object op):
else:
result[i] = op(x, val)

return maybe_convert_bool(result)
return maybe_convert_bool(result.base)


@cython.wraparound(False)
@cython.boundscheck(False)
def vec_binop(ndarray[object] left, ndarray[object] right, object op):
def vec_binop(object[:] left, object[:] right, object op):
"""
Apply the given binary operator `op` pointwise to the elements of
arrays `left` and `right`.
Expand All @@ -224,11 +223,11 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
"""
cdef:
Py_ssize_t i, n = len(left)
ndarray[object] result
object[:] result

if n != len(right):
raise ValueError('Arrays were different lengths: %d vs %d'
% (n, len(right)))
raise ValueError('Arrays were different lengths: {n} vs {nright}'
.format(n=n, nright=len(right)))

result = np.empty(n, dtype=object)

Expand All @@ -245,7 +244,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
else:
raise

return maybe_convert_bool(result)
return maybe_convert_bool(result.base) # `.base` to access np.ndarray


def maybe_convert_bool(ndarray[object] arr,
Expand All @@ -270,7 +269,7 @@ def maybe_convert_bool(ndarray[object] arr,
if false_values is not None:
false_vals = false_vals | set(false_values)

for i from 0 <= i < n:
for i in range(n):
val = arr[i]

if PyBool_Check(val):
Expand Down
Loading