Skip to content

Commit

Permalink
[CLN] More Misc Cleanups in _libs (pandas-dev#22287)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Aug 20, 2018
1 parent a3c50a6 commit b6e35ff
Show file tree
Hide file tree
Showing 12 changed files with 118 additions and 93 deletions.
7 changes: 5 additions & 2 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from util cimport numeric
from numpy cimport float64_t, double_t


cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil


# Exchange the values that `a` and `b` point to, using `t` as scratch space.
# Declared `nogil`; always returns 0.
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
# NOTE(review): `t` is declared twice below (`cdef numeric t` and the
# `cdef:` block form). These look like the removed and added sides of this
# diff hunk with the +/- markers lost in extraction -- confirm against the
# commit; only one declaration can exist in the real file.
cdef numeric t
cdef:
numeric t

# cython doesn't allow pointer dereference so use array syntax
t = a[0]
a[0] = b[0]
b[0] = t
return 0


cdef enum TiebreakEnumType:
TIEBREAK_AVERAGE
TIEBREAK_MIN,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tiebreakers = {
}


cdef inline are_diff(object left, object right):
cdef inline bint are_diff(object left, object right):
try:
return fabs(left - right) > FP_ERR
except TypeError:
Expand Down
59 changes: 33 additions & 26 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef {{c_type}} cur, next
cdef int lim, fill_count = 0
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, next
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
Expand Down Expand Up @@ -135,9 +135,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0

N = len(values)

Expand Down Expand Up @@ -171,9 +172,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef Py_ssize_t i, j, N, K
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
int lim, fill_count = 0

K, N = (<object> values).shape

Expand Down Expand Up @@ -233,10 +235,11 @@ D
@cython.wraparound(False)
def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef {{c_type}} cur, prev
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, prev
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
Expand Down Expand Up @@ -299,9 +302,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0

N = len(values)

Expand Down Expand Up @@ -335,9 +339,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef Py_ssize_t i, j, N, K
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
int lim, fill_count = 0

K, N = (<object> values).shape

Expand Down Expand Up @@ -428,10 +433,10 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
cdef Py_ssize_t length = index.shape[0]
cdef Py_ssize_t i = 0

cdef ndarray[object] result = np.empty(length, dtype=np.object_)
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)

from pandas._libs.lib import maybe_convert_objects

Expand Down Expand Up @@ -535,6 +540,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,

cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num


cpdef ensure_platform_int(object arr):
# GH3033, GH1392
# platform int is the size of the int pointer, e.g. np.intp
Expand All @@ -546,6 +552,7 @@ cpdef ensure_platform_int(object arr):
else:
return np.array(arr, dtype=np.intp)


cpdef ensure_object(object arr):
if util.is_array(arr):
if (<ndarray> arr).descr.type_num == NPY_OBJECT:
Expand Down
9 changes: 5 additions & 4 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ cdef double nan = NaN


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
cdef int i, j, na_count = 0
cdef float64_t result
cdef float64_t* tmp
cdef:
int i, j, na_count = 0
float64_t result
float64_t* tmp

if n == 0:
return NaN
Expand Down Expand Up @@ -318,7 +319,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

# If we move to the next group, reset
# the fill_idx and counter
if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
curr_fill_idx = -1
filled_vals = 0

Expand Down
16 changes: 8 additions & 8 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
if len(k) != 16:
raise ValueError(
'key should be a 16-byte string encoded, got {!r} (len {})'.format(
k, len(k)))
raise ValueError("key should be a 16-byte string encoded, "
"got {key} (len {klen})".format(key=k, klen=len(k)))

n = len(arr)

Expand All @@ -70,8 +69,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
data = <bytes>str(val).encode(encoding)

else:
raise TypeError("{} of type {} is not a valid type for hashing, "
"must be string or null".format(val, type(val)))
raise TypeError("{val} of type {typ} is not a valid type "
"for hashing, must be string or null"
.format(val=val, typ=type(val)))

l = len(data)
lens[i] = l
Expand Down Expand Up @@ -134,9 +134,9 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,

# Validate that `key` is exactly 16 bytes long, then return the result of
# low_level_siphash(data, len(data), key). Raises ValueError for any other
# key length.
cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
# NOTE(review): two `raise` statements follow; they look like the removed
# and added sides of this diff hunk (markers lost in extraction). The
# first formats the key with `{!r}`, the second with a plain `{key}`, so
# the rewritten message no longer forces the key through repr() --
# confirm the rendered text is still what callers/tests expect.
raise ValueError(
'key should be a 16-byte bytestring, got {!r} (len {})'.format(
key, len(key)))
raise ValueError("key should be a 16-byte bytestring, "
"got {key} (len {klen})"
.format(key=key, klen=len(key)))
return low_level_siphash(data, len(data), key)


Expand Down
14 changes: 10 additions & 4 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ cdef class {{name}}Vector:

if needs_resize(self.data):
if self.external_view_exists:
raise ValueError("external reference but Vector.resize() needed")
raise ValueError("external reference but "
"Vector.resize() needed")
self.resize()

append_data_{{dtype}}(self.data, x)
Expand Down Expand Up @@ -194,6 +195,7 @@ cdef class StringVector:
for i in range(len(x)):
self.append(x[i])


cdef class ObjectVector:

cdef:
Expand All @@ -215,7 +217,8 @@ cdef class ObjectVector:
cdef inline append(self, object o):
if self.n == self.m:
if self.external_view_exists:
raise ValueError("external reference but Vector.resize() needed")
raise ValueError("external reference but "
"Vector.resize() needed")
self.m = max(self.m * 2, _INIT_VEC_CAP)
self.ao.resize(self.m, refcheck=False)
self.data = <PyObject**> self.ao.data
Expand Down Expand Up @@ -405,8 +408,9 @@ cdef class {{name}}HashTable(HashTable):
if needs_resize(ud):
with gil:
if uniques.external_view_exists:
raise ValueError("external reference to uniques held, "
"but Vector.resize() needed")
raise ValueError("external reference to "
"uniques held, but "
"Vector.resize() needed")
uniques.resize()
append_data_{{dtype}}(ud, val)
labels[i] = count
Expand Down Expand Up @@ -742,8 +746,10 @@ cdef class StringHashTable(HashTable):

return np.asarray(labels)


na_sentinel = object


cdef class PyObjectHashTable(HashTable):

def __init__(self, size_hint=1):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
{{endfor}}



#----------------------------------------------------------------------
# Mode Computations
#----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'
cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):

if (util.is_array(obj) or
isinstance(obj, list) and len(obj) == cnt or
(isinstance(obj, list) and len(obj) == cnt) or
getattr(obj, 'shape', None) == (cnt,)):
raise ValueError('function does not reduce')

Expand Down
22 changes: 12 additions & 10 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ cdef inline object create_datetime_from_ts(
return datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, tz)


# Only the year/month/day fields of `dts` are read here; `value`, `tz` and
# `freq` are not used in this body.
# NOTE(review): presumably they are kept so the signature lines up with the
# neighbouring create_time_from_ts helper, which takes the same parameters --
# confirm against the callers.
cdef inline object create_date_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
""" convenience routine to construct a datetime.date from its parts """
return date(dts.year, dts.month, dts.day)


cdef inline object create_time_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
Expand Down Expand Up @@ -350,8 +352,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):

if ((fvalues < _NS_LOWER_BOUND).any()
or (fvalues > _NS_UPPER_BOUND).any()):
raise OutOfBoundsDatetime(
"cannot convert input with unit '{0}'".format(unit))
raise OutOfBoundsDatetime("cannot convert input with unit "
"'{unit}'".format(unit=unit))
result = (iresult * m).astype('M8[ns]')
iresult = result.view('i8')
iresult[mask] = iNaT
Expand All @@ -377,8 +379,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
except OverflowError:
if is_raise:
raise OutOfBoundsDatetime(
"cannot convert input {0} with the unit "
"'{1}'".format(val, unit))
"cannot convert input {val} with the unit "
"'{unit}'".format(val=val, unit=unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
Expand All @@ -393,16 +395,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
except ValueError:
if is_raise:
raise ValueError(
"non convertible value {0} with the unit "
"'{1}'".format(val, unit))
"non convertible value {val} with the unit "
"'{unit}'".format(val=val, unit=unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
except:
if is_raise:
raise OutOfBoundsDatetime(
"cannot convert input {0} with the unit "
"'{1}'".format(val, unit))
"cannot convert input {val} with the unit "
"'{unit}'".format(val=val, unit=unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
Expand Down Expand Up @@ -695,8 +697,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
if is_coerce:
iresult[i] = NPY_NAT
else:
raise TypeError("{0} is not convertible to datetime"
.format(type(val)))
raise TypeError("{typ} is not convertible to datetime"
.format(typ=type(val)))

if seen_datetime and seen_integer:
# we have mixed datetimes & integers
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cpdef bint tz_compare(object start, object end)
cpdef object get_timezone(object tz)
cpdef object maybe_get_tz(object tz)

# NOTE(review): get_utcoffset appears twice, once as `cpdef` and once as
# `cdef`; these look like the removed and added sides of this diff hunk
# (markers lost in extraction). A `cdef` function is not callable from Python
# code, so verify nothing imports get_utcoffset from Python before relying on
# the `cdef` form.
cpdef get_utcoffset(tzinfo, obj)
cdef get_utcoffset(tzinfo, obj)
cdef bint is_fixed_offset(object tz)

cdef object get_dst_info(object tz)
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ cdef inline object tz_cache_key(object tz):
# UTC Offsets


cpdef get_utcoffset(tzinfo, obj):
cdef get_utcoffset(tzinfo, obj):
try:
return tzinfo._utcoffset
except AttributeError:
Expand Down Expand Up @@ -186,7 +186,7 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz):
return new_trans


cpdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef:
Py_ssize_t i, sz
int64_t[:] arr
Expand Down
Loading

0 comments on commit b6e35ff

Please sign in to comment.