diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 0d3f6664da9e3..97b7196da80bb 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, Py_ssize_t i, j, k k = len(values) - for j from 0 <= j < k: + for j in range(k): i = indexer[j] out[i] = values[j, loc] diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 71bb1bb4fe9be..0e69324acd341 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -46,7 +46,7 @@ def get_dispatch(dtypes): fv = fill_value %(nogil_str)s - %(tab)sfor i from 0 <= i < n: + %(tab)sfor i in range(n): %(tab)s idx = indexer[i] %(tab)s if idx == -1: %(tab)s out[i] = fv @@ -74,10 +74,10 @@ def get_dispatch(dtypes): values.strides[1] == sizeof(%(c_type_out)s) and sizeof(%(c_type_out)s) * n >= 256): - for i from 0 <= i < n: + for i in range(n): idx = indexer[i] if idx == -1: - for j from 0 <= j < k: + for j in range(k): out[i, j] = fv else: v = &values[idx, 0] @@ -85,13 +85,13 @@ def get_dispatch(dtypes): memmove(o, v, (sizeof(%(c_type_out)s) * k)) return - for i from 0 <= i < n: + for i in range(n): idx = indexer[i] if idx == -1: - for j from 0 <= j < k: + for j in range(k): out[i, j] = fv else: - for j from 0 <= j < k: + for j in range(k): out[i, j] = %(preval)svalues[idx, j]%(postval)s """ @@ -108,8 +108,8 @@ def get_dispatch(dtypes): fv = fill_value - for i from 0 <= i < n: - for j from 0 <= j < k: + for i in range(n): + for j in range(k): idx = indexer[j] if idx == -1: out[i, j] = fv @@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, k = len(idx1) fv = fill_value - for i from 0 <= i < n: + for i in range(n): idx = idx0[i] if idx == -1: - for j from 0 <= j < k: + for j in range(k): out[i, j] = fv else: - for j from 0 <= j < k: + for j in range(k): if idx1[j] == -1: out[i, j] = fv else: diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 521e564447c59..5aea0c65d6dd0 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): {{endif}} elif keep == 'first': {{if dtype == 'object'}} - for i from 0 <= i < n: + for i in range(n): kh_put_{{ttype}}(table, values[i], &ret) out[i] = ret == 0 {{else}} with nogil: - for i from 0 <= i < n: + for i in range(n): kh_put_{{ttype}}(table, values[i], &ret) out[i] = ret == 0 {{endif}} else: {{if dtype == 'object'}} - for i from 0 <= i < n: + for i in range(n): value = values[i] k = kh_get_{{ttype}}(table, value) if k != table.n_buckets: @@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): out[i] = 0 {{else}} with nogil: - for i from 0 <= i < n: + for i in range(n): value = values[i] k = kh_get_{{ttype}}(table, value) if k != table.n_buckets: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 89a96342a414b..e05905ab63624 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -78,29 +78,30 @@ cdef bint PY2 = sys.version_info[0] == 2 cdef double nan = np.NaN -def values_from_object(object o): +def values_from_object(object obj): """ return my values or the object if we are say an ndarray """ - cdef f + cdef func # TODO: Does declaring this without a type accomplish anything? - f = getattr(o, 'get_values', None) - if f is not None: - o = f() + func = getattr(obj, 'get_values', None) + if func is not None: + obj = func() - return o + return obj @cython.wraparound(False) @cython.boundscheck(False) -def memory_usage_of_objects(ndarray[object, ndim=1] arr): +def memory_usage_of_objects(object[:] arr): """ return the memory usage of an object array in bytes, does not include the actual bytes of the pointers """ - cdef Py_ssize_t i, n - cdef int64_t s = 0 + cdef: + Py_ssize_t i, n + int64_t size = 0 n = len(arr) for i in range(n): - s += arr[i].__sizeof__() - return s + size += arr[i].__sizeof__() + return size # ---------------------------------------------------------------------- diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index ec9d8304f9243..a194f1588e231 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False import operator from cpython cimport (PyFloat_Check, PyBool_Check, @@ -21,7 +20,7 @@ from missing cimport checknull @cython.wraparound(False) @cython.boundscheck(False) -def scalar_compare(ndarray[object] values, object val, object op): +def scalar_compare(object[:] values, object val, object op): """ Compare each element of `values` array with the scalar `val`, with the comparison operation described by `op`. @@ -73,7 +72,7 @@ def scalar_compare(ndarray[object] values, object val, object op): else: try: result[i] = PyObject_RichCompareBool(x, val, flag) - except (TypeError): + except TypeError: result[i] = True elif flag == Py_EQ: for i in range(n): @@ -85,7 +84,7 @@ def scalar_compare(ndarray[object] values, object val, object op): else: try: result[i] = PyObject_RichCompareBool(x, val, flag) - except (TypeError): + except TypeError: result[i] = False else: @@ -103,7 +102,7 @@ def scalar_compare(ndarray[object] values, object val, object op): @cython.wraparound(False) @cython.boundscheck(False) -def vec_compare(ndarray[object] left, ndarray[object] right, object op): +def vec_compare(object[:] left, object[:] right, object op): """ Compare the elements of `left` with the elements of `right` pointwise, with the comparison operation described by `op`. @@ -126,8 +125,8 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): int flag if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) + raise ValueError('Arrays were different lengths: {n} vs {nright}' + .format(n=n, nright=len(right))) if op is operator.lt: flag = Py_LT @@ -170,7 +169,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(ndarray[object] values, object val, object op): +def scalar_binop(object[:] values, object val, object op): """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. @@ -187,13 +186,13 @@ def scalar_binop(ndarray[object] values, object val, object op): """ cdef: Py_ssize_t i, n = len(values) - ndarray[object] result + object[:] result object x result = np.empty(n, dtype=object) if val is None or is_nan(val): - result.fill(val) - return result + result[:] = val + return result.base # `.base` to access underlying np.ndarray for i in range(n): x = values[i] @@ -202,12 +201,12 @@ def scalar_binop(ndarray[object] values, object val, object op): else: result[i] = op(x, val) - return maybe_convert_bool(result) + return maybe_convert_bool(result.base) @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(ndarray[object] left, ndarray[object] right, object op): +def vec_binop(object[:] left, object[:] right, object op): """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. @@ -224,11 +223,11 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op): """ cdef: Py_ssize_t i, n = len(left) - ndarray[object] result + object[:] result if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) + raise ValueError('Arrays were different lengths: {n} vs {nright}' + .format(n=n, nright=len(right))) result = np.empty(n, dtype=object) @@ -245,7 +244,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op): else: raise - return maybe_convert_bool(result) + return maybe_convert_bool(result.base) # `.base` to access np.ndarray def maybe_convert_bool(ndarray[object] arr, @@ -270,7 +269,7 @@ def maybe_convert_bool(ndarray[object] arr, if false_values is not None: false_vals = false_vals | set(false_values) - for i from 0 <= i < n: + for i in range(n): val = arr[i] if PyBool_Check(val): diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 2abd270652433..7f5990ce5d65c 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -148,7 +148,7 @@ cdef class IntIndex(SparseIndex): new_indices = np.empty(min( len(xindices), len(yindices)), dtype=np.int32) - for xi from 0 <= xi < self.npoints: + for xi in range(self.npoints): xind = xindices[xi] while yi < y.npoints and yindices[yi] < xind: @@ -292,7 +292,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices): # TODO: two-pass algorithm faster? prev = block = indices[0] - for i from 1 <= i < npoints: + for i in range(1, npoints): cur = indices[i] if cur - prev > 1: # new block @@ -383,21 +383,22 @@ cdef class BlockIndex(SparseIndex): if len(blocs) != len(blengths): raise ValueError('block bound arrays must be same length') - for i from 0 <= i < self.nblocks: + for i in range(self.nblocks): if i > 0: if blocs[i] <= blocs[i - 1]: raise ValueError('Locations not in ascending order') if i < self.nblocks - 1: if blocs[i] + blengths[i] > blocs[i + 1]: - raise ValueError('Block %d overlaps' % i) + raise ValueError('Block {idx} overlaps'.format(idx=i)) else: if blocs[i] + blengths[i] > self.length: - raise ValueError('Block %d extends beyond end' % i) + raise ValueError('Block {idx} extends beyond end' + .format(idx=i)) # no zero-length blocks if blengths[i] == 0: - raise ValueError('Zero-length block %d' % i) + raise ValueError('Zero-length block {idx}'.format(idx=i)) def equals(self, other): if not isinstance(other, BlockIndex): @@ -422,10 +423,10 @@ cdef class BlockIndex(SparseIndex): indices = np.empty(self.npoints, dtype=np.int32) - for b from 0 <= b < self.nblocks: + for b in range(self.nblocks): offset = self.locbuf[b] - for j from 0 <= j < self.lenbuf[b]: + for j in range(self.lenbuf[b]): indices[i] = offset + j i += 1 @@ -551,7 +552,7 @@ cdef class BlockIndex(SparseIndex): return -1 cum_len = 0 - for i from 0 <= i < self.nblocks: + for i in range(self.nblocks): if index >= locs[i] and index < locs[i] + lens[i]: return cum_len + index - locs[i] cum_len += lens[i] @@ -579,11 +580,11 @@ cdef class BlockIndex(SparseIndex): if self.npoints == 0: return results - for i from 0 <= i < n: + for i in range(n): ind_val = indexer[i] if not (ind_val < 0 or self.length <= ind_val): cum_len = 0 - for j from 0 <= j < self.nblocks: + for j in range(self.nblocks): if ind_val >= locs[j] and ind_val < locs[j] + lens[j]: results[i] = cum_len + ind_val - locs[j] cum_len += lens[j] @@ -824,7 +825,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map): # out = np.empty(length, dtype=np.float64) -# for i from 0 <= i < length: +# for i in range(length): # if indexer[i] == -1: # pass diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index d1d9a6f02a72c..2843a3cf7dd28 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -190,7 +190,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_, # Wow, what a hack job. Need to do something about this # walk the two SparseVectors, adding matched locations... - for out_i from 0 <= out_i < out_index.npoints: + for out_i in range(out_index.npoints): if yblock == yindex.nblocks: # use y fill value out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} @@ -286,7 +286,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex, out_indices = out_index.indices # walk the two SparseVectors, adding matched locations... - for out_i from 0 <= out_i < out_index.npoints: + for out_i in range(out_index.npoints): if xi == xindex.npoints: # use x fill value out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} diff --git a/pandas/_libs/src/compat_helper.h b/pandas/_libs/src/compat_helper.h index 116cd91070a60..462f53392adee 100644 --- a/pandas/_libs/src/compat_helper.h +++ b/pandas/_libs/src/compat_helper.h @@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software. #define PANDAS__LIBS_SRC_COMPAT_HELPER_H_ #include "Python.h" -#include "helper.h" +#include "inline_helper.h" /* PySlice_GetIndicesEx changes signature in PY3 diff --git a/pandas/_libs/src/helper.h b/pandas/_libs/src/inline_helper.h similarity index 80% rename from pandas/_libs/src/helper.h rename to pandas/_libs/src/inline_helper.h index 26b4d033b963b..397ec8e7b2cb8 100644 --- a/pandas/_libs/src/helper.h +++ b/pandas/_libs/src/inline_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS__LIBS_SRC_HELPER_H_ -#define PANDAS__LIBS_SRC_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_INLINE_HELPER_H_ +#define PANDAS__LIBS_SRC_INLINE_HELPER_H_ #ifndef PANDAS_INLINE #if defined(__GNUC__) @@ -22,4 +22,4 @@ The full license is in the LICENSE file, distributed with this software. #endif #endif -#endif // PANDAS__LIBS_SRC_HELPER_H_ +#endif // PANDAS__LIBS_SRC_INLINE_HELPER_H_ diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index 869607a44c001..77ec519cc24da 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -112,6 +112,7 @@ int main() { #include #include #include +#include "../inline_helper.h" #if UINT_MAX == 0xffffffffu @@ -130,18 +131,6 @@ typedef signed long long khint64_t; typedef double khfloat64_t; -#ifndef PANDAS_INLINE - #if defined(__GNUC__) - #define PANDAS_INLINE static __inline__ - #elif defined(_MSC_VER) - #define PANDAS_INLINE static __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define PANDAS_INLINE static inline - #else - #define PANDAS_INLINE - #endif -#endif - typedef khint32_t khint_t; typedef khint_t khiter_t; diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 753cba6ce62aa..d44334906901a 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software. #define PANDAS__LIBS_SRC_NUMPY_HELPER_H_ #include "Python.h" -#include "helper.h" +#include "inline_helper.h" #include "numpy/arrayobject.h" #include "numpy/arrayscalars.h" diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index d17d9166ea3ee..4f9f825b15ffe 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -12,7 +12,7 @@ The full license is in the LICENSE file, distributed with this software. #include #include -#include "helper.h" +#include "inline_helper.h" #include "headers/portable.h" static double xstrtod(const char *p, char **q, char decimal, char sci, diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index a18d12616a802..da0a9f7498aa8 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -363,7 +363,7 @@ static int push_char(parser_t *self, char c) { return 0; } -int P_INLINE end_field(parser_t *self) { +int PANDAS_INLINE end_field(parser_t *self) { // XXX cruft if (self->words_len >= self->words_cap) { TRACE( @@ -1381,11 +1381,11 @@ int tokenize_all_rows(parser_t *self) { return status; } -P_INLINE void uppercase(char *p) { +PANDAS_INLINE void uppercase(char *p) { for (; *p; ++p) *p = toupper(*p); } -int P_INLINE to_longlong(char *item, long long *p_value) { +int PANDAS_INLINE to_longlong(char *item, long long *p_value) { char *p_end; // Try integer conversion. We explicitly give the base to be 10. If diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 63baf91e3c136..9fc3593aaaf5b 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -27,6 +27,7 @@ See LICENSE for the license #define ERROR_INVALID_CHARS 3 #include "../headers/stdint.h" +#include "../inline_helper.h" #include "khash.h" @@ -38,17 +39,6 @@ See LICENSE for the license #define REACHED_EOF 1 #define CALLING_READ_FAILED 2 -#ifndef P_INLINE -#if defined(__GNUC__) -#define P_INLINE static __inline__ -#elif defined(_MSC_VER) -#define P_INLINE -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -#define P_INLINE static inline -#else -#define P_INLINE -#endif -#endif #if defined(_MSC_VER) #define strtoll _strtoi64 diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h index f9527e72f577e..60c1a56727777 100644 --- a/pandas/_libs/src/skiplist.h +++ b/pandas/_libs/src/skiplist.h @@ -20,18 +20,7 @@ Python recipe (http://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) #include #include #include - -#ifndef PANDAS_INLINE -#if defined(__GNUC__) -#define PANDAS_INLINE static __inline__ -#elif defined(_MSC_VER) -#define PANDAS_INLINE static __inline -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -#define PANDAS_INLINE static inline -#else -#define PANDAS_INLINE -#endif -#endif +#include "inline_helper.h" PANDAS_INLINE float __skiplist_nanf(void) { const union { diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 7e9e8b720872d..70a3f3f410636 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False import re cimport numpy as cnp diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index a585259286a58..c3d229d4e5006 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -46,7 +46,7 @@ cdef extern from "numpy/ndarraytypes.h": NPY_FR_fs NPY_FR_as -cdef extern from "../src/datetime/np_datetime.h": +cdef extern from "src/datetime/np_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 76838c7a23b24..a0099837e876a 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -15,7 +15,7 @@ PyDateTime_IMPORT from numpy cimport int64_t -cdef extern from "../src/datetime/np_datetime.h": +cdef extern from "src/datetime/np_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) @@ -33,7 +33,7 @@ cdef extern from "../src/datetime/np_datetime.h": npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS -cdef extern from "../src/datetime/np_datetime_strings.h": +cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(char *str, int len, npy_datetimestruct *out, int *out_local, int *out_tzoffset) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 7a279a41709ec..8c53fabffdbeb 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cimport cython from cython cimport Py_ssize_t @@ -31,17 +30,6 @@ from np_datetime cimport (npy_datetimestruct, # Constants -class WeekDay(object): - # TODO: Remove: This is not used outside of tests - MON = 0 - TUE = 1 - WED = 2 - THU = 3 - FRI = 4 - SAT = 5 - SUN = 6 - - _offset_to_period_map = { 'WEEKDAY': 'D', 'EOM': 'M', diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 61fb48c6913d3..6ee6c4b9d9026 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1,25 +1,22 @@ # -*- coding: utf-8 -*- -# cython: profile=False """ Parsing functions for datetime and datetime-like strings. """ import sys import re +import time -from cython cimport Py_ssize_t - +from cython import Py_ssize_t from cpython.datetime cimport datetime -import time + import numpy as np # Avoid import from outside _libs if sys.version_info.major == 2: - string_types = basestring from StringIO import StringIO else: - string_types = str from io import StringIO @@ -113,7 +110,9 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): ------- datetime, datetime/dateutil.parser._result, str """ - if not isinstance(arg, string_types): + if not isinstance(arg, (str, unicode)): + # Note: cython recognizes `unicode` in both py2/py3, optimizes + # this check into a C call. return arg if getattr(freq, "_typ", None) == "dateoffset": @@ -132,15 +131,22 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): return res -def parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, - yearfirst=False, **kwargs): +cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, + yearfirst=False): """parse datetime string, only returns datetime Returns ------- - datetime + parsed : datetime + parsed2 : datetime/dateutil.parser._result + reso : str + inferred resolution + + Raises + ------ + ValueError : preliminary check suggests string is not datetime + DateParseError : error within dateutil """ - cdef: object parsed, reso @@ -156,12 +162,13 @@ def parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, try: parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst) + dayfirst=dayfirst, yearfirst=yearfirst, + ignoretz=False, tzinfos=None) except Exception as e: # TODO: allow raise of errors within instead raise DateParseError(e) if parsed is None: - raise DateParseError("Could not parse %s" % date_string) + raise DateParseError("Could not parse {dstr}".format(dstr=date_string)) return parsed, parsed, reso @@ -190,7 +197,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, int year, quarter = -1, month, mnum, date_len # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 - assert isinstance(date_string, string_types) + assert isinstance(date_string, (str, unicode)) # len(date_string) == 0 # should be NaT??? @@ -243,8 +250,8 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, if not (1 <= quarter <= 4): msg = ('Incorrect quarterly string is given, quarter must be ' - 'between 1 and 4: {0}') - raise DateParseError(msg.format(date_string)) + 'between 1 and 4: {dstr}') + raise DateParseError(msg.format(dstr=date_string)) if freq is not None: # hack attack, #1228 @@ -252,7 +259,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, mnum = MONTH_NUMBERS[_get_rule_month(freq)] + 1 except (KeyError, ValueError): msg = ('Unable to retrieve month information from given ' - 'freq: {0}').format(freq) + 'freq: {freq}'.format(freq=freq)) raise DateParseError(msg) month = (mnum + (quarter - 1) * 3) % 12 + 1 @@ -289,8 +296,8 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, raise ValueError('Unable to parse {0}'.format(date_string)) -def dateutil_parse(object timestr, object default, ignoretz=False, - tzinfos=None, **kwargs): +cdef dateutil_parse(object timestr, object default, ignoretz=False, + tzinfos=None, dayfirst=None, yearfirst=None): """ lifted from dateutil to get resolution""" cdef: @@ -299,15 +306,15 @@ def dateutil_parse(object timestr, object default, ignoretz=False, dict repl = {} fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, **kwargs) + res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst) # dateutil 2.2 compat if isinstance(res, tuple): # PyTuple_Check res, _ = res if res is None: - msg = "Unknown datetime string format, unable to parse: {0}" - raise ValueError(msg.format(timestr)) + msg = "Unknown datetime string format, unable to parse: {timestr}" + raise ValueError(msg.format(timestr=timestr)) for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: @@ -317,8 +324,8 @@ def dateutil_parse(object timestr, object default, ignoretz=False, reso = attr if reso is None: - msg = "Unable to parse datetime string: {0}" - raise ValueError(msg.format(timestr)) + msg = "Unable to parse datetime string: {timestr}" + raise ValueError(msg.format(timestr=timestr)) if reso == 'microsecond': if repl['microsecond'] == 0: @@ -337,7 +344,7 @@ def dateutil_parse(object timestr, object default, ignoretz=False, tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata - elif isinstance(tzdata, string_types): + elif isinstance(tzdata, (str, unicode)): tzinfo = _dateutil_tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tzoffset(res.tzname, tzdata) @@ -575,7 +582,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse, if dt_str_parse is None or dt_str_split is None: return None - if not isinstance(dt_str, string_types): + if not isinstance(dt_str, (str, unicode)): return None day_attribute_and_format = (('day',), '%d', 2) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 65b37759ce9ce..f68b6d8fdef57 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -26,12 +26,12 @@ from np_datetime cimport (npy_datetimestruct, dtstruct_to_dt64, pandas_datetime_to_datetimestruct, NPY_DATETIMEUNIT, NPY_FR_D) -cdef extern from "../src/datetime/np_datetime.h": +cdef extern from "src/datetime/np_datetime.h": int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d) nogil cimport util -from util cimport is_period_object, is_string_object, INT32_MIN +from util cimport is_period_object, is_string_object from timestamps import Timestamp from timezones cimport is_utc, is_tzlocal, get_dst_info @@ -53,6 +53,8 @@ from offsets cimport to_offset from offsets import _Tick cdef bint PY2 = str == bytes +cdef enum: + INT32_MIN = -2147483648 ctypedef struct asfreq_info: @@ -67,60 +69,24 @@ ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil cdef extern from *: """ /*** FREQUENCY CONSTANTS ***/ + // See frequencies.pyx for more detailed variants #define FR_ANN 1000 /* Annual */ - #define FR_ANNDEC FR_ANN /* Annual - December year end*/ - #define FR_ANNJAN 1001 /* Annual - January year end*/ - #define FR_ANNFEB 1002 /* Annual - February year end*/ - #define FR_ANNMAR 1003 /* Annual - March year end*/ - #define FR_ANNAPR 1004 /* Annual - April year end*/ - #define FR_ANNMAY 1005 /* Annual - May year end*/ - #define FR_ANNJUN 1006 /* Annual - June year end*/ - #define FR_ANNJUL 1007 /* Annual - July year end*/ - #define FR_ANNAUG 1008 /* Annual - August year end*/ - #define FR_ANNSEP 1009 /* Annual - September year end*/ - #define FR_ANNOCT 1010 /* Annual - October year end*/ - #define FR_ANNNOV 1011 /* Annual - November year end*/ - - /* The standard quarterly frequencies with various fiscal year ends - eg, Q42005 for Q@OCT runs Aug 1, 2005 to Oct 31, 2005 */ #define FR_QTR 2000 /* Quarterly - December year end (default Q) */ - #define FR_QTRDEC FR_QTR /* Quarterly - December year end */ - #define FR_QTRJAN 2001 /* Quarterly - January year end */ - #define FR_QTRFEB 2002 /* Quarterly - February year end */ - #define FR_QTRMAR 2003 /* Quarterly - March year end */ - #define FR_QTRAPR 2004 /* Quarterly - April year end */ - #define FR_QTRMAY 2005 /* Quarterly - May year end */ - #define FR_QTRJUN 2006 /* Quarterly - June year end */ - #define FR_QTRJUL 2007 /* Quarterly - July year end */ - #define FR_QTRAUG 2008 /* Quarterly - August year end */ - #define FR_QTRSEP 2009 /* Quarterly - September year end */ - #define FR_QTROCT 2010 /* Quarterly - October year end */ - #define FR_QTRNOV 2011 /* Quarterly - November year end */ - - #define FR_MTH 3000 /* Monthly */ - - #define FR_WK 4000 /* Weekly */ - #define FR_WKSUN FR_WK /* Weekly - Sunday end of week */ - #define FR_WKMON 4001 /* Weekly - Monday end of week */ - #define FR_WKTUE 4002 /* Weekly - Tuesday end of week */ - #define FR_WKWED 4003 /* Weekly - Wednesday end of week */ - #define FR_WKTHU 4004 /* Weekly - Thursday end of week */ - #define FR_WKFRI 4005 /* Weekly - Friday end of week */ - #define FR_WKSAT 4006 /* Weekly - Saturday end of week */ - - #define FR_BUS 5000 /* Business days */ - #define FR_DAY 6000 /* Daily */ - #define FR_HR 7000 /* Hourly */ - #define FR_MIN 8000 /* Minutely */ - #define FR_SEC 9000 /* Secondly */ - #define FR_MS 10000 /* Millisecondly */ - #define FR_US 11000 /* Microsecondly */ - #define FR_NS 12000 /* Nanosecondly */ - - #define FR_UND -10000 /* Undefined */ - - static int64_t daytime_conversion_factor_matrix[7][7] = { + #define FR_MTH 3000 /* Monthly */ + #define FR_WK 4000 /* Weekly */ + #define FR_BUS 5000 /* Business days */ + #define FR_DAY 6000 /* Daily */ + #define FR_HR 7000 /* Hourly */ + #define FR_MIN 8000 /* Minutely */ + #define FR_SEC 9000 /* Secondly */ + #define FR_MS 10000 /* Millisecondly */ + #define FR_US 11000 /* Microsecondly */ + #define FR_NS 12000 /* Nanosecondly */ + #define FR_UND -10000 /* Undefined */ + + // must use npy typedef b/c int64_t is aliased in cython-generated c + static npy_int64 daytime_conversion_factor_matrix[7][7] = { {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, {0, 0, 1, 60, 60000, 60000000, 60000000000}, @@ -128,26 +94,9 @@ cdef extern from *: {0, 0, 0, 0, 1, 1000, 1000000}, {0, 0, 0, 0, 0, 1, 1000}, {0, 0, 0, 0, 0, 0, 1}}; - - int max_value(int a, int b) { return a > b ? a : b; } - - static int min_value(int a, int b) { return a < b ? a : b; } - - npy_int64 get_daytime_conversion_factor(int from_index, int to_index) { - int row = min_value(from_index, to_index); - int col = max_value(from_index, to_index); - // row or col < 6 means frequency strictly lower than Daily, which - // do not use daytime_conversion_factors - if (row < 6) { - return 0; - } else if (col < 6) { - return 0; - } - return daytime_conversion_factor_matrix[row - 6][col - 6]; - } """ - int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil - int max_value(int left, int right) nogil + int64_t daytime_conversion_factor_matrix[7][7] + # TODO: Can we get these frequencies from frequencies.FreqGroup? int FR_ANN int FR_QTR int FR_MTH @@ -163,6 +112,31 @@ cdef extern from *: int FR_UND +cdef int max_value(int left, int right) nogil: + if left > right: + return left + return right + + +cdef int min_value(int left, int right) nogil: + if left < right: + return left + return right + + +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: + cdef: + int row = min_value(from_index, to_index) + int col = max_value(from_index, to_index) + # row or col < 6 means frequency strictly lower than Daily, which + # do not use daytime_conversion_factors + if row < 6: + return 0 + elif col < 6: + return 0 + return daytime_conversion_factor_matrix[row - 6][col - 6] + + cdef int64_t nofunc(int64_t ordinal, asfreq_info *af_info): return np.iinfo(np.int32).min @@ -1250,7 +1224,7 @@ def period_format(int64_t value, int freq, object fmt=None): elif freq_group == 12000: # NANOSEC fmt = b'%Y-%m-%d %H:%M:%S.%n' else: - raise ValueError('Unknown freq: %d' % freq) + raise ValueError('Unknown freq: {freq}'.format(freq=freq)) return _period_strftime(value, freq, fmt) @@ -1415,7 +1389,7 @@ def get_period_field_arr(int code, int64_t[:] arr, int freq): func = _get_accessor_func(code) if func is NULL: - raise ValueError('Unrecognized period code: %d' % code) + raise ValueError('Unrecognized period code: {code}'.format(code=code)) sz = len(arr) out = np.empty(sz, dtype=np.int64) @@ -1597,7 +1571,8 @@ cdef class _Period(object): if freq.n <= 0: raise ValueError('Frequency must be positive, because it' - ' represents span: {0}'.format(freq.freqstr)) + ' represents span: {freqstr}' + .format(freqstr=freq.freqstr)) return freq @@ -1629,8 +1604,9 @@ cdef class _Period(object): return NotImplemented elif op == Py_NE: return NotImplemented - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) + raise TypeError('Cannot compare type {cls} with type {typ}' + .format(cls=type(self).__name__, + typ=type(other).__name__)) def __hash__(self): return hash((self.ordinal, self.freqstr)) @@ -2428,8 +2404,8 @@ class Period(_Period): freq = cls._maybe_convert_freq(freq) if ordinal is not None and value is not None: - raise ValueError(("Only value or ordinal but not both should be " - "given but not both")) + raise ValueError("Only value or ordinal but not both should be " + "given but not both") elif ordinal is not None: if not util.is_integer_object(ordinal): raise ValueError("Ordinal must be an integer") @@ -2481,7 +2457,8 @@ class Period(_Period): freq = Resolution.get_freq(reso) except KeyError: raise ValueError( - "Invalid frequency or could not infer: %s" % reso) + "Invalid frequency or could not infer: {reso}" + .format(reso=reso)) elif isinstance(value, datetime): dt = value diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 83be739a6ae0a..4e3350395400c 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cython cimport Py_ssize_t diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 659afd152106d..866c9ca9d3ac7 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -50,18 +50,6 @@ int is_leapyear(npy_int64 year) { ((year % 100) != 0 || (year % 400) == 0); } -/* - * Sakamoto's method, from wikipedia - */ -int dayofweek(int y, int m, int d) { - int day; - static const int t[] = {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4}; - y -= m < 3; - day = (y + y / 4 - y / 100 + y / 400 + t[m - 1] + d) % 7; - // convert to python day - return (day + 6) % 7; -} - /* * Adjusts a datetimestruct based on a minutes offset. Assumes * the current values are valid.g diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 3974d5083f51b..549d38409ca83 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -48,8 +48,6 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta val, NPY_DATETIMEUNIT fr, pandas_timedeltastruct *result); -int dayofweek(int y, int m, int d); - extern const int days_per_month_table[2][12]; // stuff numpy-derived code needs in header diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 8e7c55051a3c0..d472320cfdb12 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -# cython: profile=False """Strptime-related classes and functions. """ import time import locale import calendar import re +from datetime import date as datetime_date # Python 2 vs Python 3 @@ -20,14 +20,14 @@ except: except: from _dummy_thread import allocate_lock as _thread_allocate_lock -import pytz - from cython cimport Py_ssize_t + +import pytz + import numpy as np from numpy cimport int64_t -from datetime import date as datetime_date from np_datetime cimport (check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 2413c281e0a52..ef9fd3207e5f0 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -from cpython.datetime cimport timedelta - from numpy cimport int64_t # Exposed for tslib, not intended for outside use. diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index eb5c0076a868a..67420fda8aa51 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -17,8 +17,7 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT from util cimport (is_datetime64_object, is_timedelta64_object, - is_integer_object, is_string_object, is_array, - INT64_MAX) + is_integer_object, is_string_object, is_array) cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps @@ -1103,7 +1102,7 @@ class Timestamp(_Timestamp): # Add the min and max fields at the class level -cdef int64_t _NS_UPPER_BOUND = INT64_MAX +cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # the smallest value we could actually represent is # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 0470202ee7d98..0ba61fcc58f46 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -24,10 +24,10 @@ cdef extern from "Python.h": bint PyComplex_Check(object obj) nogil bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil +from numpy cimport int64_t cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type - ctypedef signed long long int64_t int _import_array() except -1 cdef extern from "numpy/ndarrayobject.h": @@ -43,21 +43,6 @@ cdef extern from "numpy/npy_common.h": int64_t NPY_MIN_INT64 -cdef extern from "../src/headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - - cdef inline int64_t get_nat(): return NPY_MIN_INT64 diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 134f34330d8aa..31843a755e7b1 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -14,6 +14,21 @@ cdef extern from "src/numpy_helper.h": const char *get_c_string(object) except NULL +cdef extern from "src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + + ctypedef fused numeric: cnp.int8_t cnp.int16_t diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index ccd418a69c827..223298dc42544 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -13,10 +13,9 @@ from pandas.tseries.frequencies import get_offset from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG from pandas.tseries.offsets import FY5253Quarter, FY5253 -from pandas._libs.tslibs.offsets import WeekDay from .common import assert_offset_equal, assert_onOffset -from .test_offsets import Base +from .test_offsets import Base, WeekDay def makeFY5253LastOfMonthQuarter(*args, **kwds): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 57b9a281ac0eb..e95f1ba11ad5c 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -17,7 +17,7 @@ from pandas.core.indexes.datetimes import ( _to_m8, DatetimeIndex, _daterange_cache) import pandas._libs.tslibs.offsets as liboffsets -from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset +from pandas._libs.tslibs.offsets import CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, BusinessHour, WeekOfMonth, CBMonthEnd, CustomBusinessHour, @@ -39,6 +39,18 @@ from .common import assert_offset_equal, assert_onOffset + +class WeekDay(object): + # TODO: Remove: This is not used outside of tests + MON = 0 + TUE = 1 + WED = 2 + THU = 3 + FRI = 4 + SAT = 5 + SUN = 6 + + #### # Misc function tests ####