Commit
Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
jbrockmendel committed Jan 25, 2018
2 parents ea75c3c + d3f7d2a commit 5d7e3ea
Showing 72 changed files with 1,763 additions and 1,667 deletions.
15 changes: 15 additions & 0 deletions ci/lint.sh
@@ -30,6 +30,13 @@ if [ "$LINT" ]; then
fi
echo "Linting asv_bench/benchmarks/*.py DONE"

echo "Linting scripts/*.py"
flake8 scripts --filename=*.py
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting scripts/*.py DONE"

echo "Linting *.pyx"
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403
if [ $? -ne "0" ]; then
@@ -89,6 +96,14 @@ if [ "$LINT" ]; then
if [ $? = "0" ]; then
RET=1
fi

# Check for pytest.warns
grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/

if [ $? = "0" ]; then
RET=1
fi

echo "Check for invalid testing DONE"

# Check for imports from pandas.core.common instead
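
The new lint step above makes the build fail whenever pytest.warns appears under pandas/tests/, presumably so that tests use the suite's own tm.assert_produces_warning helper instead. A minimal sketch of the enforced style, assuming the pandas.util.testing import path of this era; old_api is a made-up stand-in:

    import warnings
    import pandas.util.testing as tm

    def old_api():
        # stand-in for a deprecated pandas code path
        warnings.warn("old_api is deprecated", FutureWarning)
        return 42

    def test_old_api_warns():
        # preferred in pandas' tests instead of pytest.warns(FutureWarning)
        with tm.assert_produces_warning(FutureWarning):
            old_api()
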
1 change: 0 additions & 1 deletion doc/source/api.rst
@@ -1617,7 +1617,6 @@ IntervalIndex Components
IntervalIndex.from_arrays
IntervalIndex.from_tuples
IntervalIndex.from_breaks
IntervalIndex.from_intervals
IntervalIndex.contains
IntervalIndex.left
IntervalIndex.right
7 changes: 5 additions & 2 deletions doc/source/whatsnew/v0.23.0.txt
@@ -235,9 +235,8 @@ Other Enhancements
:func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas
to register custom accessors like ``.cat`` on pandas objects. See
:ref:`Registering Custom Accessors <developer.register-accessors>` for more (:issue:`14781`).


- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)
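
A rough sketch of the two IntervalIndex enhancements listed above (GH 19197, GH 19262); the exact spelling of the dtype arguments is an assumption, with IntervalDtype taken from pandas.api.types:

    import pandas as pd
    from pandas.api.types import IntervalDtype

    # constructors such as from_breaks now accept a dtype (GH 19262)
    idx = pd.IntervalIndex.from_breaks([0, 1, 2, 3], dtype=IntervalDtype('float64'))

    # astype can now convert between interval subtypes (GH 19197)
    converted = idx.astype(IntervalDtype('int64'))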

.. _whatsnew_0230.api_breaking:

@@ -357,6 +356,7 @@ Deprecations
- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`).
- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`)
- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`).
- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
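
The from_intervals deprecation above pairs with the api.rst removal earlier in this diff; a minimal sketch of the replacement spelling:

    import pandas as pd

    intervals = [pd.Interval(0, 1), pd.Interval(1, 2)]

    # deprecated after this change:
    # idx = pd.IntervalIndex.from_intervals(intervals)

    # preferred: pass the intervals directly to the constructor
    idx = pd.IntervalIndex(intervals)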


.. _whatsnew_0230.prior_deprecations:
@@ -407,6 +407,8 @@ Performance Improvements
- Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`)
- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)


.. _whatsnew_0230.docs:

@@ -532,6 +534,7 @@ Groupby/Resample/Rolling
- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`)
- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
-
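
A hedged illustration of the transform fix noted above (GH 19200); the data and the specific aggregation are made up, but the gist is that float results are no longer cast back to the input's integer dtype:

    import pandas as pd

    df = pd.DataFrame({'key': ['a', 'a', 'b'], 'val': [1, 2, 5]})

    # Previously the group means could be cast back to int64 to match 'val'
    # (so 1.5 became 1); with the fix the float64 result is kept.
    out = df.groupby('key')['val'].transform('mean')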

Sparse
18 changes: 0 additions & 18 deletions pandas/_libs/algos.pyx
@@ -196,24 +196,6 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil:
return a[k]


cpdef numeric median(numeric[:] arr):
"""
A faster median
"""
cdef Py_ssize_t n = arr.size

if n == 0:
return np.NaN

arr = arr.copy()

if n % 2:
return kth_smallest(arr, n // 2)
else:
return (kth_smallest(arr, n // 2) +
kth_smallest(arr, n // 2 - 1)) / 2


# ----------------------------------------------------------------------
# Pairwise correlation/covariance

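The helper deleted above computed the median by selection (via kth_smallest) rather than by sorting the whole array. For reference, a rough NumPy analogue of the same idea, offered as an illustration only, not the code pandas now uses:

    import numpy as np

    def median_by_selection(values):
        # Pick the middle element(s) with a partial partition instead of a
        # full sort, mirroring the kth_smallest-based helper removed here.
        arr = np.asarray(values, dtype=float)
        n = arr.size
        if n == 0:
            return np.nan
        if n % 2:
            return np.partition(arr, n // 2)[n // 2]
        part = np.partition(arr, [n // 2 - 1, n // 2])
        return (part[n // 2 - 1] + part[n // 2]) / 2
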
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyx
@@ -118,7 +118,7 @@ def group_last_object(ndarray[object, ndim=2] out,
out[i, j] = resx[i, j]


cdef inline float64_t _median_linear(float64_t* a, int n) nogil:
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
cdef int i, j, na_count = 0
cdef float64_t result
cdef float64_t* tmp
2 changes: 1 addition & 1 deletion pandas/_libs/groupby_helper.pxi.in
@@ -740,7 +740,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
ptr += _counts[0]
for j in range(ngroups):
size = _counts[j + 1]
out[j, i] = _median_linear(ptr, size)
out[j, i] = median_linear(ptr, size)
ptr += size


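The renamed median_linear is the kernel that group_median_float64 applies to each (group, column) slice, and that in turn is what backs grouped medians at the user level; a trivial usage sketch with made-up data:

    import pandas as pd

    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'b'],
                       'val': [1.0, 3.0, 2.0, 4.0, 6.0]})

    medians = df.groupby('key')['val'].median()
    # key
    # a    2.0
    # b    4.0
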
20 changes: 2 additions & 18 deletions pandas/_libs/index.pyx
@@ -73,10 +73,6 @@ cpdef object get_value_box(ndarray arr, object loc):
return util.get_value_1d(arr, i)


def set_value_at(ndarray arr, object loc, object val):
return util.set_value_at(arr, loc, val)


# Don't populate hash tables in monotonic indexes larger than this
_SIZE_CUTOFF = 1000000

@@ -404,18 +400,6 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
else:
return mid + 1

_pad_functions = {
'object': algos.pad_object,
'int64': algos.pad_int64,
'float64': algos.pad_float64
}

_backfill_functions = {
'object': algos.backfill_object,
'int64': algos.backfill_int64,
'float64': algos.backfill_float64
}


cdef class DatetimeEngine(Int64Engine):

@@ -566,7 +550,7 @@ cpdef convert_scalar(ndarray arr, object value):
# we don't turn bools into int/float/complex

if arr.descr.type_num == NPY_DATETIME:
if isinstance(value, np.ndarray):
if util.is_array(value):
pass
elif isinstance(value, (datetime, np.datetime64, date)):
return Timestamp(value).value
@@ -577,7 +561,7 @@ cpdef convert_scalar(ndarray arr, object value):
raise ValueError("cannot set a Timestamp with a non-timestamp")

elif arr.descr.type_num == NPY_TIMEDELTA:
if isinstance(value, np.ndarray):
if util.is_array(value):
pass
elif isinstance(value, timedelta):
return Timedelta(value).value
7 changes: 4 additions & 3 deletions pandas/_libs/internals.pyx
@@ -4,6 +4,7 @@ cimport cython
from cython cimport Py_ssize_t

from cpython cimport PyObject
from cpython.slice cimport PySlice_Check

cdef extern from "Python.h":
Py_ssize_t PY_SSIZE_T_MAX
@@ -32,7 +33,7 @@ cdef class BlockPlacement:
self._has_slice = False
self._has_array = False

if isinstance(val, slice):
if PySlice_Check(val):
slc = slice_canonize(val)

if slc.start != slc.stop:
@@ -118,7 +119,7 @@ cdef class BlockPlacement:
else:
val = self._as_array[loc]

if not isinstance(val, slice) and val.ndim == 0:
if not PySlice_Check(val) and val.ndim == 0:
return val

return BlockPlacement(val)
@@ -288,7 +289,7 @@ def slice_getitem(slice slc not None, ind):

s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)

if isinstance(ind, slice):
if PySlice_Check(ind):
ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
s_len)

1 change: 1 addition & 0 deletions pandas/_libs/interval.pyx
@@ -109,6 +109,7 @@ cdef class Interval(IntervalMixin):
cut, qcut : Convert arrays of continuous data into Categoricals/Series of
Interval.
"""
_typ = "interval"

cdef readonly object left
"""Left bound for the interval"""
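
pandas generally recognizes its own objects through a lightweight _typ tag (the ABC-style checks in pandas.core.dtypes.generic compare against it), so giving Interval a _typ of "interval" lets such checks match it without importing the Cython class. A small sketch of that pattern, written as an illustration rather than pandas' actual helper:

    import pandas as pd

    def is_interval_like(obj):
        # Duck-typed check keyed on the _typ tag added in this commit.
        return getattr(obj, '_typ', None) == 'interval'

    print(is_interval_like(pd.Interval(0, 1)))  # True with this change
    print(is_interval_like((0, 1)))             # False
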
11 changes: 1 addition & 10 deletions pandas/_libs/lib.pyx
@@ -17,8 +17,6 @@ from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM,
np.import_array()
np.import_ufunc()

from libc.stdlib cimport malloc, free

from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
PyList_Check, PyFloat_Check,
PyString_Check,
@@ -27,8 +25,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
PyTuple_New,
PyObject_RichCompareBool,
PyBytes_GET_SIZE,
PyUnicode_GET_SIZE,
PyObject)
PyUnicode_GET_SIZE)

try:
from cpython cimport PyString_GET_SIZE
@@ -37,19 +34,13 @@ except ImportError:

cimport cpython

isnan = np.isnan
cdef double NaN = <double> np.NaN
cdef double nan = NaN

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyTime_Check, PyDelta_Check,
PyDateTime_IMPORT)
PyDateTime_IMPORT

from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value

from tslib import NaT, Timestamp, Timedelta, array_to_datetime
from interval import Interval
from missing cimport checknull


(Diffs for the remaining changed files were not loaded.)
