From b4e582fb5ea0d4aef1158fdf59963d8d70fad36c Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 06:29:09 +0000 Subject: [PATCH 01/13] add type hints --- pandas/core/dtypes/cast.py | 52 +++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a7379376c2f78..5c33d14aa927d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -3,7 +3,7 @@ """ from datetime import date, datetime, timedelta -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type +from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Set, Tuple, Type, Union import numpy as np @@ -18,7 +18,7 @@ ints_to_pydatetime, ) from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import ArrayLike, Dtype, DtypeObj +from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( @@ -118,7 +118,7 @@ def is_nested_object(obj) -> bool: return False -def maybe_downcast_to_dtype(result, dtype): +def maybe_downcast_to_dtype(result, dtype: Dtype): """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 @@ -186,7 +186,7 @@ def maybe_downcast_to_dtype(result, dtype): return result -def maybe_downcast_numeric(result, dtype, do_round: bool = False): +def maybe_downcast_numeric(result, dtype: Dtype, do_round: bool = False): """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. @@ -329,7 +329,9 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: return dtype -def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None): +def maybe_cast_to_extension_array( + cls: Type["ExtensionArray"], obj, dtype: Dtype = None +): """ Call to `_from_sequence` that returns the object unchanged on Exception. @@ -362,7 +364,9 @@ def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None): return result -def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): +def maybe_upcast_putmask( + result: np.ndarray, mask: np.ndarray, other: Scalar +) -> Tuple[np.ndarray, bool]: """ A safe version of putmask that potentially upcasts the result. @@ -660,7 +664,7 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _ensure_dtype_type(value, dtype): +def _ensure_dtype_type(value, dtype: DtypeObj): """ Ensure that the given value is an instance of the given dtype. @@ -787,7 +791,9 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, # TODO: try to make the Any in the return annotation more specific -def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: +def infer_dtype_from_array( + arr, pandas_dtype: bool = False +) -> Tuple[DtypeObj, AnyArrayLike]: """ Infer the dtype from an array. @@ -875,7 +881,12 @@ def maybe_infer_dtype_type(element): return tipo -def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): +def maybe_upcast( + values: ArrayLike, + fill_value: Scalar = np.nan, + dtype: Dtype = None, + copy: bool = False, +) -> Tuple[ArrayLike, Scalar]: """ Provide explicit type promotion and coercion. @@ -887,6 +898,13 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): dtype : if None, then use the dtype of the values, else coerce to this type copy : bool, default True If True always make a copy even if no upcast is required. + + Returns + ------- + values: ndarray or ExtensionArray + the original array, possibly upcast + fill_value: + the fill value, possibly upcast """ if not is_scalar(fill_value) and not is_object_dtype(values.dtype): # We allow arbitrary fill values for object dtype @@ -907,7 +925,7 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): return values, fill_value -def invalidate_string_dtypes(dtype_set): +def invalidate_string_dtypes(dtype_set: Set): """ Change string like dtypes to object for ``DataFrame.select_dtypes()``. @@ -929,7 +947,7 @@ def coerce_indexer_dtype(indexer, categories): return ensure_int64(indexer) -def coerce_to_dtypes(result, dtypes): +def coerce_to_dtypes(result: Sequence[Scalar], dtypes: Sequence[Dtype]) -> List[Scalar]: """ given a dtypes and a result set, coerce the result elements to the dtypes @@ -959,7 +977,9 @@ def conv(r, dtype): return [conv(r, dtype) for r, dtype in zip(result, dtypes)] -def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): +def astype_nansafe( + arr, dtype: DtypeObj, copy: bool = True, skipna: bool = False +) -> ArrayLike: """ Cast the elements of an array to a given dtype a nan-safe manner. @@ -1063,7 +1083,9 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): return arr.view(dtype) -def maybe_convert_objects(values: np.ndarray, convert_numeric: bool = True): +def maybe_convert_objects( + values: np.ndarray, convert_numeric: bool = True +) -> Union[np.ndarray, ABCDatetimeIndex]: """ If we have an object dtype array, try to coerce dates and/or numbers. @@ -1184,7 +1206,7 @@ def soft_convert_objects( def convert_dtypes( - input_array, + input_array: AnyArrayLike, convert_string: bool = True, convert_integer: bool = True, convert_boolean: bool = True, @@ -1250,7 +1272,7 @@ def convert_dtypes( return inferred_dtype -def maybe_castable(arr) -> bool: +def maybe_castable(arr: np.ndarray) -> bool: # return False to force a non-fastpath # check datetime64[ns]/timedelta64[ns] are valid From 2cc7e9714fb0767a2ac0a5bb35b2d072206d7be8 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 16:17:23 +0000 Subject: [PATCH 02/13] feedback --- pandas/core/dtypes/cast.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5c33d14aa927d..c73235773761a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -80,6 +80,8 @@ notna, ) +from pandas.core.indexes.datetimes import DatetimeIndex + if TYPE_CHECKING: from pandas import Series from pandas.core.arrays import ExtensionArray @@ -186,7 +188,7 @@ def maybe_downcast_to_dtype(result, dtype: Dtype): return result -def maybe_downcast_numeric(result, dtype: Dtype, do_round: bool = False): +def maybe_downcast_numeric(result, dtype: DtypeObj, do_round: bool = False): """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. @@ -330,8 +332,8 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: def maybe_cast_to_extension_array( - cls: Type["ExtensionArray"], obj, dtype: Dtype = None -): + cls: Type["ExtensionArray"], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None +) -> ArrayLike: """ Call to `_from_sequence` that returns the object unchanged on Exception. @@ -793,7 +795,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, # TODO: try to make the Any in the return annotation more specific def infer_dtype_from_array( arr, pandas_dtype: bool = False -) -> Tuple[DtypeObj, AnyArrayLike]: +) -> Tuple[DtypeObj, ArrayLike]: """ Infer the dtype from an array. @@ -1085,7 +1087,7 @@ def astype_nansafe( def maybe_convert_objects( values: np.ndarray, convert_numeric: bool = True -) -> Union[np.ndarray, ABCDatetimeIndex]: +) -> Union[np.ndarray, DatetimeIndex]: """ If we have an object dtype array, try to coerce dates and/or numbers. From 8da843d1bda5723075b0cd07fb297ce7ace1d4ae Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 16:57:41 +0000 Subject: [PATCH 03/13] more type hints --- pandas/core/dtypes/cast.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c73235773761a..c5b2b0877d47e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -80,11 +80,10 @@ notna, ) -from pandas.core.indexes.datetimes import DatetimeIndex - if TYPE_CHECKING: from pandas import Series from pandas.core.arrays import ExtensionArray + from pandas.core.indexes.datetimes import DatetimeIndex _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max @@ -1087,7 +1086,7 @@ def astype_nansafe( def maybe_convert_objects( values: np.ndarray, convert_numeric: bool = True -) -> Union[np.ndarray, DatetimeIndex]: +) -> Union[np.ndarray, "DatetimeIndex"]: """ If we have an object dtype array, try to coerce dates and/or numbers. From fc757c06bc78ae266fa7adc593c2d7a532e5d26f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 18:01:08 +0000 Subject: [PATCH 04/13] more type hints --- pandas/core/dtypes/cast.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c5b2b0877d47e..d4d5c3c53724a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -3,7 +3,18 @@ """ from datetime import date, datetime, timedelta -from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Set, Tuple, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Sequence, + Set, + Sized, + Tuple, + Type, + Union, +) import numpy as np @@ -1287,7 +1298,9 @@ def maybe_castable(arr: np.ndarray) -> bool: return arr.dtype.name not in POSSIBLY_CAST_DTYPES -def maybe_infer_to_datetimelike(value, convert_dates: bool = False): +def maybe_infer_to_datetimelike( + value: Union[ArrayLike, Scalar], convert_dates: bool = False +): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -1396,7 +1409,7 @@ def try_timedelta(v): return value -def maybe_cast_to_datetime(value, dtype, errors: str = "raise"): +def maybe_cast_to_datetime(value, dtype: DtypeObj, errors: str = "raise"): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT @@ -1589,7 +1602,9 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: return np.find_common_type(types, []) -def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.ndarray: +def cast_scalar_to_array( + shape: Tuple, value: Scalar, dtype: Optional[DtypeObj] = None +) -> np.ndarray: """ Create np.ndarray of specified shape and dtype, filled with values. @@ -1617,7 +1632,7 @@ def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.n def construct_1d_arraylike_from_scalar( - value, length: int, dtype: DtypeObj + value: Scalar, length: int, dtype: DtypeObj ) -> ArrayLike: """ create a np.ndarray / pandas type of specified shape and dtype @@ -1661,7 +1676,7 @@ def construct_1d_arraylike_from_scalar( return subarr -def construct_1d_object_array_from_listlike(values) -> np.ndarray: +def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: """ Transform any list-like object in a 1-dimensional numpy array of object dtype. @@ -1687,7 +1702,7 @@ def construct_1d_object_array_from_listlike(values) -> np.ndarray: def construct_1d_ndarray_preserving_na( - values, dtype: Optional[DtypeObj] = None, copy: bool = False + values: Sequence, dtype: Optional[DtypeObj] = None, copy: bool = False ) -> np.ndarray: """ Construct a new ndarray, coercing `values` to `dtype`, preserving NA. @@ -1721,7 +1736,7 @@ def construct_1d_ndarray_preserving_na( return subarr -def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): +def maybe_cast_to_integer_array(arr, dtype: Union[str, np.dtype], copy: bool = False): """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes. @@ -1791,7 +1806,7 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): raise ValueError("Trying to coerce float values to integers") -def convert_scalar_for_putitemlike(scalar, dtype: np.dtype): +def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar: """ Convert datetimelike scalar if we are setting into a datetime64 or timedelta64 ndarray. @@ -1822,7 +1837,7 @@ def convert_scalar_for_putitemlike(scalar, dtype: np.dtype): return scalar -def validate_numeric_casting(dtype: np.dtype, value): +def validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None: """ Check that we can losslessly insert the given value into an array with the given dtype. From 8999b35fdd0307ba5f135ba6f69c28e78c3dca89 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 18:04:42 +0000 Subject: [PATCH 05/13] remove addressed TODO --- pandas/core/dtypes/cast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d4d5c3c53724a..b046a1ae412b6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -802,7 +802,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, return dtype, val -# TODO: try to make the Any in the return annotation more specific def infer_dtype_from_array( arr, pandas_dtype: bool = False ) -> Tuple[DtypeObj, ArrayLike]: From d0548688eff2d1e3518ee40f93bfcd76bfcc52e4 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 11 Oct 2020 04:00:24 +0000 Subject: [PATCH 06/13] add type hints to maybe_casted_values --- pandas/core/dtypes/cast.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b046a1ae412b6..49432d1abc632 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -94,6 +94,7 @@ if TYPE_CHECKING: from pandas import Series from pandas.core.arrays import ExtensionArray + from pandas.core.indexes.base import Index from pandas.core.indexes.datetimes import DatetimeIndex _int8_max = np.iinfo(np.int8).max @@ -460,7 +461,9 @@ def changeit(): return result, False -def maybe_casted_values(index, codes=None): +def maybe_casted_values( + index: "Index", codes: Optional[Sequence[int]] = None +) -> ArrayLike: """ Convert an index, given directly or as a pair (level, code), to a 1D array. From 92d94b0f0eaa993abd504f5416ca16a744c10483 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 11 Oct 2020 04:40:08 +0000 Subject: [PATCH 07/13] remove type hint from codes pending followon PR --- pandas/core/dtypes/cast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 49432d1abc632..14c8f84744ca8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -462,7 +462,7 @@ def changeit(): def maybe_casted_values( - index: "Index", codes: Optional[Sequence[int]] = None + index: "Index", codes: Optional[np.ndarray] = None ) -> ArrayLike: """ Convert an index, given directly or as a pair (level, code), to a 1D array. @@ -487,7 +487,7 @@ def maybe_casted_values( # if we have the codes, extract the values with a mask if codes is not None: - mask = codes == -1 + mask: np.ndarray = codes == -1 # we can have situations where the whole mask is -1, # meaning there is nothing found in codes, so make all nan's From a5e9800ad3d547815ae3ef5b554f065509470421 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 11 Oct 2020 17:04:32 +0000 Subject: [PATCH 08/13] feedback: invalidate_string_dtype --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 14c8f84744ca8..e741c4163a34b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -939,7 +939,7 @@ def maybe_upcast( return values, fill_value -def invalidate_string_dtypes(dtype_set: Set): +def invalidate_string_dtypes(dtype_set: Set[DtypeObj]): """ Change string like dtypes to object for ``DataFrame.select_dtypes()``. From afd887b6307ba9026882e5ec737ae71f73cbdfb0 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 11 Oct 2020 17:10:34 +0000 Subject: [PATCH 09/13] feedback: add assert to maybe_castable --- pandas/core/dtypes/cast.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e741c4163a34b..87edeb6e38416 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1289,6 +1289,8 @@ def convert_dtypes( def maybe_castable(arr: np.ndarray) -> bool: # return False to force a non-fastpath + assert isinstance(arr, np.ndarray) # GH 37024 + # check datetime64[ns]/timedelta64[ns] are valid # otherwise try to coerce kind = arr.dtype.kind From 7f4e93eba0c97ee290daec19cbab93261591b4dd Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 19:17:22 +0000 Subject: [PATCH 10/13] feedback: update docstring [convert_dtypes] --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 87edeb6e38416..e35dd427f7319 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1231,7 +1231,7 @@ def convert_dtypes( Parameters ---------- - input_array : ExtensionArray or PandasArray + input_array : ExtensionArray, Index, Series or np.ndarray convert_string : bool, default True Whether object dtypes should be converted to ``StringDtype()``. convert_integer : bool, default True From 3be4aba1f52e251a600728a31ab978c2cc570f00 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 19:18:25 +0000 Subject: [PATCH 11/13] feedback: revert variable type annotation in maybe_casted_values --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e35dd427f7319..f36a583a3c6ac 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -487,7 +487,7 @@ def maybe_casted_values( # if we have the codes, extract the values with a mask if codes is not None: - mask: np.ndarray = codes == -1 + mask = codes == -1 # we can have situations where the whole mask is -1, # meaning there is nothing found in codes, so make all nan's From 25ce7ed84a02d5e1cb6ef42f1bbff08f9fc311dd Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 19:23:20 +0000 Subject: [PATCH 12/13] revert annotations and add TODO for maybe_cast_to_EA --- pandas/core/dtypes/cast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f36a583a3c6ac..e68f2282e1435 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -342,8 +342,10 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: return dtype +# TODO: annotate obj, return type +# both ArrayLike but not necessarily the same => need a Union (xref GH36100) def maybe_cast_to_extension_array( - cls: Type["ExtensionArray"], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None + cls: Type["ExtensionArray"], obj, dtype: Optional[ExtensionDtype] = None ) -> ArrayLike: """ Call to `_from_sequence` that returns the object unchanged on Exception. From 3aa7e4dbb2cabe057c4177fab04df4cddbae9198 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 12 Oct 2020 19:49:39 +0000 Subject: [PATCH 13/13] use Dtype in maybe_cast_to_integer_array --- pandas/core/dtypes/cast.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e68f2282e1435..1dce5c2be809b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -342,10 +342,8 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: return dtype -# TODO: annotate obj, return type -# both ArrayLike but not necessarily the same => need a Union (xref GH36100) def maybe_cast_to_extension_array( - cls: Type["ExtensionArray"], obj, dtype: Optional[ExtensionDtype] = None + cls: Type["ExtensionArray"], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None ) -> ArrayLike: """ Call to `_from_sequence` that returns the object unchanged on Exception. @@ -489,7 +487,7 @@ def maybe_casted_values( # if we have the codes, extract the values with a mask if codes is not None: - mask = codes == -1 + mask: np.ndarray = codes == -1 # we can have situations where the whole mask is -1, # meaning there is nothing found in codes, so make all nan's @@ -1742,7 +1740,7 @@ def construct_1d_ndarray_preserving_na( return subarr -def maybe_cast_to_integer_array(arr, dtype: Union[str, np.dtype], copy: bool = False): +def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False): """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes.