diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d0a932ec378b90..8d4fcf17dbb782 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1357,7 +1357,7 @@ def isocalendar(self) -> DataFrame: 0 2000 1 2001 2 2002 - dtype: int64 + dtype: int32 """, ) month = _field_accessor( @@ -1380,7 +1380,7 @@ def isocalendar(self) -> DataFrame: 0 1 1 2 2 3 - dtype: int64 + dtype: int32 """, ) day = _field_accessor( @@ -1403,7 +1403,7 @@ def isocalendar(self) -> DataFrame: 0 1 1 2 2 3 - dtype: int64 + dtype: int32 """, ) hour = _field_accessor( @@ -1426,7 +1426,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) minute = _field_accessor( @@ -1449,7 +1449,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) second = _field_accessor( @@ -1472,7 +1472,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) microsecond = _field_accessor( @@ -1495,7 +1495,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) nanosecond = _field_accessor( @@ -1518,7 +1518,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) _dayofweek_doc = """ @@ -1553,7 +1553,7 @@ def isocalendar(self) -> DataFrame: 2017-01-06 4 2017-01-07 5 2017-01-08 6 - Freq: D, dtype: int64 + Freq: D, dtype: int32 """ day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc) dayofweek = day_of_week diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index fe7cade1711d07..21c705d967d1b3 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -797,7 +797,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: dtype='timedelta64[ns]', freq=None) >>> idx.total_seconds() - Float64Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], + NumericIndex([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64') """ pps = 
periods_per_second(self._creso) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2ace3b0fa029c4..0eadbea4783564 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6083,7 +6083,7 @@ def astype( 0 1 1 2 dtype: category - Categories (2, int64): [1, 2] + Categories (2, int32): [1, 2] Convert to ordered categorical type with custom ordering: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index da2a0a2a87137f..9e4680d2205b9a 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -161,7 +161,7 @@ class DatetimeProperties(Properties): 0 0 1 1 2 2 - dtype: int64 + dtype: int32 >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) >>> hours_series @@ -173,7 +173,7 @@ class DatetimeProperties(Properties): 0 0 1 1 2 2 - dtype: int64 + dtype: int32 >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q")) >>> quarters_series @@ -185,7 +185,7 @@ class DatetimeProperties(Properties): 0 1 1 2 2 3 - dtype: int64 + dtype: int32 Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. 
@@ -303,7 +303,7 @@ class TimedeltaProperties(Properties): 0 1 1 2 2 3 - dtype: int64 + dtype: int32 """ def to_pytimedelta(self) -> np.ndarray: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 068ff7a0bf1c90..b06721ccade7af 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -106,6 +106,7 @@ is_scalar, is_signed_integer_dtype, is_string_dtype, + is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, validate_all_hashable, @@ -308,7 +309,7 @@ class Index(IndexOpsMixin, PandasObject): Examples -------- >>> pd.Index([1, 2, 3]) - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> pd.Index(list('abc')) Index(['a', 'b', 'c'], dtype='object') @@ -552,7 +553,7 @@ def _ensure_array(cls, data, dtype, copy: bool): data = data.copy() return data - @final + # @final (temporarily off until NumericIndex is merged into Index) @classmethod def _dtype_to_subclass(cls, dtype: DtypeObj): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 @@ -587,18 +588,14 @@ def _dtype_to_subclass(cls, dtype: DtypeObj): return TimedeltaIndex - elif dtype.kind == "f": - from pandas.core.api import Float64Index - - return Float64Index - elif dtype.kind == "u": - from pandas.core.api import UInt64Index - - return UInt64Index - elif dtype.kind == "i": - from pandas.core.api import Int64Index + elif ( + is_numeric_dtype(dtype) + and not is_bool_dtype(dtype) + and not is_complex_dtype(dtype) + ): + from pandas.core.api import NumericIndex - return Int64Index + return NumericIndex elif dtype.kind == "O": # NB: assuming away MultiIndex @@ -1039,18 +1036,34 @@ def astype(self, dtype, copy: bool = True): new_values = astype_nansafe(values, dtype=dtype, copy=copy) # pass copy=False because any copying will be done in the astype above - if self._is_backward_compat_public_numeric_index: - # this block is needed so e.g. 
NumericIndex[int8].astype("int32") returns - # NumericIndex[int32] and not Int64Index with dtype int64. + if not self._is_backward_compat_public_numeric_index and not isinstance( + self, ABCRangeIndex + ): + # this block is needed so e.g. Int64Index.astype("int32") returns + # Int64Index and not a NumericIndex with dtype int32. # When Int64Index etc. are removed from the code base, removed this also. if ( isinstance(dtype, np.dtype) and is_numeric_dtype(dtype) and not is_complex_dtype(dtype) ): - return self._constructor( - new_values, name=self.name, dtype=dtype, copy=False + from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, ) + + klass: type[Index] + if is_signed_integer_dtype(dtype): + klass = Int64Index + elif is_unsigned_integer_dtype(dtype): + klass = UInt64Index + elif is_float_dtype(dtype): + klass = Float64Index + else: + klass = Index + return klass(new_values, name=self.name, dtype=dtype, copy=False) + return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False) _index_shared_docs[ @@ -1749,9 +1762,9 @@ def set_names( -------- >>> idx = pd.Index([1, 2, 3, 4]) >>> idx - Int64Index([1, 2, 3, 4], dtype='int64') + NumericIndex([1, 2, 3, 4], dtype='int64') >>> idx.set_names('quarter') - Int64Index([1, 2, 3, 4], dtype='int64', name='quarter') + NumericIndex([1, 2, 3, 4], dtype='int64', name='quarter') >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], ... 
[2018, 2019]]) @@ -2037,7 +2050,7 @@ def droplevel(self, level: IndexLabel = 0): names=['x', 'y']) >>> mi.droplevel(['x', 'y']) - Int64Index([5, 6], dtype='int64', name='z') + NumericIndex([5, 6], dtype='int64', name='z') """ if not isinstance(level, (tuple, list)): level = [level] @@ -2652,7 +2665,7 @@ def isna(self) -> npt.NDArray[np.bool_]: >>> idx = pd.Index([5.2, 6.0, np.NaN]) >>> idx - Float64Index([5.2, 6.0, nan], dtype='float64') + NumericIndex([5.2, 6.0, nan], dtype='float64') >>> idx.isna() array([False, False, True]) @@ -2709,7 +2722,7 @@ def notna(self) -> npt.NDArray[np.bool_]: >>> idx = pd.Index([5.2, 6.0, np.NaN]) >>> idx - Float64Index([5.2, 6.0, nan], dtype='float64') + NumericIndex([5.2, 6.0, nan], dtype='float64') >>> idx.notna() array([ True, True, False]) @@ -3021,7 +3034,7 @@ def union(self, other, sort=None): >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.union(idx2) - Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + NumericIndex([1, 2, 3, 4, 5, 6], dtype='int64') Union mismatched dtypes @@ -3213,7 +3226,7 @@ def intersection(self, other, sort: bool = False): >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.intersection(idx2) - Int64Index([3, 4], dtype='int64') + NumericIndex([3, 4], dtype='int64') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -3360,9 +3373,9 @@ def difference(self, other, sort=None): >>> idx1 = pd.Index([2, 1, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.difference(idx2) - Int64Index([1, 2], dtype='int64') + NumericIndex([1, 2], dtype='int64') >>> idx1.difference(idx2, sort=False) - Int64Index([2, 1], dtype='int64') + NumericIndex([2, 1], dtype='int64') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -3443,7 +3456,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([2, 3, 4, 5]) >>> idx1.symmetric_difference(idx2) - Int64Index([1, 5], 
dtype='int64') + NumericIndex([1, 5], dtype='int64') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -5061,7 +5074,7 @@ def __contains__(self, key: Any) -> bool: -------- >>> idx = pd.Index([1, 2, 3, 4]) >>> idx - Int64Index([1, 2, 3, 4], dtype='int64') + NumericIndex([1, 2, 3, 4], dtype='int64') >>> 2 in idx True @@ -5213,6 +5226,7 @@ def putmask(self, mask, value) -> Index: if self.dtype != object and is_valid_na_for_dtype(value, self.dtype): # e.g. None -> np.nan, see also Block._standardize_fill_value value = self._na_value + try: converted = self._validate_fill_value(value) except (LossySetitemError, ValueError, TypeError) as err: @@ -5259,7 +5273,7 @@ def equals(self, other: Any) -> bool: -------- >>> idx1 = pd.Index([1, 2, 3]) >>> idx1 - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> idx1.equals(pd.Index([1, 2, 3])) True @@ -5276,10 +5290,10 @@ def equals(self, other: Any) -> bool: >>> ascending_idx = pd.Index([1, 2, 3]) >>> ascending_idx - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> descending_idx = pd.Index([3, 2, 1]) >>> descending_idx - Int64Index([3, 2, 1], dtype='int64') + NumericIndex([3, 2, 1], dtype='int64') >>> ascending_idx.equals(descending_idx) False @@ -5287,10 +5301,10 @@ def equals(self, other: Any) -> bool: >>> int64_idx = pd.Index([1, 2, 3], dtype='int64') >>> int64_idx - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64') >>> uint64_idx - UInt64Index([1, 2, 3], dtype='uint64') + NumericIndex([1, 2, 3], dtype='uint64') >>> int64_idx.equals(uint64_idx) True """ @@ -5513,18 +5527,18 @@ def sort_values( -------- >>> idx = pd.Index([10, 100, 1, 1000]) >>> idx - Int64Index([10, 100, 1, 1000], dtype='int64') + NumericIndex([10, 100, 1, 1000], dtype='int64') Sort values in ascending order (default behavior). 
>>> idx.sort_values() - Int64Index([1, 10, 100, 1000], dtype='int64') + NumericIndex([1, 10, 100, 1000], dtype='int64') Sort values in descending order, and also get the indices `idx` was sorted by. >>> idx.sort_values(ascending=False, return_indexer=True) - (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) + (NumericIndex([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) """ idx = ensure_key_mapped(self, key) @@ -6081,13 +6095,6 @@ def map(self, mapper, na_action=None): new_values, self.dtype, same_dtype=same_dtype ) - if self._is_backward_compat_public_numeric_index and is_numeric_dtype( - new_values.dtype - ): - return self._constructor( - new_values, dtype=dtype, copy=False, name=self.name - ) - return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) # TODO: De-duplicate with map, xref GH#32349 @@ -6155,7 +6162,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: -------- >>> idx = pd.Index([1,2,3]) >>> idx - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') Check whether each index value in a list of values. @@ -6564,10 +6571,17 @@ def insert(self, loc: int, item) -> Index: loc = loc if loc >= 0 else loc - 1 new_values[loc] = item - if self._typ == "numericindex": - # Use self._constructor instead of Index to retain NumericIndex GH#43921 - # TODO(2.0) can use Index instead of self._constructor - return self._constructor._with_infer(new_values, name=self.name) + if not self._is_backward_compat_public_numeric_index: + from pandas.core.indexes.numeric import NumericIndex + + if not isinstance(self, ABCRangeIndex) or not isinstance( + self, NumericIndex + ): + return Index._with_infer(new_values, name=self.name) + else: + # Use self._constructor instead of Index to retain old-style num. 
index + # TODO(2.0) can use Index instead of self._constructor + return self._constructor._with_infer(new_values, name=self.name) else: return Index._with_infer(new_values, name=self.name) @@ -6924,7 +6938,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: Examples -------- >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) - Int64Index([1, 2, 3], dtype='int64', name='name') + NumericIndex([1, 2, 3], dtype='int64', name='name') >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) MultiIndex([('a', 'a'), diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 816a1752c5bf02..72b63215167ad7 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -12,7 +12,9 @@ lib, ) from pandas._typing import ( + ArrayLike, Dtype, + DtypeObj, npt, ) from pandas.util._decorators import ( @@ -98,6 +100,7 @@ class NumericIndex(Index): np.dtype(np.uint16): libindex.UInt16Engine, np.dtype(np.uint32): libindex.UInt32Engine, np.dtype(np.uint64): libindex.UInt64Engine, + np.dtype(np.float16): libindex.Float32Engine, # no float16 engine np.dtype(np.float32): libindex.Float32Engine, np.dtype(np.float64): libindex.Float64Engine, np.dtype(np.complex64): libindex.Complex64Engine, @@ -110,6 +113,13 @@ def _engine_type(self) -> type[libindex.IndexEngine]: # "Dict[dtype[Any], Type[IndexEngine]]"; expected type "dtype[Any]" return self._engine_types[self.dtype] # type: ignore[index] + def _get_engine_target(self) -> ArrayLike: + vals = self._values + # pandas has no Float16Engine, so we use Float32Engine instead + if vals.dtype == "float16": + vals = vals.astype("float32") + return vals + @cache_readonly def inferred_type(self) -> str: return { @@ -347,7 +357,26 @@ def _format_native_types( """ -class IntegerIndex(NumericIndex): +class TempBaseIndex(NumericIndex): + @classmethod + def _dtype_to_subclass(cls, dtype: DtypeObj): + if is_integer_dtype(dtype): + from pandas.core.api import 
Int64Index + + return Int64Index + elif is_unsigned_integer_dtype(dtype): + from pandas.core.api import UInt64Index + + return UInt64Index + elif is_float_dtype(dtype): + from pandas.core.api import Float64Index + + return Float64Index + else: + return super()._dtype_to_subclass(dtype) + + +class IntegerIndex(TempBaseIndex): """ This is an abstract class for Int64Index, UInt64Index. """ @@ -391,7 +420,7 @@ def _engine_type(self) -> type[libindex.UInt64Engine]: return libindex.UInt64Engine -class Float64Index(NumericIndex): +class Float64Index(TempBaseIndex): _index_descr_args = { "klass": "Float64Index", "dtype": "float64", diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ae88b85aa06e18..60bcd4a2ecd3cf 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -184,9 +184,9 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: # error: Return type "Type[Int64Index]" of "_constructor" incompatible with return # type "Type[RangeIndex]" in supertype "Index" @cache_readonly - def _constructor(self) -> type[Int64Index]: # type: ignore[override] + def _constructor(self) -> type[NumericIndex]: # type: ignore[override] """return the class to use for construction""" - return Int64Index + return NumericIndex # error: Signature of "_data" incompatible with supertype "Index" @cache_readonly diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3d777f5579fff4..a9dad23fda5b21 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -109,10 +109,7 @@ from pandas.core.indexers import check_setitem_lengths if TYPE_CHECKING: - from pandas.core.api import ( - Float64Index, - Index, - ) + from pandas.core.api import Index from pandas.core.arrays._mixins import NDArrayBackedExtensionArray # comparison is faster than is_object_dtype @@ -1287,7 +1284,7 @@ def shift( @final def quantile( self, - qs: Float64Index, + qs: Index, # with dtype float64 
interpolation: QuantileInterpolation = "linear", axis: AxisInt = 0, ) -> Block: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3eca3756e16780..57f5eae6d706b1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -64,7 +64,6 @@ ) from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import ( - Float64Index, Index, ensure_index, ) @@ -1557,7 +1556,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool: def quantile( self: T, *, - qs: Float64Index, + qs: Index, # with dtype float64 axis: AxisInt = 0, interpolation: QuantileInterpolation = "linear", ) -> T: @@ -1585,7 +1584,7 @@ def quantile( assert axis == 1 # only ever called this way new_axes = list(self.axes) - new_axes[1] = Float64Index(qs) + new_axes[1] = Index(qs, dtype=np.float64) blocks = [ blk.quantile(axis=axis, qs=qs, interpolation=interpolation) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 71a50c69bfee16..45f3d14438e491 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2248,7 +2248,7 @@ def count(self, pat, flags: int = 0): This is also available on Index >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') - Int64Index([0, 0, 2, 1], dtype='int64') + NumericIndex([0, 0, 2, 1], dtype='int64') """ result = self._data.array._str_count(pat, flags) return self._wrap_result(result, returns_string=False) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 4ecd5b7604088f..e781da74e97aa6 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -17,7 +17,7 @@ from pandas.core.api import ( DataFrame, - Int64Index, + NumericIndex, RangeIndex, ) from pandas.core.shared_docs import _shared_docs @@ -62,7 +62,7 @@ def to_feather( # validate that we have only a default index # raise on anything else as we don't serialize the index - if not isinstance(df.index, (Int64Index, 
RangeIndex)): + if not (isinstance(df.index, NumericIndex) and df.index.dtype == "int64"): typ = type(df.index) raise ValueError( f"feather does not support serializing {typ} " diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 5e87db93cf56cc..27b61d502e9de4 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -165,7 +165,7 @@ >>> s = pd.Series(text_values, index=int_values) >>> s.info() - Int64Index: 5 entries, 1 to 5 + NumericIndex: 5 entries, 1 to 5 Series name: None Non-Null Count Dtype -------------- ----- @@ -177,7 +177,7 @@ >>> s.info(verbose=False) - Int64Index: 5 entries, 1 to 5 + NumericIndex: 5 entries, 1 to 5 dtypes: object(1) memory usage: 80.0+ bytes diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 5986f1f6cf51d4..e0145cf93a9070 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -186,7 +186,7 @@ def test_apply_datetimetz(): # change dtype # GH 14506 : Returned dtype changed from int32 to int64 result = s.apply(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32) tm.assert_series_equal(result, exp) # not vectorized @@ -766,7 +766,7 @@ def test_map_datetimetz(): # change dtype # GH 14506 : Returned dtype changed from int32 to int64 result = s.map(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32) tm.assert_series_equal(result, exp) # not vectorized diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index b44af07cee01da..e8b43ea98c032b 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -111,7 +111,7 @@ def test_categorical_repr(self): assert repr(c) == exp - c = Categorical(np.arange(20)) + c = 
Categorical(np.arange(20, dtype=np.int64)) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" @@ -138,7 +138,7 @@ def test_categorical_repr_ordered(self): assert repr(c) == exp - c = Categorical(np.arange(20), ordered=True) + c = Categorical(np.arange(20, dtype=np.int64), ordered=True) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" @@ -380,7 +380,7 @@ def test_categorical_index_repr(self): exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa:E501 assert repr(idx) == exp - i = CategoricalIndex(Categorical(np.arange(10))) + i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64))) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')""" # noqa:E501 assert repr(i) == exp @@ -389,7 +389,7 @@ def test_categorical_index_repr_ordered(self): exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa:E501 assert repr(i) == exp - i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) + i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64), ordered=True)) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')""" # noqa:E501 assert repr(i) == exp diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 2a6bea32553429..487b628e80d148 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -167,7 +167,7 @@ def test_repr(): expected = ( "\n" "[(0, 1], (1, 2]]\n" - "Length: 2, dtype: interval[int64, right]" + f"Length: 2, dtype: interval[{np.intp.__name__}, right]" ) assert result == expected @@ -287,7 +287,7 @@ def test_arrow_array(): with 
pytest.raises(TypeError, match="Not supported to convert IntervalArray"): pa.array(intervals, type="float64") - with pytest.raises(TypeError, match="different 'subtype'"): + with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left")) diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 36af5d32ae4616..df5dd8b8b182c5 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -41,7 +41,12 @@ def test_from_coo(self): sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int") result = pd.Series.sparse.from_coo(sp_array) - index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) + index = pd.MultiIndex.from_arrays( + [ + np.array([0, 0, 1, 3], dtype=np.int32), + np.array([0, 2, 1, 3], dtype=np.int32), + ], + ) expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") tm.assert_series_equal(result, expected) @@ -212,7 +217,15 @@ def test_series_from_coo(self, dtype, dense_index): A = scipy.sparse.eye(3, format="coo", dtype=dtype) result = pd.Series.sparse.from_coo(A, dense_index=dense_index) - index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + index_dtype = np.int64 if dense_index else np.int32 + index = pd.MultiIndex.from_tuples( + [ + np.array([0, 0], dtype=index_dtype), + np.array([1, 1], dtype=index_dtype), + np.array([2, 2], dtype=index_dtype), + ], + ) expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) if dense_index: expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index acba1bd5573510..ee1ccb8caee917 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -443,7 +443,8 @@ def DecimalArray__my_sum(self): result = 
df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum()) tm.assert_series_equal(result, expected, check_names=False) s = pd.Series(DecimalArray(data)) - result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum()) + grouper = np.array([0, 0, 0, 1, 1], dtype=np.int64) + result = s.groupby(grouper).agg(lambda x: x.values.my_sum()) tm.assert_series_equal(result, expected, check_names=False) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 6cba95e42463d4..c04213c215f0d9 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -85,7 +85,7 @@ def test_constructor_list_of_series(self): expected = DataFrame.from_dict(sdict, orient="index") tm.assert_frame_equal(result, expected.reindex(result.index)) - result2 = DataFrame(data, index=np.arange(6)) + result2 = DataFrame(data, index=np.arange(6, dtype=np.int64)) tm.assert_frame_equal(result, result2) result = DataFrame([Series(dtype=object)]) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4e4d0590830dec..a3adbe9e1e0bc9 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -736,7 +736,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): # positional slicing only via iloc! 
msg = ( - "cannot do positional indexing on Float64Index with " + "cannot do positional indexing on NumericIndex with " r"these indexers \[1.0\] of type float" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 501822f856a635..133760f7346f33 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -647,7 +647,8 @@ def test_df_where_change_dtype(self): @pytest.mark.parametrize("kwargs", [{}, {"other": None}]) def test_df_where_with_category(self, kwargs): # GH#16979 - df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + data = np.arange(2 * 3, dtype=np.int64).reshape(2, 3) + df = DataFrame(data, columns=list("ABC")) mask = np.array([[True, False, False], [False, False, True]]) # change type to category diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 8e5f11840fbe55..f3b5a51bac2120 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -159,7 +159,7 @@ def test_set_index_cast(self): df = DataFrame( {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012] ) - df2 = df.set_index(df.index.astype(np.int32)) + df2 = df.set_index(df.index.astype(np.int64)) tm.assert_frame_equal(df, df2) # A has duplicate values, C does not diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 7487b2c70a2640..88d9899613b9e1 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -62,8 +62,8 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame): # corner case dm = DataFrame( { - "s1": Series(range(3), index=np.arange(3)), - "s2": Series(range(2), index=np.arange(2)), + "s1": Series(range(3), index=np.arange(3, dtype=np.int64)), + "s2": Series(range(2), index=np.arange(2, dtype=np.int64)), } 
) dm.to_csv(path) @@ -486,7 +486,7 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): frame = float_frame old_index = frame.index - arrays = np.arange(len(old_index) * 2).reshape(2, -1) + arrays = np.arange(len(old_index) * 2, dtype=np.int64).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index e420274e7fd82c..16072ba6f5c06a 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -584,7 +584,7 @@ def test_mode_sortwarning(self): def test_mode_empty_df(self): df = DataFrame([], columns=["a", "b"]) result = df.mode() - expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=int)) + expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=np.int64)) tm.assert_frame_equal(result, expected) def test_operators_timedelta64(self): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 96be7a0cb785c3..37ec1376e7173f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -93,7 +93,9 @@ def test_groupby_nonobject_dtype(mframe, df_mixed_floats): result = grouped.sum() expected = mframe.groupby(key.astype("O")).sum() - tm.assert_frame_equal(result, expected) + assert result.index.dtype == np.int8 + assert expected.index.dtype == np.int64 + tm.assert_frame_equal(result, expected, check_index_type=False) # GH 3911, mixed frame non-conversion df = df_mixed_floats.copy() @@ -228,6 +230,7 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): 2: tsframe[tsframe.index.month == 2].quantile(0.8), } expected = DataFrame(ex_data).T + expected.index = expected.index.astype(np.int32) if not as_index: # TODO: try to get this more consistent? 
expected.index = Index(range(2)) @@ -2814,7 +2817,7 @@ def test_groupby_overflow(val, dtype): result = df.groupby("a").sum() expected = DataFrame( {"b": [val * 2]}, - index=Index([1], name="a", dtype=f"{dtype}64"), + index=Index([1], name="a", dtype=f"{dtype}8"), dtype=f"{dtype}64", ) tm.assert_frame_equal(result, expected) @@ -2826,7 +2829,7 @@ def test_groupby_overflow(val, dtype): result = df.groupby("a").prod() expected = DataFrame( {"b": [val * val]}, - index=Index([1], name="a", dtype=f"{dtype}64"), + index=Index([1], name="a", dtype=f"{dtype}8"), dtype=f"{dtype}64", ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index ad8051792266ea..f7938779c05272 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -311,7 +311,7 @@ def test_transform_datetime_to_numeric(): lambda x: x.dt.dayofweek - x.dt.dayofweek.min() ) - expected = Series([0, 1], name="b") + expected = Series([0, 1], dtype=np.int32, name="b") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2fb95942b08db4..fb9e5dae19f7d7 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -848,11 +848,7 @@ def test_insert_non_na(self, simple_index): result = index.insert(0, index[0]) - cls = type(index) - if cls is RangeIndex: - cls = Int64Index - - expected = cls([index[0]] + list(index), dtype=index.dtype) + expected = Index([index[0]] + list(index), dtype=index.dtype) tm.assert_index_equal(result, expected, exact=True) def test_insert_na(self, nulls_fixture, simple_index): @@ -863,7 +859,7 @@ def test_insert_na(self, nulls_fixture, simple_index): if na_val is pd.NaT: expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) else: - expected = Float64Index([index[0], np.nan] + list(index[1:])) + expected = Index([index[0], np.nan] 
+ list(index[1:])) if index._is_backward_compat_public_numeric_index: # GH#43921 we preserve NumericIndex diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index adbf6c715fef65..abff5b16940253 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -812,7 +812,7 @@ def test_date_range_span_dst_transition(self, tzstr): dr = date_range("2012-11-02", periods=10, tz=tzstr) result = dr.hour - expected = pd.Index([0] * 10) + expected = pd.Index([0] * 10, dtype="int32") tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 1dc01a3d7f937f..a41645f46314ae 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -281,8 +281,9 @@ def test_datetime_name_accessors(self, time_locale): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) + expected = Index(np.arange(10, dtype=np.int32)) - tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64))) + tm.assert_index_equal(dti.nanosecond, expected) def test_iter_readonly(): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 0bc2862e550218..81128efff6c15e 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -113,28 +113,28 @@ def test_dti_tz_convert_hour_overflow_dst(self): ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"] tt = DatetimeIndex(ts).tz_localize("US/Eastern") ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", 
"2009-05-12 13:50:32"] tt = DatetimeIndex(ts).tz_localize("UTC") ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] tt = DatetimeIndex(ts).tz_localize("US/Eastern") ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] tt = DatetimeIndex(ts).tz_localize("UTC") ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) @@ -149,7 +149,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern @@ -160,7 +160,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC @@ -171,7 +171,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern @@ -182,7 +182,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) 
tm.assert_index_equal(ut.hour, expected) @pytest.mark.parametrize("freq, n", [("H", 1), ("T", 60), ("S", 3600)]) @@ -194,7 +194,7 @@ def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): idx = idx.tz_convert("Europe/Moscow") expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) def test_dti_tz_convert_dst(self): for freq, n in [("H", 1), ("T", 60), ("S", 3600)]: @@ -207,7 +207,7 @@ def test_dti_tz_convert_dst(self): np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) idx = date_range( "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" @@ -217,7 +217,7 @@ def test_dti_tz_convert_dst(self): np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) # End DST idx = date_range( @@ -228,7 +228,7 @@ def test_dti_tz_convert_dst(self): np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) idx = date_range( "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" @@ -238,30 +238,30 @@ def test_dti_tz_convert_dst(self): np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) # daily # Start DST idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") idx = idx.tz_convert("US/Eastern") - tm.assert_index_equal(idx.hour, Index([19, 19])) + 
tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32)) idx = date_range( "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" ) idx = idx.tz_convert("UTC") - tm.assert_index_equal(idx.hour, Index([5, 5])) + tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32)) # End DST idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC") idx = idx.tz_convert("US/Eastern") - tm.assert_index_equal(idx.hour, Index([20, 20])) + tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32)) idx = date_range( "2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern" ) idx = idx.tz_convert("UTC") - tm.assert_index_equal(idx.hour, Index([4, 4])) + tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32)) def test_tz_convert_roundtrip(self, tz_aware_fixture): tz = tz_aware_fixture @@ -1134,7 +1134,7 @@ def test_field_access_localize(self, prefix): "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan" ) - expected = Index(np.arange(10, dtype=np.int64)) + expected = Index(np.arange(10, dtype=np.int32)) tm.assert_index_equal(dr.hour, expected) @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index d0345861d6778f..6233e2a2935baa 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -11,6 +11,10 @@ Series, ) import pandas._testing as tm +from pandas.api.types import ( + is_float_dtype, + is_unsigned_integer_dtype, +) @pytest.mark.parametrize("case", [0.5, "xxx"]) @@ -632,7 +636,10 @@ def test_union_duplicates(index, request): expected = mi2.sort_values() tm.assert_index_equal(result, expected) - if mi2.levels[0].dtype == np.uint64 and (mi2.get_level_values(0) < 2**63).all(): + if ( + is_unsigned_integer_dtype(mi2.levels[0]) + and (mi2.get_level_values(0) < 2**63).all() + ): # GH#47294 - union uses lib.fast_zip, converting 
data to Python integers # and loses type information. Result is then unsigned only when values are # sufficiently large to require unsigned dtype. This happens only if other @@ -640,6 +647,13 @@ def test_union_duplicates(index, request): expected = expected.set_levels( [expected.levels[0].astype(int), expected.levels[1]] ) + elif is_float_dtype(mi2.levels[0]): + # mi2 has duplicates witch is a different path than above, Fix that path + # to use correct float dtype? + expected = expected.set_levels( + [expected.levels[0].astype(float), expected.levels[1]] + ) + result = mi1.union(mi2) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index 3c4486daa60dfa..634c6cbaa329f9 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -43,7 +43,7 @@ def test_astype_float64_to_int_dtype(self, dtype): # a float astype int idx = Index([0, 1, 2], dtype=np.float64) result = idx.astype(dtype) - expected = Index([0, 1, 2], dtype=np.int64) + expected = Index([0, 1, 2], dtype=dtype) tm.assert_index_equal(result, expected, exact=True) idx = Index([0, 1.1, 2], dtype=np.float64) @@ -57,13 +57,7 @@ def test_astype_float64_to_float_dtype(self, dtype): # a float astype int idx = Index([0, 1, 2], dtype=np.float64) result = idx.astype(dtype) - expected = idx - tm.assert_index_equal(result, expected, exact=True) - - idx = Index([0, 1.1, 2], dtype=np.float64) - result = idx.astype(dtype) - expected = Index(idx.values.astype(dtype)) - tm.assert_index_equal(result, expected, exact=True) + assert isinstance(result, Index) and result.dtype == dtype @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) def test_cannot_cast_to_datetimelike(self, dtype): diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 8901ef7cb3e33d..e2b86472375fef 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ 
b/pandas/tests/indexes/numeric/test_numeric.py @@ -154,12 +154,6 @@ def test_type_coercion_fail(self, any_int_numpy_dtype): with pytest.raises(ValueError, match=msg): Index([1, 2, 3.5], dtype=any_int_numpy_dtype) - def test_type_coercion_valid(self, float_numpy_dtype): - # There is no Float32Index, so we always - # generate Float64Index. - idx = Index([1, 2, 3.5], dtype=float_numpy_dtype) - tm.assert_index_equal(idx, Index([1, 2, 3.5]), exact=True) - def test_equals_numeric(self): index_cls = self._index_cls @@ -452,14 +446,14 @@ def test_constructor_np_signed(self, any_signed_int_numpy_dtype): # GH#47475 scalar = np.dtype(any_signed_int_numpy_dtype).type(1) result = Index([scalar]) - expected = Index([1], dtype=np.int64) + expected = Index([1], dtype=any_signed_int_numpy_dtype) tm.assert_index_equal(result, expected, exact=True) def test_constructor_np_unsigned(self, any_unsigned_int_numpy_dtype): # GH#47475 scalar = np.dtype(any_unsigned_int_numpy_dtype).type(1) result = Index([scalar]) - expected = Index([1], dtype=np.uint64) + expected = Index([1], dtype=any_unsigned_int_numpy_dtype) tm.assert_index_equal(result, expected, exact=True) def test_coerce_list(self): diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py index ed21996de891b9..c3c2560693d3d5 100644 --- a/pandas/tests/indexes/ranges/test_join.py +++ b/pandas/tests/indexes/ranges/test_join.py @@ -66,7 +66,7 @@ def test_join_inner(self): elidx = np.array([8, 9], dtype=np.intp) eridx = np.array([9, 7], dtype=np.intp) - assert isinstance(res, Int64Index) + assert isinstance(res, Index) and res.dtype == np.int64 tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 003c69a6a11a67..b77289d5aadcd8 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ 
-218,7 +218,7 @@ def test_delete_not_preserving_rangeindex(self): loc = [0, 3, 5] result = idx.delete(loc) - expected = Int64Index([1, 2, 4]) + expected = Index([1, 2, 4]) tm.assert_index_equal(result, expected, exact=True) result = idx.delete(loc[::-1]) @@ -536,8 +536,8 @@ def test_len_specialised(self, step): ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)), ([RI(2), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])), ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)), - ([RI(3), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])), - ([RI(3), F64([-1, 3.1, 15.0])], F64([0, 1, 2, -1, 3.1, 15.0])), + ([RI(3), OI([-1, 3, 15])], OI([0, 1, 2, -1, 3, 15])), + ([RI(3), OI([-1, 3.1, 15.0])], OI([0, 1, 2, -1, 3.1, 15.0])), ([RI(3), OI(["a", None, 14])], OI([0, 1, 2, "a", None, 14])), ([RI(3, 1), OI(["a", None, 14])], OI(["a", None, 14])), ] diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 71bd2f5590b8f5..29ba5a91498db1 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -16,16 +16,15 @@ Index, Int64Index, RangeIndex, - UInt64Index, ) class TestRangeIndexSetOps: - @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) - def test_intersection_mismatched_dtype(self, klass): + @pytest.mark.parametrize("dtype", [None, "int64", "uint64"]) + def test_intersection_mismatched_dtype(self, dtype): # check that we cast to float, not object index = RangeIndex(start=0, stop=20, step=2, name="foo") - index = klass(index) + index = Index(index, dtype=dtype) flt = index.astype(np.float64) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ea0504c6ad4003..b0b3a6132cb098 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -561,7 +561,7 @@ def test_map_tseries_indices_return_index(self, attr): def test_map_tseries_indices_accsr_return_index(self): date_index = tm.makeDateIndex(24, freq="h", name="hourly") - 
expected = Int64Index(range(24), name="hourly") + expected = Index(range(24), dtype="int32", name="hourly") tm.assert_index_equal(expected, date_index.map(lambda x: x.hour), exact=True) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 4a1333e2b18b42..d6214a79adc93f 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -10,8 +10,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_unsigned_integer_dtype - from pandas import ( NA, Categorical, @@ -31,11 +29,7 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, - UInt64Index, -) +from pandas.core.api import NumericIndex class TestIndexConstructorInference: @@ -90,12 +84,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype): ) def test_constructor_int_dtype_float(self, dtype): # GH#18400 - if is_unsigned_integer_dtype(dtype): - index_type = UInt64Index - else: - index_type = Int64Index - - expected = index_type([0, 1, 2, 3]) + expected = NumericIndex([0, 1, 2, 3], dtype=dtype) result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) tm.assert_index_equal(result, expected) @@ -294,7 +283,7 @@ def test_constructor_int_dtype_nan_raises(self, dtype): ) def test_constructor_dtypes_to_int64(self, vals): index = Index(vals, dtype=int) - assert isinstance(index, Int64Index) + assert isinstance(index, NumericIndex) and index.dtype == "int64" @pytest.mark.parametrize( "vals", @@ -308,7 +297,7 @@ def test_constructor_dtypes_to_int64(self, vals): ) def test_constructor_dtypes_to_float64(self, vals): index = Index(vals, dtype=float) - assert isinstance(index, Float64Index) + assert isinstance(index, NumericIndex) and index.dtype == "float64" @pytest.mark.parametrize( "vals", diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 453ece35a68e7f..3c76387d6cd77b 100644 --- 
a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -10,10 +10,8 @@ isna, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - NumericIndex, -) +from pandas.api.types import is_complex_dtype +from pandas.core.api import NumericIndex from pandas.core.arrays import BooleanArray from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -81,8 +79,15 @@ def test_numpy_ufuncs_basic(index, func): tm.assert_index_equal(result, exp) if type(index) is not Index or index.dtype == bool: - # i.e NumericIndex - assert isinstance(result, Float64Index) + assert type(result) is NumericIndex + if is_complex_dtype(index): + assert result.dtype == "complex64" + elif index.dtype in ["bool", "int8", "uint8", "float16"]: + assert result.dtype == "float16" + elif index.dtype in ["int16", "uint16", "float32"]: + assert result.dtype == "float32" + else: + assert result.dtype == "float64" else: # e.g. np.exp with Int64 -> Float64 assert type(result) is Index diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 01efbfb9ae0c09..7d1f6aa2df11df 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -14,7 +14,6 @@ ) import pandas._testing as tm from pandas.core.arrays import TimedeltaArray -from pandas.core.indexes.api import Int64Index from pandas.tests.indexes.datetimelike import DatetimeLike randn = np.random.randn @@ -56,7 +55,7 @@ def test_map(self): f = lambda x: x.days result = rng.map(f) - exp = Int64Index([f(x) for x in rng]) + exp = Index([f(x) for x in rng], dtype=np.int32) tm.assert_index_equal(result, exp) def test_pass_TimedeltaIndex_to_index(self): @@ -70,15 +69,16 @@ def test_pass_TimedeltaIndex_to_index(self): def test_fields(self): rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") - tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64")) 
+ tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int32)) tm.assert_index_equal( rng.seconds, - Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"), + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype=np.int32), ) tm.assert_index_equal( - rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64") + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype=np.int32), ) - tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype="int64")) + tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype=np.int32)) msg = "'TimedeltaIndex' object has no attribute '{}'" with pytest.raises(AttributeError, match=msg.format("hours")): diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index c81473cb945bcb..f579f280872a38 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -10,10 +10,6 @@ to_datetime, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, -) class TestMultiIndexPartial: @@ -160,9 +156,9 @@ def test_getitem_intkey_leading_level( mi = ser.index assert isinstance(mi, MultiIndex) if dtype is int: - assert isinstance(mi.levels[0], Int64Index) + assert mi.levels[0].dtype == np.int64 else: - assert isinstance(mi.levels[0], Float64Index) + assert mi.levels[0].dtype == np.float64 assert 14 not in mi.levels[0] assert not mi.levels[0]._should_fallback_to_positional diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 26eb7532adfa4e..b45bac70ba9339 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -437,7 +437,7 @@ def test_loc_to_fail(self): # raise a KeyError? 
msg = ( - r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " + r"\"None of \[NumericIndex\(\[1, 2\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -455,7 +455,7 @@ def test_loc_to_fail2(self): s.loc[-1] msg = ( - r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " + r"\"None of \[NumericIndex\(\[-1, -2\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -471,7 +471,7 @@ def test_loc_to_fail2(self): s["a"] = 2 msg = ( - r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " + r"\"None of \[NumericIndex\(\[-2\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -488,7 +488,7 @@ def test_loc_to_fail3(self): df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) msg = ( - r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " + r"\"None of \[NumericIndex\(\[3\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -508,7 +508,7 @@ def test_loc_getitem_list_with_fail(self): with pytest.raises( KeyError, match=re.escape( - "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" + "\"None of [NumericIndex([3], dtype='int64')] are in the [index]\"" ), ): s.loc[[3]] @@ -1214,7 +1214,7 @@ def test_loc_setitem_empty_append_raises(self): df = DataFrame(columns=["x", "y"]) df.index = df.index.astype(np.int64) msg = ( - r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " + r"None of \[NumericIndex\(\[0, 1\], dtype='int64'\)\] " r"are in the \[index\]" ) with pytest.raises(KeyError, match=msg): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 938056902e7454..0f926ac1d368b9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -403,7 +403,7 @@ def test_series_partial_set(self): # raises as nothing is in the index msg = ( - r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] 
are " + r"\"None of \[NumericIndex\(\[3, 3, 3\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -484,7 +484,7 @@ def test_series_partial_set_with_name(self): # raises as nothing is in the index msg = ( - r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', " + r"\"None of \[NumericIndex\(\[3, 3, 3\], dtype='int64', " r"name='idx'\)\] are in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 2699d339504129..d1d002f361072f 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -21,7 +21,6 @@ _testing as tm, concat, ) -from pandas.core.api import Int64Index from pandas.tests.io.pytables.common import ( _maybe_remove, ensure_clean_store, @@ -250,7 +249,7 @@ def test_column_multiindex(setup_path): df = DataFrame(np.arange(12).reshape(3, 4), columns=index) expected = df.copy() if isinstance(expected.index, RangeIndex): - expected.index = Int64Index(expected.index) + expected.index = Index(expected.index.to_numpy()) with ensure_clean_store(setup_path) as store: @@ -282,7 +281,7 @@ def test_column_multiindex(setup_path): df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")) expected = df.copy() if isinstance(expected.index, RangeIndex): - expected.index = Int64Index(expected.index) + expected.index = Index(expected.index.to_numpy()) with ensure_clean_store(setup_path) as store: diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 75683a1d96bfb0..922ce663ab4936 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -829,6 +829,7 @@ def test_s3_roundtrip_for_dir( # GH #35791 if partition_col: + expected_df = expected_df.astype(dict.fromkeys(partition_col, np.int32)) partition_col_type = "category" expected_df[partition_col] = expected_df[partition_col].astype( diff --git 
a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 368e9d5f6e6a10..08d9b518945edc 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -287,7 +287,10 @@ def test_read_write_dta5(self): with tm.ensure_clean() as path: original.to_stata(path, convert_dates=None) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_write_dta6(self, datapath): original = self.read_csv(datapath("io", "data", "stata", "stata3.csv")) @@ -380,7 +383,10 @@ def test_read_write_dta11(self): original.to_stata(path, convert_dates=None) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + expected = formatted.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_read_write_dta12(self, version): @@ -417,7 +423,10 @@ def test_read_write_dta12(self, version): assert len(w) == 1 written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + expected = formatted.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_read_write_dta13(self): s1 = Series(2**9, dtype=np.int16) @@ -432,7 +441,10 @@ def test_read_write_dta13(self): with tm.ensure_clean() as path: original.to_stata(path) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + expected = formatted.copy() + expected.index = expected.index.astype(np.int32) + 
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @pytest.mark.parametrize( @@ -455,7 +467,10 @@ def test_read_write_reread_dta14(self, file, parsed_114, version, datapath): with tm.ensure_clean() as path: parsed_114.to_stata(path, convert_dates={"date_td": "td"}, version=version) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114) + + expected = parsed_114.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize( "file", ["stata6_113", "stata6_114", "stata6_115", "stata6_117"] @@ -510,11 +525,15 @@ def test_numeric_column_names(self): original.to_stata(path) written_and_read_again = self.read_dta(path) - written_and_read_again = written_and_read_again.set_index("index") - columns = list(written_and_read_again.columns) - convert_col_name = lambda x: int(x[1]) - written_and_read_again.columns = map(convert_col_name, columns) - tm.assert_frame_equal(original, written_and_read_again) + + written_and_read_again = written_and_read_again.set_index("index") + columns = list(written_and_read_again.columns) + convert_col_name = lambda x: int(x[1]) + written_and_read_again.columns = map(convert_col_name, columns) + + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(expected, written_and_read_again) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_nan_to_missing_value(self, version): @@ -524,11 +543,15 @@ def test_nan_to_missing_value(self, version): s2[1::2] = np.nan original = DataFrame({"s1": s1, "s2": s2}) original.index.name = "index" + with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) - written_and_read_again = written_and_read_again.set_index("index") - 
tm.assert_frame_equal(written_and_read_again, original) + + written_and_read_again = written_and_read_again.set_index("index") + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again, expected) def test_no_index(self): columns = ["x", "y"] @@ -548,7 +571,10 @@ def test_string_no_dates(self): with tm.ensure_clean() as path: original.to_stata(path) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_large_value_conversion(self): s0 = Series([1, 99], dtype=np.int8) @@ -562,11 +588,13 @@ def test_large_value_conversion(self): original.to_stata(path) written_and_read_again = self.read_dta(path) - modified = original.copy() - modified["s1"] = Series(modified["s1"], dtype=np.int16) - modified["s2"] = Series(modified["s2"], dtype=np.int32) - modified["s3"] = Series(modified["s3"], dtype=np.float64) - tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + modified = original.copy() + modified["s1"] = Series(modified["s1"], dtype=np.int16) + modified["s2"] = Series(modified["s2"], dtype=np.int32) + modified["s3"] = Series(modified["s3"], dtype=np.float64) + modified.index = original.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) def test_dates_invalid_column(self): original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)]) @@ -576,9 +604,11 @@ def test_dates_invalid_column(self): original.to_stata(path, convert_dates={0: "tc"}) written_and_read_again = self.read_dta(path) - modified = original.copy() - modified.columns = ["_0"] - tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + modified = original.copy() + modified.columns = ["_0"] + modified.index = 
original.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) def test_105(self, datapath): # Data obtained from: @@ -619,21 +649,32 @@ def test_date_export_formats(self): datetime(2006, 1, 1), ] # Year - expected = DataFrame([expected_values], columns=columns) - expected.index.name = "index" + expected = DataFrame( + [expected_values], + index=pd.Index([0], dtype=np.int32, name="index"), + columns=columns, + ) + with tm.ensure_clean() as path: original.to_stata(path, convert_dates=conversions) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_write_missing_strings(self): original = DataFrame([["1"], [None]], columns=["foo"]) - expected = DataFrame([["1"], [""]], columns=["foo"]) - expected.index.name = "index" + + expected = DataFrame( + [["1"], [""]], + index=pd.Index([0, 1], dtype=np.int32, name="index"), + columns=["foo"], + ) + with tm.ensure_clean() as path: original.to_stata(path) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @pytest.mark.parametrize("byteorder", [">", "<"]) @@ -651,6 +692,7 @@ def test_bool_uint(self, byteorder, version): ) original.index.name = "index" expected = original.copy() + expected.index = original.index.astype(np.int32) expected_types = ( np.int8, np.int8, @@ -666,8 +708,9 @@ def test_bool_uint(self, byteorder, version): with tm.ensure_clean() as path: original.to_stata(path, byteorder=byteorder, version=version) written_and_read_again = self.read_dta(path) - written_and_read_again = written_and_read_again.set_index("index") - tm.assert_frame_equal(written_and_read_again, expected) + + written_and_read_again = 
written_and_read_again.set_index("index") + tm.assert_frame_equal(written_and_read_again, expected) def test_variable_labels(self, datapath): with StataReader(datapath("io", "data", "stata", "stata7_115.dta")) as rdr: @@ -818,11 +861,12 @@ def test_big_dates(self, datapath): expected.index.name = "index" expected.to_stata(path, convert_dates=date_conversion) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal( - written_and_read_again.set_index("index"), - expected, - check_datetimelike_compat=True, - ) + + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + expected.set_index(expected.index.astype(np.int32)), + check_datetimelike_compat=True, + ) def test_dtype_conversion(self, datapath): expected = self.read_csv(datapath("io", "data", "stata", "stata6.csv")) @@ -936,7 +980,7 @@ def test_categorical_writing(self, version): original = pd.concat( [original[col].astype("category") for col in original], axis=1 ) - expected.index.name = "index" + expected.index = expected.index.set_names("index").astype(np.int32) expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str) expected["unlabeled"] = expected["unlabeled"].apply(str) @@ -955,8 +999,9 @@ def test_categorical_writing(self, version): with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) - res = written_and_read_again.set_index("index") - tm.assert_frame_equal(res, expected) + + res = written_and_read_again.set_index("index") + tm.assert_frame_equal(res, expected) def test_categorical_warnings_and_errors(self): # Warning for non-string labels @@ -1000,15 +1045,17 @@ def test_categorical_with_stata_missing_values(self, version): with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) - res = written_and_read_again.set_index("index") - expected = original.copy() - for col in expected: - cat = expected[col]._values - new_cats = 
cat.remove_unused_categories().categories - cat = cat.set_categories(new_cats, ordered=True) - expected[col] = cat - tm.assert_frame_equal(res, expected) + res = written_and_read_again.set_index("index") + + expected = original.copy() + for col in expected: + cat = expected[col]._values + new_cats = cat.remove_unused_categories().categories + cat = cat.set_categories(new_cats, ordered=True) + expected[col] = cat + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(res, expected) @pytest.mark.parametrize("file", ["stata10_115", "stata10_117"]) def test_categorical_order(self, file, datapath): @@ -1456,8 +1503,11 @@ def test_out_of_range_float(self): with tm.ensure_clean() as path: original.to_stata(path) reread = read_stata(path) - original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) - tm.assert_frame_equal(original, reread.set_index("index")) + + original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(reread.set_index("index"), expected) @pytest.mark.parametrize("infval", [np.inf, -np.inf]) def test_inf(self, infval): @@ -1885,7 +1935,10 @@ def test_compression(compression, version, use_dict, infer): elif compression is None: fp = path reread = read_stata(fp, index_col="index") - tm.assert_frame_equal(reread, df) + + expected = df.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(reread, expected) @pytest.mark.parametrize("method", ["zip", "infer"]) @@ -1906,20 +1959,29 @@ def test_compression_dict(method, file_ext): else: fp = path reread = read_stata(fp, index_col="index") - tm.assert_frame_equal(reread, df) + + expected = df.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(reread, expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_chunked_categorical(version): df = DataFrame({"cats": Series(["a", "b", "a", "b", 
"c"], dtype="category")}) df.index.name = "index" + + expected = df.copy() + expected.index = expected.index.astype(np.int32) + with tm.ensure_clean() as path: df.to_stata(path, version=version) with StataReader(path, chunksize=2, order_categoricals=False) as reader: for i, block in enumerate(reader): block = block.set_index("index") assert "cats" in block - tm.assert_series_equal(block.cats, df.cats.iloc[2 * i : 2 * (i + 1)]) + tm.assert_series_equal( + block.cats, expected.cats.iloc[2 * i : 2 * (i + 1)] + ) def test_chunked_categorical_partial(datapath): diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index ca5444fd4e62fc..8e7e5fd649f168 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -648,7 +648,7 @@ def test_selection_api_validation(): # non DatetimeIndex msg = ( "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " - "but got an instance of 'Int64Index'" + "but got an instance of 'NumericIndex'" ) with pytest.raises(TypeError, match=msg): df.resample("2D", level="v") diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 0dbe45eeb1e823..66093fa20abe34 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -441,14 +441,13 @@ def test_merge_datetime_index(self, klass): if klass is not None: on_vector = klass(on_vector) - expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + exp_years = np.array([2016, 2017, 2018], dtype=np.int32) + expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years}) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} - ) + expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]}) result = df.merge(df, on=[df.index.year], how="inner") 
tm.assert_frame_equal(result, expected) @@ -852,14 +851,13 @@ def test_merge_datetime_index(self, box): if box is not None: on_vector = box(on_vector) - expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + exp_years = np.array([2016, 2017, 2018], dtype=np.int32) + expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years}) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} - ) + expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]}) result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f9119ea43160b3..12cdb2d31ecc0b 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -411,7 +411,14 @@ def test_pivot_no_values(self): res = df.pivot_table(index=df.index.month, columns=df.index.day) exp_columns = MultiIndex.from_tuples([("A", 1), ("A", 2)]) - exp = DataFrame([[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns) + exp_columns = exp_columns.set_levels( + exp_columns.levels[1].astype(np.int32), level=1 + ) + exp = DataFrame( + [[2.5, 4.0], [2.0, np.nan]], + index=Index([1, 2], dtype=np.int32), + columns=exp_columns, + ) tm.assert_frame_equal(res, exp) df = DataFrame( @@ -424,7 +431,9 @@ def test_pivot_no_values(self): res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="M")) exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) exp_columns.names = [None, "dt"] - exp = DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) + exp = DataFrame( + [3.25, 2.0], index=Index([1, 2], dtype=np.int32), columns=exp_columns + ) tm.assert_frame_equal(res, exp) res = df.pivot_table( @@ -1604,7 +1613,7 @@ def test_pivot_dtaccessor(self): expected = DataFrame( {7: [0, 3], 8: [1, 4], 9: [2, 
5]}, index=exp_idx, - columns=Index([7, 8, 9], name="dt1"), + columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) tm.assert_frame_equal(result, expected) @@ -1614,8 +1623,8 @@ def test_pivot_dtaccessor(self): expected = DataFrame( {7: [0, 3], 8: [1, 4], 9: [2, 5]}, - index=Index([1, 2], name="dt2"), - columns=Index([7, 8, 9], name="dt1"), + index=Index([1, 2], dtype=np.int32, name="dt2"), + columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) tm.assert_frame_equal(result, expected) @@ -1627,10 +1636,16 @@ def test_pivot_dtaccessor(self): ) exp_col = MultiIndex.from_arrays( - [[7, 7, 8, 8, 9, 9], [1, 2] * 3], names=["dt1", "dt2"] + [ + np.array([7, 7, 8, 8, 9, 9], dtype=np.int32), + np.array([1, 2] * 3, dtype=np.int32), + ], + names=["dt1", "dt2"], ) expected = DataFrame( - np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), index=[2013], columns=exp_col + np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), + index=Index([2013], dtype=np.int32), + columns=exp_col, ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 3a9ddaebf29341..4d0be7464cb3d9 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -23,8 +23,8 @@ def test_datetimeindex(self): # make sure that the ordering on datetimeindex is consistent x = date_range("2000-01-01", periods=2) result1, result2 = (Index(y).day for y in cartesian_product([x, x])) - expected1 = Index([1, 1, 2, 2]) - expected2 = Index([1, 2, 1, 2]) + expected1 = Index([1, 1, 2, 2], dtype=np.int32) + expected2 = Index([1, 2, 1, 2], dtype=np.int32) tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 689c8ba845a6cf..adb11b88cf6671 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -229,14 +229,14 @@ def 
test_dt_namespace_accessor_index_and_values(self): dti = date_range("20140204", periods=3, freq="s") ser = Series(dti, index=index, name="xxx") exp = Series( - np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx" + np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx" ) tm.assert_series_equal(ser.dt.year, exp) - exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") + exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx") tm.assert_series_equal(ser.dt.month, exp) - exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") + exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx") tm.assert_series_equal(ser.dt.second, exp) exp = Series([ser[0]] * 3, index=index, name="xxx") @@ -386,7 +386,7 @@ def test_dt_namespace_accessor_categorical(self): dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) ser = Series(pd.Categorical(dti), name="foo") result = ser.dt.year - expected = Series([2017, 2017, 2018, 2018], name="foo") + expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo") tm.assert_series_equal(result, expected) def test_dt_tz_localize_categorical(self, tz_aware_fixture): @@ -741,6 +741,7 @@ def test_hour_index(self): result = dt_series.dt.hour expected = Series( [0, 1, 2, 3, 4], + dtype="int32", index=[2, 6, 7, 8, 11], ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7d77a755e082be..3f14fe5936c057 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1159,7 +1159,7 @@ def expected(self, val): return Series(res_values) -@pytest.mark.parametrize("val", [512, np.int16(512)]) +@pytest.mark.parametrize("val", [np.int16(512), np.int16(512)]) class TestSetitemIntoIntegerSeriesNeedsUpcast(SetitemCastingEquivalents): @pytest.fixture def obj(self): @@ -1174,7 +1174,7 @@ def expected(self): 
return Series([1, 512, 3], dtype=np.int16) -@pytest.mark.parametrize("val", [2**33 + 1.0, 2**33 + 1.1, 2**62]) +@pytest.mark.parametrize("val", [2**30 + 1.0, 2**33 + 1.1, 2**62]) class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents): # https://github.com/pandas-dev/pandas/issues/39584#issuecomment-941212124 @pytest.fixture @@ -1187,10 +1187,12 @@ def key(self): @pytest.fixture def expected(self, val): if val % 1 != 0: dtype = "f8" - else: + elif val > np.iinfo(np.int32).max: dtype = "i8" + else: + dtype = "i4" return Series([val, 2, 3], dtype=dtype) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b982d247c27070..201008f5e170bb 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1764,7 +1764,7 @@ def test_pad(self): # corner case old = Index([5, 10]) - new = Index(np.arange(5)) + new = Index(np.arange(5, dtype=np.int64)) filler = libalgos.pad["int64_t"](old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(filler, expect_filler) diff --git a/pandas/tests/util/test_assert_categorical_equal.py b/pandas/tests/util/test_assert_categorical_equal.py index 29a0805bceb987..fda4fa770fd9be 100644 --- a/pandas/tests/util/test_assert_categorical_equal.py +++ b/pandas/tests/util/test_assert_categorical_equal.py @@ -22,8 +22,8 @@ def test_categorical_equal_order_mismatch(check_category_order): msg = """Categorical\\.categories are different Categorical\\.categories values are different \\(100\\.0 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_categorical_equal(c1, c2, **kwargs) else: @@ -34,8 +34,8 @@ def test_categorical_equal_categories_mismatch(): msg = 
"""Categorical\\.categories are different Categorical\\.categories values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" c1 = Categorical([1, 2, 3, 4]) c2 = Categorical([1, 2, 3, 5]) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 71799c73f35c6c..0052ea671a5b0c 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -17,7 +17,7 @@ def test_index_equal_levels_mismatch(): msg = """Index are different Index levels are different -\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[left\\]: 1, NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 2, MultiIndex\\(\\[\\('A', 1\\), \\('A', 2\\), \\('B', 3\\), @@ -35,8 +35,8 @@ def test_index_equal_values_mismatch(check_exact): msg = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 4)]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) @@ -49,8 +49,8 @@ def test_index_equal_length_mismatch(check_exact): msg = """Index are different Index length are different -\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: 3, NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 4, NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" idx1 = Index([1, 2, 3]) idx2 = Index([1, 2, 3, 4]) 
@@ -67,22 +67,29 @@ def test_index_equal_class(exact): tm.assert_index_equal(idx1, idx2, exact=exact) -@pytest.mark.parametrize( - "idx_values, msg_str", - [ - [[1, 2, 3.0], "Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"], - [range(3), "RangeIndex\\(start=0, stop=3, step=1\\)"], - ], -) -def test_index_equal_class_mismatch(check_exact, idx_values, msg_str): - msg = f"""Index are different +def test_int_float_index_equal_class_mismatch(check_exact): + msg = """Index are different + +Attribute "inferred_type" are different +\\[left\\]: integer +\\[right\\]: floating""" + + idx1 = Index([1, 2, 3]) + idx2 = Index([1, 2, 3], dtype=np.float64) + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) + + +def test_range_index_equal_class_mismatch(check_exact): + msg = """Index are different Index classes are different -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: {msg_str}""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: """ idx1 = Index([1, 2, 3]) - idx2 = Index(idx_values) + idx2 = RangeIndex(range(3)) with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) @@ -96,8 +103,8 @@ def test_index_equal_values_close(check_exact): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) -\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" +\\[left\\]: NumericIndex\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: NumericIndex\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, check_exact=check_exact) @@ -114,8 +121,8 @@ def test_index_equal_values_less_close(check_exact, rtol): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Float64Index\\(\\[1.0, 
2.0, 3.0], dtype='float64'\\) -\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" +\\[left\\]: NumericIndex\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: NumericIndex\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -131,8 +138,8 @@ def test_index_equal_values_too_far(check_exact, rtol): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -146,8 +153,8 @@ def test_index_equal_value_oder_mismatch(check_exact, rtol, check_order): msg = """Index are different Index values are different \\(66\\.66667 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[3, 2, 1\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[3, 2, 1\\], dtype='int64'\\)""" if check_order: with pytest.raises(AssertionError, match=msg): @@ -168,8 +175,8 @@ def test_index_equal_level_values_mismatch(check_exact, rtol): msg = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -225,7 +232,7 @@ def test_index_equal_range_categories(check_categorical, exact): Index classes are different \\[left\\]: RangeIndex\\(start=0, stop=10, 
step=1\\) -\\[right\\]: Int64Index\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" +\\[right\\]: NumericIndex\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" rcat = CategoricalIndex(RangeIndex(10)) icat = CategoricalIndex(list(range(10))) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 3da14bce6facdb..60e1ee103ee1c8 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -919,7 +919,10 @@ def test_nan_and_zero_endpoints(self): arr, name="adl2", index=MultiIndex.from_arrays( - [[0] * 999 + [1], [0] * 999 + [1]], names=["index", "index"] + [ + Index([0] * 999 + [1], dtype=np.intp, name="index"), + Index([0] * 999 + [1], dtype=np.intp, name="index"), + ], ), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 04132ced044fcc..7370c829d88923 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -99,6 +99,7 @@ def test_flex_binary_frame(method, frame): exp = DataFrame( {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} ) + exp.columns = exp.columns.astype(np.intp) tm.assert_frame_equal(res3, exp) @@ -431,7 +432,11 @@ def test_multindex_columns_pairwise_func(self): expected = DataFrame( np.nan, index=MultiIndex.from_arrays( - [np.repeat(np.arange(5), 2), ["M", "N"] * 5, ["P", "Q"] * 5], + [ + np.repeat(np.arange(5, dtype=np.int64), 2), + ["M", "N"] * 5, + ["P", "Q"] * 5, + ], names=[None, "a", "b"], ), columns=columns,