Skip to content

Commit

Permalink
TYP: require Index objects earlier in internals (#33100)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Apr 4, 2020
1 parent d88b90d commit 5e21be0
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 28 deletions.
4 changes: 0 additions & 4 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
BlockManager,
SingleBlockManager,
concatenate_block_managers,
create_block_manager_from_arrays,
create_block_manager_from_blocks,
)

__all__ = [
Expand All @@ -40,6 +38,4 @@
"BlockManager",
"SingleBlockManager",
"concatenate_block_managers",
"create_block_manager_from_arrays",
"create_block_manager_from_blocks",
]
25 changes: 14 additions & 11 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
constructors before passing them to a BlockManager.
"""
from collections import abc
from typing import Tuple

import numpy as np
import numpy.ma as ma
Expand All @@ -29,7 +30,6 @@
ABCDataFrame,
ABCDatetimeIndex,
ABCIndexClass,
ABCPeriodIndex,
ABCSeries,
ABCTimedeltaIndex,
)
Expand All @@ -44,7 +44,7 @@
get_objs_combined_axis,
union_indexes,
)
from pandas.core.internals import (
from pandas.core.internals.managers import (
create_block_manager_from_arrays,
create_block_manager_from_blocks,
)
Expand All @@ -53,12 +53,16 @@
# BlockManager Interface


def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrity=True):
def arrays_to_mgr(
arrays, arr_names, index, columns, dtype=None, verify_integrity: bool = True
):
"""
Segregate Series based on type and coerce into matrices.
Needs to handle a lot of exceptional cases.
"""
arr_names = ensure_index(arr_names)

if verify_integrity:
# figure out the index, if necessary
if index is None:
Expand All @@ -70,6 +74,9 @@ def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrity=True):
arrays = _homogenize(arrays, index, dtype)

columns = ensure_index(columns)
else:
columns = ensure_index(columns)
index = ensure_index(index)

# from BlockManager perspective
axes = [columns, index]
Expand Down Expand Up @@ -163,7 +170,8 @@ def init_ndarray(values, index, columns, dtype=None, copy=False):
values = [values]

if columns is None:
columns = list(range(len(values)))
columns = Index(range(len(values)))

return arrays_to_mgr(values, columns, index, columns, dtype=dtype)

# by definition an array here
Expand Down Expand Up @@ -416,7 +424,7 @@ def get_names_from_index(data):
return index


def _get_axes(N, K, index, columns):
def _get_axes(N, K, index, columns) -> Tuple[Index, Index]:
# helper to create the axes as indexes
# return axes or defaults

Expand Down Expand Up @@ -635,12 +643,7 @@ def sanitize_index(data, index: Index):
if len(data) != len(index):
raise ValueError("Length of values does not match length of index")

if isinstance(data, ABCIndexClass):
pass
elif isinstance(data, (ABCPeriodIndex, ABCDatetimeIndex)):
data = data._values

elif isinstance(data, np.ndarray):
if isinstance(data, np.ndarray):

# coerce datetimelike types
if data.dtype.kind in ["M", "m"]:
Expand Down
33 changes: 20 additions & 13 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import itertools
import operator
import re
from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union
from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
import warnings

import numpy as np
Expand Down Expand Up @@ -341,7 +341,7 @@ def _verify_integrity(self) -> None:
tot_items = sum(len(x.mgr_locs) for x in self.blocks)
for block in self.blocks:
if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
construction_error(tot_items, block.shape[1:], self.axes)
raise construction_error(tot_items, block.shape[1:], self.axes)
if len(self.items) != tot_items:
raise AssertionError(
"Number of manager items must equal union of "
Expand Down Expand Up @@ -1648,7 +1648,7 @@ def concat(
# Constructor Helpers


def create_block_manager_from_blocks(blocks, axes):
def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
try:
if len(blocks) == 1 and not isinstance(blocks[0], Block):
# if blocks[0] is of length 0, return empty blocks
Expand All @@ -1669,18 +1669,23 @@ def create_block_manager_from_blocks(blocks, axes):
except ValueError as e:
blocks = [getattr(b, "values", b) for b in blocks]
tot_items = sum(b.shape[0] for b in blocks)
construction_error(tot_items, blocks[0].shape[1:], axes, e)
raise construction_error(tot_items, blocks[0].shape[1:], axes, e)


def create_block_manager_from_arrays(arrays, names, axes):
def create_block_manager_from_arrays(
arrays, names: Index, axes: List[Index]
) -> BlockManager:
assert isinstance(names, Index)
assert isinstance(axes, list)
assert all(isinstance(x, Index) for x in axes)

try:
blocks = form_blocks(arrays, names, axes)
mgr = BlockManager(blocks, axes)
mgr._consolidate_inplace()
return mgr
except ValueError as e:
construction_error(len(arrays), arrays[0].shape, axes, e)
raise construction_error(len(arrays), arrays[0].shape, axes, e)


def construction_error(tot_items, block_shape, axes, e=None):
Expand All @@ -1695,23 +1700,25 @@ def construction_error(tot_items, block_shape, axes, e=None):
if len(implied) <= 2:
implied = implied[::-1]

# We return the exception object instead of raising it so that we
# can raise it in the caller; mypy plays better with that
if passed == implied and e is not None:
raise e
return e
if block_shape[0] == 0:
raise ValueError("Empty data passed with indices specified.")
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
return ValueError("Empty data passed with indices specified.")
return ValueError(f"Shape of passed values is {passed}, indices imply {implied}")


# -----------------------------------------------------------------------


def form_blocks(arrays, names, axes):
def form_blocks(arrays, names: Index, axes) -> List[Block]:
# put "leftover" items in float bucket, where else?
# generalize?
items_dict = defaultdict(list)
items_dict: DefaultDict[str, List] = defaultdict(list)
extra_locs = []

names_idx = ensure_index(names)
names_idx = names
if names_idx.equals(axes[0]):
names_indexer = np.arange(len(names_idx))
else:
Expand All @@ -1729,7 +1736,7 @@ def form_blocks(arrays, names, axes):
block_type = get_block_type(v)
items_dict[block_type.__name__].append((i, k, v))

blocks = []
blocks: List[Block] = []
if len(items_dict["FloatBlock"]):
float_blocks = _multi_blockify(items_dict["FloatBlock"])
blocks.extend(float_blocks)
Expand Down

0 comments on commit 5e21be0

Please sign in to comment.