Skip to content

Commit 065accb

Browse files
jorisvandenbossche authored and SeeminSyed committed
PERF: allow to skip validation/sanitization in DataFrame._from_arrays (pandas-dev#32858)
1 parent ab1c22b commit 065accb

File tree

2 files changed

+47
-11
lines changed

2 files changed

+47
-11
lines changed

pandas/core/frame.py

+35-2
Original file line number | Diff line number | Diff line change
@@ -1889,8 +1889,41 @@ def to_records(
18891889
return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})
18901890

18911891
@classmethod
1892-
def _from_arrays(cls, arrays, columns, index, dtype=None) -> "DataFrame":
1893-
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
1892+
def _from_arrays(
1893+
cls, arrays, columns, index, dtype=None, verify_integrity=True
1894+
) -> "DataFrame":
1895+
"""
1896+
Create DataFrame from a list of arrays corresponding to the columns.
1897+
1898+
Parameters
1899+
----------
1900+
arrays : list-like of arrays
1901+
Each array in the list corresponds to one column, in order.
1902+
columns : list-like, Index
1903+
The column names for the resulting DataFrame.
1904+
index : list-like, Index
1905+
The row labels for the resulting DataFrame.
1906+
dtype : dtype, optional
1907+
Optional dtype to enforce for all arrays.
1908+
verify_integrity : bool, default True
1909+
Validate and homogenize all input. If set to False, it is assumed
1910+
that all elements of `arrays` are actual arrays as they will be
1911+
stored in a block (numpy ndarray or ExtensionArray), have the same
1912+
length as and are aligned with the index, and that `columns` and
1913+
`index` are already Index objects.
1914+
1915+
Returns
1916+
-------
1917+
DataFrame
1918+
"""
1919+
mgr = arrays_to_mgr(
1920+
arrays,
1921+
columns,
1922+
index,
1923+
columns,
1924+
dtype=dtype,
1925+
verify_integrity=verify_integrity,
1926+
)
18941927
return cls(mgr)
18951928

18961929
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")

pandas/core/internals/construction.py

+12-9
Original file line number | Diff line number | Diff line change
@@ -53,23 +53,26 @@
5353
# BlockManager Interface
5454

5555

56-
def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
56+
def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None, verify_integrity=True):
5757
"""
5858
Segregate Series based on type and coerce into matrices.
5959
6060
Needs to handle a lot of exceptional cases.
6161
"""
62-
# figure out the index, if necessary
63-
if index is None:
64-
index = extract_index(arrays)
65-
else:
66-
index = ensure_index(index)
62+
if verify_integrity:
63+
# figure out the index, if necessary
64+
if index is None:
65+
index = extract_index(arrays)
66+
else:
67+
index = ensure_index(index)
6768

68-
# don't force copy because getting jammed in an ndarray anyway
69-
arrays = _homogenize(arrays, index, dtype)
69+
# don't force copy because getting jammed in an ndarray anyway
70+
arrays = _homogenize(arrays, index, dtype)
71+
72+
columns = ensure_index(columns)
7073

7174
# from BlockManager perspective
72-
axes = [ensure_index(columns), index]
75+
axes = [columns, index]
7376

7477
return create_block_manager_from_arrays(arrays, arr_names, axes)
7578

0 commit comments

Comments
 (0)