Skip to content

Commit

Permalink
TST: read_fwf with dtype_backend (pandas-dev#50911)
Browse files (browse the repository at this point in the history)
Co-authored-by: Patrick Hoefler <[email protected]>
  • Loading branch information
2 people authored and pooja-subramaniam committed Jan 25, 2023
1 parent 4ea8e57 commit 8e8d72d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
@@ -54,6 +54,7 @@ to select the nullable dtypes implementation.

* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
* :func:`read_clipboard` (with ``engine="python"``)
* :func:`read_fwf`
* :func:`read_excel`
* :func:`read_html`
* :func:`read_xml`
Expand Down
30 changes: 22 additions & 8 deletions pandas/tests/io/parser/test_read_fwf.py
Original file line number Diff line number Diff line change
@@ -948,24 +948,27 @@ def test_widths_and_usecols():
tm.assert_frame_equal(result, expected)


def test_use_nullable_dtypes(string_storage):
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_use_nullable_dtypes(string_storage, dtype_backend):
# GH#50289

data = """a b c d e f g h i
1 2.5 True a
3 4.5 False b True 6 7.5 a"""
with pd.option_context("mode.string_storage", string_storage):
result = read_fwf(StringIO(data), use_nullable_dtypes=True)
if string_storage == "pyarrow" or dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")

if string_storage == "python":
arr = StringArray(np.array(["a", "b"], dtype=np.object_))
arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_))
else:
import pyarrow as pa

arr = ArrowStringArray(pa.array(["a", "b"]))
arr_na = ArrowStringArray(pa.array([None, "a"]))

data = """a b c d e f g h i
1 2.5 True a
3 4.5 False b True 6 7.5 a"""
with pd.option_context("mode.string_storage", string_storage):
with pd.option_context("mode.dtype_backend", dtype_backend):
result = read_fwf(StringIO(data), use_nullable_dtypes=True)

expected = DataFrame(
{
"a": pd.Series([1, 3], dtype="Int64"),
@@ -979,4 +982,15 @@ def test_use_nullable_dtypes(string_storage):
"i": pd.Series([pd.NA, pd.NA], dtype="Int64"),
}
)
if dtype_backend == "pyarrow":
from pandas.arrays import ArrowExtensionArray

expected = DataFrame(
{
col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
for col in expected.columns
}
)
expected["i"] = ArrowExtensionArray(pa.array([None, None]))

tm.assert_frame_equal(result, expected)

0 comments on commit 8e8d72d

Please sign in to comment.