From 8e8d72db28def9c1a339c9fe79f0c9665fab8d9e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 22 Jan 2023 06:52:48 -0800 Subject: [PATCH] TST: read_fwf with dtype_backend (#50911) Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com> --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/tests/io/parser/test_read_fwf.py | 30 ++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0ceda331de790d..605f1d4b26e13f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -54,6 +54,7 @@ to select the nullable dtypes implementation. * :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``) * :func:`read_clipboard` (with ``engine="python"``) +* :func:`read_fwf` * :func:`read_excel` * :func:`read_html` * :func:`read_xml` diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 0dc8ee81278dd3..f4320f64805173 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -948,24 +948,27 @@ def test_widths_and_usecols(): tm.assert_frame_equal(result, expected) -def test_use_nullable_dtypes(string_storage): +@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) +def test_use_nullable_dtypes(string_storage, dtype_backend): # GH#50289 - data = """a b c d e f g h i -1 2.5 True a -3 4.5 False b True 6 7.5 a""" - with pd.option_context("mode.string_storage", string_storage): - result = read_fwf(StringIO(data), use_nullable_dtypes=True) + if string_storage == "pyarrow" or dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") if string_storage == "python": arr = StringArray(np.array(["a", "b"], dtype=np.object_)) arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_)) else: - import pyarrow as pa - arr = ArrowStringArray(pa.array(["a", "b"])) arr_na = ArrowStringArray(pa.array([None, "a"])) + data = """a b c d e f g h i +1 2.5 True a +3 4.5 False b True 6 7.5 a""" + with pd.option_context("mode.string_storage", string_storage): + with pd.option_context("mode.dtype_backend", dtype_backend): + result = read_fwf(StringIO(data), use_nullable_dtypes=True) + expected = DataFrame( { "a": pd.Series([1, 3], dtype="Int64"), @@ -979,4 +982,15 @@ def test_use_nullable_dtypes(string_storage): "i": pd.Series([pd.NA, pd.NA], dtype="Int64"), } ) + if dtype_backend == "pyarrow": + from pandas.arrays import ArrowExtensionArray + + expected = DataFrame( + { + col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True)) + for col in expected.columns + } + ) + expected["i"] = ArrowExtensionArray(pa.array([None, None])) + tm.assert_frame_equal(result, expected)