diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 9feec424389e75..793f11c62f9f5f 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -24,6 +24,32 @@ ) +@pytest.fixture +def path(ext): + """ + Fixture to open file for use in each test case. + """ + with ensure_clean(ext) as file_path: + yield file_path + + +@pytest.fixture +def set_engine(engine, ext): + """ + Fixture to set engine for use in each test case. + + Rather than requiring `engine=...` to be provided explicitly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. After executing + the test it rolls back said change to the global option. + """ + option_name = "io.excel.{ext}.writer".format(ext=ext.strip(".")) + prev_engine = get_option(option_name) + set_option(option_name, engine) + yield + set_option(option_name, prev_engine) # Roll back option change + + @td.skip_if_no("xlrd") @pytest.mark.parametrize("ext", [".xls", ".xlsx", ".xlsm"]) class TestRoundTrip: @@ -233,34 +259,6 @@ def test_read_excel_parse_dates(self, ext): tm.assert_frame_equal(df, res) -class _WriterBase: - @pytest.fixture(autouse=True) - def set_engine_and_path(self, engine, ext): - """Fixture to set engine and open file for use in each test case - - Rather than requiring `engine=...` to be provided explicitly as an - argument in each test, this fixture sets a global option to dictate - which engine should be used to write Excel files. After executing - the test it rolls back said change to the global option. - - It also uses a context manager to open a temporary excel file for - the function to write to, accessible via `self.path` - - Notes - ----- - This fixture will run as part of each test method defined in the - class and any subclasses, on account of the `autouse=True` - argument - """ - option_name = "io.excel.{ext}.writer".format(ext=ext.strip(".")) - prev_engine = get_option(option_name) - set_option(option_name, engine) - with ensure_clean(ext) as path: - self.path = path - yield - set_option(option_name, prev_engine) # Roll back option change - - @td.skip_if_no("xlrd") @pytest.mark.parametrize( "engine,ext", @@ -271,10 +269,9 @@ class and any subclasses, on account of the `autouse=True` pytest.param("xlsxwriter", ".xlsx", marks=td.skip_if_no("xlsxwriter")), ], ) -class TestExcelWriter(_WriterBase): - # Base class for test cases to run with different Excel writers. - - def test_excel_sheet_size(self, engine, ext): +@pytest.mark.usefixtures("set_engine") +class TestExcelWriter: + def test_excel_sheet_size(self, path): # GH 26080 breaking_row_count = 2 ** 20 + 1 @@ -287,18 +284,18 @@ def test_excel_sheet_size(self, engine, ext): msg = "sheet is too large" with pytest.raises(ValueError, match=msg): - row_df.to_excel(self.path) + row_df.to_excel(path) with pytest.raises(ValueError, match=msg): - col_df.to_excel(self.path) + col_df.to_excel(path) - def test_excel_sheet_by_name_raise(self, engine, ext): + def test_excel_sheet_by_name_raise(self, path): import xlrd gt = DataFrame(np.random.randn(10, 2)) - gt.to_excel(self.path) + gt.to_excel(path) - xl = ExcelFile(self.path) + xl = ExcelFile(path) df = pd.read_excel(xl, 0, index_col=0) tm.assert_frame_equal(gt, df) @@ -306,162 +303,162 @@ def test_excel_sheet_by_name_raise(self, engine, ext): with pytest.raises(xlrd.XLRDError): pd.read_excel(xl, "0") - def test_excel_writer_context_manager(self, frame, engine, ext): - with ExcelWriter(self.path) as writer: + def test_excel_writer_context_manager(self, frame, path): + with ExcelWriter(path) as writer: frame.to_excel(writer, "Data1") frame2 = frame.copy() frame2.columns = frame.columns[::-1] frame2.to_excel(writer, "Data2") - with ExcelFile(self.path) as reader: + with ExcelFile(path) as reader: found_df = pd.read_excel(reader, "Data1", index_col=0) found_df2 = pd.read_excel(reader, "Data2", index_col=0) tm.assert_frame_equal(found_df, frame) tm.assert_frame_equal(found_df2, frame2) - def test_roundtrip(self, engine, ext, frame): + def test_roundtrip(self, frame, path): frame = frame.copy() frame["A"][:5] = np.nan - frame.to_excel(self.path, "test1") - frame.to_excel(self.path, "test1", columns=["A", "B"]) - frame.to_excel(self.path, "test1", header=False) - frame.to_excel(self.path, "test1", index=False) + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) # test roundtrip - frame.to_excel(self.path, "test1") - recons = pd.read_excel(self.path, "test1", index_col=0) + frame.to_excel(path, "test1") + recons = pd.read_excel(path, "test1", index_col=0) tm.assert_frame_equal(frame, recons) - frame.to_excel(self.path, "test1", index=False) - recons = pd.read_excel(self.path, "test1", index_col=None) + frame.to_excel(path, "test1", index=False) + recons = pd.read_excel(path, "test1", index_col=None) recons.index = frame.index tm.assert_frame_equal(frame, recons) - frame.to_excel(self.path, "test1", na_rep="NA") - recons = pd.read_excel(self.path, "test1", index_col=0, na_values=["NA"]) + frame.to_excel(path, "test1", na_rep="NA") + recons = pd.read_excel(path, "test1", index_col=0, na_values=["NA"]) tm.assert_frame_equal(frame, recons) # GH 3611 - frame.to_excel(self.path, "test1", na_rep="88") - recons = pd.read_excel(self.path, "test1", index_col=0, na_values=["88"]) + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel(path, "test1", index_col=0, na_values=["88"]) tm.assert_frame_equal(frame, recons) - frame.to_excel(self.path, "test1", na_rep="88") - recons = pd.read_excel(self.path, "test1", index_col=0, na_values=[88, 88.0]) + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel(path, "test1", index_col=0, na_values=[88, 88.0]) tm.assert_frame_equal(frame, recons) # GH 6573 - frame.to_excel(self.path, "Sheet1") - recons = pd.read_excel(self.path, index_col=0) + frame.to_excel(path, "Sheet1") + recons = pd.read_excel(path, index_col=0) tm.assert_frame_equal(frame, recons) - frame.to_excel(self.path, "0") - recons = pd.read_excel(self.path, index_col=0) + frame.to_excel(path, "0") + recons = pd.read_excel(path, index_col=0) tm.assert_frame_equal(frame, recons) # GH 8825 Pandas Series should provide to_excel method s = frame["A"] - s.to_excel(self.path) - recons = pd.read_excel(self.path, index_col=0) + s.to_excel(path) + recons = pd.read_excel(path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) - def test_mixed(self, engine, ext, frame): + def test_mixed(self, frame, path): mixed_frame = frame.copy() mixed_frame["foo"] = "bar" - mixed_frame.to_excel(self.path, "test1") - reader = ExcelFile(self.path) + mixed_frame.to_excel(path, "test1") + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(mixed_frame, recons) - def test_ts_frame(self, tsframe, engine, ext): + def test_ts_frame(self, tsframe, path): df = tsframe - df.to_excel(self.path, "test1") - reader = ExcelFile(self.path) + df.to_excel(path, "test1") + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) - def test_basics_with_nan(self, engine, ext, frame): + def test_basics_with_nan(self, frame, path): frame = frame.copy() frame["A"][:5] = np.nan - frame.to_excel(self.path, "test1") - frame.to_excel(self.path, "test1", columns=["A", "B"]) - frame.to_excel(self.path, "test1", header=False) - frame.to_excel(self.path, "test1", index=False) + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64]) - def test_int_types(self, engine, ext, np_type): + def test_int_types(self, np_type, path): # Test np.int values read come back as int # (rather than float which is Excel's format). df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) - df.to_excel(self.path, "test1") + df.to_excel(path, "test1") - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) int_frame = df.astype(np.int64) tm.assert_frame_equal(int_frame, recons) - recons2 = pd.read_excel(self.path, "test1", index_col=0) + recons2 = pd.read_excel(path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. float_frame = df.astype(float) - recons = pd.read_excel(self.path, "test1", convert_float=False, index_col=0) + recons = pd.read_excel(path, "test1", convert_float=False, index_col=0) tm.assert_frame_equal( recons, float_frame, check_index_type=False, check_column_type=False ) @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) - def test_float_types(self, engine, ext, np_type): + def test_float_types(self, np_type, path): # Test np.float values read come back as float. df = DataFrame(np.random.random_sample(10), dtype=np_type) - df.to_excel(self.path, "test1") + df.to_excel(path, "test1") - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(df, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) - def test_bool_types(self, engine, ext, np_type): + def test_bool_types(self, np_type, path): # Test np.bool values read come back as float. df = DataFrame([1, 0, True, False], dtype=np_type) - df.to_excel(self.path, "test1") + df.to_excel(path, "test1") - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(df, recons) - def test_inf_roundtrip(self, engine, ext): + def test_inf_roundtrip(self, path): df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - df.to_excel(self.path, "test1") + df.to_excel(path, "test1") - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) - def test_sheets(self, engine, ext, frame, tsframe): + def test_sheets(self, frame, tsframe, path): frame = frame.copy() frame["A"][:5] = np.nan - frame.to_excel(self.path, "test1") - frame.to_excel(self.path, "test1", columns=["A", "B"]) - frame.to_excel(self.path, "test1", header=False) - frame.to_excel(self.path, "test1", index=False) + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) # Test writing to separate sheets - writer = ExcelWriter(self.path) + writer = ExcelWriter(path) frame.to_excel(writer, "test1") tsframe.to_excel(writer, "test2") writer.save() - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(frame, recons) recons = pd.read_excel(reader, "test2", index_col=0) @@ -470,62 +467,62 @@ def test_sheets(self, engine, ext, frame, tsframe): assert "test1" == reader.sheet_names[0] assert "test2" == reader.sheet_names[1] - def test_colaliases(self, engine, ext, frame): + def test_colaliases(self, frame, path): frame = frame.copy() frame["A"][:5] = np.nan - frame.to_excel(self.path, "test1") - frame.to_excel(self.path, "test1", columns=["A", "B"]) - frame.to_excel(self.path, "test1", header=False) - frame.to_excel(self.path, "test1", index=False) + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) # column aliases col_aliases = Index(["AA", "X", "Y", "Z"]) - frame.to_excel(self.path, "test1", header=col_aliases) - reader = ExcelFile(self.path) + frame.to_excel(path, "test1", header=col_aliases) + reader = ExcelFile(path) rs = pd.read_excel(reader, "test1", index_col=0) xp = frame.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) - def test_roundtrip_indexlabels(self, merge_cells, engine, ext, frame): + def test_roundtrip_indexlabels(self, merge_cells, frame, path): frame = frame.copy() frame["A"][:5] = np.nan - frame.to_excel(self.path, "test1") - frame.to_excel(self.path, "test1", columns=["A", "B"]) - frame.to_excel(self.path, "test1", header=False) - frame.to_excel(self.path, "test1", index=False) + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) # test index_label df = DataFrame(np.random.randn(10, 2)) >= 0 - df.to_excel(self.path, "test1", index_label=["test"], merge_cells=merge_cells) - reader = ExcelFile(self.path) + df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) df.index.names = ["test"] assert df.index.names == recons.index.names df = DataFrame(np.random.randn(10, 2)) >= 0 df.to_excel( - self.path, + path, "test1", index_label=["test", "dummy", "dummy2"], merge_cells=merge_cells, ) - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) df.index.names = ["test"] assert df.index.names == recons.index.names df = DataFrame(np.random.randn(10, 2)) >= 0 - df.to_excel(self.path, "test1", index_label="test", merge_cells=merge_cells) - reader = ExcelFile(self.path) + df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) df.index.names = ["test"] tm.assert_frame_equal(df, recons.astype(bool)) frame.to_excel( - self.path, + path, "test1", columns=["A", "B", "C", "D"], index=False, @@ -535,35 +532,35 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext, frame): df = frame.copy() df = df.set_index(["A", "B"]) - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=[0, 1]) tm.assert_frame_equal(df, recons, check_less_precise=True) - def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): + def test_excel_roundtrip_indexname(self, merge_cells, path): df = DataFrame(np.random.randn(10, 4)) df.index.name = "foo" - df.to_excel(self.path, merge_cells=merge_cells) + df.to_excel(path, merge_cells=merge_cells) - xf = ExcelFile(self.path) + xf = ExcelFile(path) result = pd.read_excel(xf, xf.sheet_names[0], index_col=0) tm.assert_frame_equal(result, df) assert result.index.name == "foo" - def test_excel_roundtrip_datetime(self, merge_cells, tsframe, engine, ext): + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): # datetime.date, not sure what to test here exactly tsf = tsframe.copy() tsf.index = [x.date() for x in tsframe.index] - tsf.to_excel(self.path, "test1", merge_cells=merge_cells) + tsf.to_excel(path, "test1", merge_cells=merge_cells) - reader = ExcelFile(self.path) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(tsframe, recons) - def test_excel_date_datetime_format(self, engine, ext): + def test_excel_date_datetime_format(self, engine, ext, path): # see gh-4133 # # Excel output format strings @@ -585,7 +582,7 @@ def test_excel_date_datetime_format(self, engine, ext): ) with ensure_clean(ext) as filename2: - writer1 = ExcelWriter(self.path) + writer1 = ExcelWriter(path) writer2 = ExcelWriter( filename2, date_format="DD.MM.YYYY", @@ -598,7 +595,7 @@ def test_excel_date_datetime_format(self, engine, ext): writer1.close() writer2.close() - reader1 = ExcelFile(self.path) + reader1 = ExcelFile(path) reader2 = ExcelFile(filename2) rs1 = pd.read_excel(reader1, "test1", index_col=0) @@ -610,7 +607,7 @@ def test_excel_date_datetime_format(self, engine, ext): # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected) - def test_to_excel_interval_no_labels(self, engine, ext): + def test_to_excel_interval_no_labels(self, path): # see gh-19242 # # Test writing Interval without labels. @@ -620,13 +617,13 @@ def test_to_excel_interval_no_labels(self, engine, ext): df["new"] = pd.cut(df[0], 10) expected["new"] = pd.cut(expected[0], 10).astype(str) - df.to_excel(self.path, "test1") - reader = ExcelFile(self.path) + df.to_excel(path, "test1") + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_interval_labels(self, engine, ext): + def test_to_excel_interval_labels(self, path): # see gh-19242 # # Test writing Interval with labels. @@ -638,13 +635,13 @@ def test_to_excel_interval_labels(self, engine, ext): df["new"] = intervals expected["new"] = pd.Series(list(intervals)) - df.to_excel(self.path, "test1") - reader = ExcelFile(self.path) + df.to_excel(path, "test1") + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_timedelta(self, engine, ext): + def test_to_excel_timedelta(self, path): # see gh-19242, gh-9155 # # Test writing timedelta to xls. @@ -658,50 +655,50 @@ def test_to_excel_timedelta(self, engine, ext): lambda x: timedelta(seconds=x).total_seconds() / float(86400) ) - df.to_excel(self.path, "test1") - reader = ExcelFile(self.path) + df.to_excel(path, "test1") + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_periodindex(self, engine, ext, tsframe): + def test_to_excel_periodindex(self, tsframe, path): xp = tsframe.resample("M", kind="period").mean() - xp.to_excel(self.path, "sht1") + xp.to_excel(path, "sht1") - reader = ExcelFile(self.path) + reader = ExcelFile(path) rs = pd.read_excel(reader, "sht1", index_col=0) tm.assert_frame_equal(xp, rs.to_period("M")) - def test_to_excel_multiindex(self, merge_cells, engine, ext, frame): + def test_to_excel_multiindex(self, merge_cells, frame, path): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index - frame.to_excel(self.path, "test1", header=False) - frame.to_excel(self.path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", columns=["A", "B"]) # round trip - frame.to_excel(self.path, "test1", merge_cells=merge_cells) - reader = ExcelFile(self.path) + frame.to_excel(path, "test1", merge_cells=merge_cells) + reader = ExcelFile(path) df = pd.read_excel(reader, "test1", index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 - def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): + def test_to_excel_multiindex_nan_label(self, merge_cells, path): df = pd.DataFrame( {"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)} ) df = df.set_index(["A", "B"]) - df.to_excel(self.path, merge_cells=merge_cells) - df1 = pd.read_excel(self.path, index_col=[0, 1]) + df.to_excel(path, merge_cells=merge_cells) + df1 = pd.read_excel(path, index_col=[0, 1]) tm.assert_frame_equal(df, df1) # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self, merge_cells, engine, ext, frame): + def test_to_excel_multiindex_cols(self, merge_cells, frame, path): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index @@ -713,28 +710,28 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext, frame): header = 0 # round trip - frame.to_excel(self.path, "test1", merge_cells=merge_cells) - reader = ExcelFile(self.path) + frame.to_excel(path, "test1", merge_cells=merge_cells) + reader = ExcelFile(path) df = pd.read_excel(reader, "test1", header=header, index_col=[0, 1]) if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df) - def test_to_excel_multiindex_dates(self, merge_cells, engine, ext, tsframe): + def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path): # try multiindex with dates new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) tsframe.index.names = ["time", "foo"] - tsframe.to_excel(self.path, "test1", merge_cells=merge_cells) - reader = ExcelFile(self.path) + tsframe.to_excel(path, "test1", merge_cells=merge_cells) + reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons) assert recons.index.names == ("time", "foo") - def test_to_excel_multiindex_no_write_index(self, engine, ext): + def test_to_excel_multiindex_no_write_index(self, path): # Test writing and re-reading a MI without the index. GH 5616. # Initial non-MI frame. @@ -746,24 +743,24 @@ def test_to_excel_multiindex_no_write_index(self, engine, ext): frame2.index = multi_index # Write out to Excel without the index. - frame2.to_excel(self.path, "test1", index=False) + frame2.to_excel(path, "test1", index=False) # Read it back in. - reader = ExcelFile(self.path) + reader = ExcelFile(path) frame3 = pd.read_excel(reader, "test1") # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3) - def test_to_excel_float_format(self, engine, ext): + def test_to_excel_float_format(self, path): df = DataFrame( [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=["A", "B"], columns=["X", "Y", "Z"], ) - df.to_excel(self.path, "test1", float_format="%.2f") + df.to_excel(path, "test1", float_format="%.2f") - reader = ExcelFile(self.path) + reader = ExcelFile(path) result = pd.read_excel(reader, "test1", index_col=0) expected = DataFrame( @@ -773,7 +770,7 @@ def test_to_excel_float_format(self, engine, ext): ) tm.assert_frame_equal(result, expected) - def test_to_excel_output_encoding(self, engine, ext): + def test_to_excel_output_encoding(self, ext): # Avoid mixed inferred_type. df = DataFrame( [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]], @@ -786,7 +783,7 @@ def test_to_excel_output_encoding(self, engine, ext): result = pd.read_excel(filename, "TestSheet", encoding="utf8", index_col=0) tm.assert_frame_equal(result, df) - def test_to_excel_unicode_filename(self, engine, ext): + def test_to_excel_unicode_filename(self, ext, path): with ensure_clean("\u0192u." + ext) as filename: try: f = open(filename, "wb") @@ -916,14 +913,12 @@ def test_to_excel_unicode_filename(self, engine, ext): @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3]) def test_excel_010_hemstring( - self, merge_cells, engine, ext, c_idx_nlevels, r_idx_nlevels, use_headers + self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path ): def roundtrip(data, header=True, parser_hdr=0, index=True): - data.to_excel( - self.path, header=header, merge_cells=merge_cells, index=index - ) + data.to_excel(path, header=header, merge_cells=merge_cells, index=index) - xf = ExcelFile(self.path) + xf = ExcelFile(path) return pd.read_excel(xf, xf.sheet_names[0], header=parser_hdr) # Basic test. @@ -965,128 +960,128 @@ def roundtrip(data, header=True, parser_hdr=0, index=True): for c in range(len(res.columns)): assert res.iloc[r, c] is not np.nan - def test_duplicated_columns(self, engine, ext): + def test_duplicated_columns(self, path): # see gh-5235 df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"]) - df.to_excel(self.path, "test1") + df.to_excel(path, "test1") expected = DataFrame( [[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"] ) # By default, we mangle. - result = pd.read_excel(self.path, "test1", index_col=0) + result = pd.read_excel(path, "test1", index_col=0) tm.assert_frame_equal(result, expected) # Explicitly, we pass in the parameter. - result = pd.read_excel(self.path, "test1", index_col=0, mangle_dupe_cols=True) + result = pd.read_excel(path, "test1", index_col=0, mangle_dupe_cols=True) tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"]) - df.to_excel(self.path, "test1") + df.to_excel(path, "test1") - result = pd.read_excel(self.path, "test1", index_col=0) + result = pd.read_excel(path, "test1", index_col=0) expected = DataFrame( [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"] ) tm.assert_frame_equal(result, expected) # see gh-10982 - df.to_excel(self.path, "test1", index=False, header=False) - result = pd.read_excel(self.path, "test1", header=None) + df.to_excel(path, "test1", index=False, header=False) + result = pd.read_excel(path, "test1", header=None) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) tm.assert_frame_equal(result, expected) msg = "Setting mangle_dupe_cols=False is not supported yet" with pytest.raises(ValueError, match=msg): - pd.read_excel(self.path, "test1", header=None, mangle_dupe_cols=False) + pd.read_excel(path, "test1", header=None, mangle_dupe_cols=False) - def test_swapped_columns(self, engine, ext): + def test_swapped_columns(self, path): # Test for issue #5427. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) - write_frame.to_excel(self.path, "test1", columns=["B", "A"]) + write_frame.to_excel(path, "test1", columns=["B", "A"]) - read_frame = pd.read_excel(self.path, "test1", header=0) + read_frame = pd.read_excel(path, "test1", header=0) tm.assert_series_equal(write_frame["A"], read_frame["A"]) tm.assert_series_equal(write_frame["B"], read_frame["B"]) - def test_invalid_columns(self, engine, ext): + def test_invalid_columns(self, path): # see gh-10982 write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - write_frame.to_excel(self.path, "test1", columns=["B", "C"]) + write_frame.to_excel(path, "test1", columns=["B", "C"]) expected = write_frame.reindex(columns=["B", "C"]) - read_frame = pd.read_excel(self.path, "test1", index_col=0) + read_frame = pd.read_excel(path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) with pytest.raises( KeyError, match="'passes columns are not ALL present dataframe'" ): - write_frame.to_excel(self.path, "test1", columns=["C", "D"]) + write_frame.to_excel(path, "test1", columns=["C", "D"]) - def test_comment_arg(self, engine, ext): + def test_comment_arg(self, path): # see gh-18735 # # Test the comment argument functionality to pd.read_excel. # Create file to read in. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) - df.to_excel(self.path, "test_c") + df.to_excel(path, "test_c") # Read file without comment arg. - result1 = pd.read_excel(self.path, "test_c", index_col=0) + result1 = pd.read_excel(path, "test_c", index_col=0) result1.iloc[1, 0] = None result1.iloc[1, 1] = None result1.iloc[2, 1] = None - result2 = pd.read_excel(self.path, "test_c", comment="#", index_col=0) + result2 = pd.read_excel(path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result1, result2) - def test_comment_default(self, engine, ext): + def test_comment_default(self, path): # Re issue #18735 # Test the comment argument default to pd.read_excel # Create file to read in df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) - df.to_excel(self.path, "test_c") + df.to_excel(path, "test_c") # Read file with default and explicit comment=None - result1 = pd.read_excel(self.path, "test_c") - result2 = pd.read_excel(self.path, "test_c", comment=None) + result1 = pd.read_excel(path, "test_c") + result2 = pd.read_excel(path, "test_c", comment=None) tm.assert_frame_equal(result1, result2) - def test_comment_used(self, engine, ext): + def test_comment_used(self, path): # see gh-18735 # # Test the comment argument is working as expected when used. # Create file to read in. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) - df.to_excel(self.path, "test_c") + df.to_excel(path, "test_c") # Test read_frame_comment against manually produced expected output. expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) - result = pd.read_excel(self.path, "test_c", comment="#", index_col=0) + result = pd.read_excel(path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result, expected) - def test_comment_empty_line(self, engine, ext): + def test_comment_empty_line(self, path): # Re issue #18735 # Test that pd.read_excel ignores commented lines at the end of file df = DataFrame({"a": ["1", "#2"], "b": ["2", "3"]}) - df.to_excel(self.path, index=False) + df.to_excel(path, index=False) # Test that all-comment lines at EoF are ignored expected = DataFrame({"a": [1], "b": [2]}) - result = pd.read_excel(self.path, comment="#") + result = pd.read_excel(path, comment="#") tm.assert_frame_equal(result, expected) - def test_datetimes(self, engine, ext): + def test_datetimes(self, path): # Test writing and reading datetimes. For issue #9139. (xref #9185) datetimes = [ @@ -1104,12 +1099,12 @@ def test_datetimes(self, engine, ext): ] write_frame = DataFrame({"A": datetimes}) - write_frame.to_excel(self.path, "Sheet1") - read_frame = pd.read_excel(self.path, "Sheet1", header=0) + write_frame.to_excel(path, "Sheet1") + read_frame = pd.read_excel(path, "Sheet1", header=0) tm.assert_series_equal(write_frame["A"], read_frame["A"]) - def test_bytes_io(self, engine, ext): + def test_bytes_io(self, engine): # see gh-7074 bio = BytesIO() df = DataFrame(np.random.randn(10, 2)) @@ -1123,7 +1118,7 @@ def test_bytes_io(self, engine, ext): reread_df = pd.read_excel(bio, index_col=0) tm.assert_frame_equal(df, reread_df) - def test_write_lists_dict(self, engine, ext): + def test_write_lists_dict(self, path): # see gh-8188. df = DataFrame( { @@ -1132,8 +1127,8 @@ def test_write_lists_dict(self, engine, ext): "str": ["apple", "banana", "cherry"], } ) - df.to_excel(self.path, "Sheet1") - read = pd.read_excel(self.path, "Sheet1", header=0, index_col=0) + df.to_excel(path, "Sheet1") + read = pd.read_excel(path, "Sheet1", header=0, index_col=0) expected = df.copy() expected.mixed = expected.mixed.apply(str) @@ -1141,23 +1136,23 @@ def test_write_lists_dict(self, engine, ext): tm.assert_frame_equal(read, expected) - def test_true_and_false_value_options(self, engine, ext): + def test_true_and_false_value_options(self, path): # see gh-13347 df = pd.DataFrame([["foo", "bar"]], columns=["col1", "col2"]) expected = df.replace({"foo": True, "bar": False}) - df.to_excel(self.path) + df.to_excel(path) read_frame = pd.read_excel( - self.path, true_values=["foo"], false_values=["bar"], index_col=0 + path, true_values=["foo"], false_values=["bar"], index_col=0 ) tm.assert_frame_equal(read_frame, expected) - def test_freeze_panes(self, engine, ext): + def test_freeze_panes(self, path): # see gh-15160 expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) - expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) + expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) - result = pd.read_excel(self.path, index_col=0) + result = pd.read_excel(path, index_col=0) tm.assert_frame_equal(result, expected) def test_path_path_lib(self, engine, ext): @@ -1176,7 +1171,7 @@ def test_path_local_path(self, engine, ext): result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) tm.assert_frame_equal(result, df) - def test_merged_cell_custom_objects(self, engine, merge_cells, ext): + def test_merged_cell_custom_objects(self, merge_cells, path): # see GH-27006 mi = MultiIndex.from_tuples( [ @@ -1185,10 +1180,8 @@ def test_merged_cell_custom_objects(self, engine, merge_cells, ext): ] ) expected = DataFrame(np.ones((2, 2)), columns=mi) - expected.to_excel(self.path) - result = pd.read_excel( - self.path, header=[0, 1], index_col=0, convert_float=False - ) + expected.to_excel(path) + result = pd.read_excel(path, header=[0, 1], index_col=0, convert_float=False) # need to convert PeriodIndexes to standard Indexes for assert equal expected.columns.set_levels( [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]], @@ -1199,18 +1192,18 @@ def test_merged_cell_custom_objects(self, engine, merge_cells, ext): tm.assert_frame_equal(expected, result) @pytest.mark.parametrize("dtype", [None, object]) - def test_raise_when_saving_timezones(self, engine, ext, dtype, tz_aware_fixture): + def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): # GH 27008, GH 7056 tz = tz_aware_fixture data = pd.Timestamp("2019", tz=tz) df = DataFrame([data], dtype=dtype) with pytest.raises(ValueError, match="Excel does not support"): - df.to_excel(self.path) + df.to_excel(path) data = data.to_pydatetime() df = DataFrame([data], dtype=dtype) with pytest.raises(ValueError, match="Excel does not support"): - df.to_excel(self.path) + df.to_excel(path) class TestExcelWriterEngineTests: