From 5e96d8eae4dbf70fa1fc521500f71e5e9626ca69 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 3 Apr 2024 14:48:04 +0200 Subject: [PATCH 1/4] Initial commit --- python/pyarrow/_dataset_parquet.pyx | 3 +++ python/pyarrow/tests/test_dataset_encryption.py | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 58ef6145cf7d1..007970d8361dd 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -198,6 +198,9 @@ cdef class ParquetFileFormat(FileFormat): ------- pyarrow.dataset.FileWriteOptions """ + if not isinstance(self, ParquetFileFormat): + raise TypeError("pyarrow.dataset.ParquetFileFormat() must be initiated" + " before calling make_write_options()") opts = FileFormat.make_write_options(self) (<ParquetFileWriteOptions> opts).update(**kwargs) return opts diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py index 2a631db9fc0fa..f229c249072a8 100644 --- a/python/pyarrow/tests/test_dataset_encryption.py +++ b/python/pyarrow/tests/test_dataset_encryption.py @@ -215,3 +215,15 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes: dataset = ds.dataset(path, format=file_format, filesystem=mockfs) new_table = dataset.to_table() assert table == new_table + + +def test_make_write_options_error(): + # GH-39440 + msg = "ParquetFileFormat\\(\\) must be initiated before calling make_write_options" + with pytest.raises(TypeError, match=msg): + pa.dataset.ParquetFileFormat.make_write_options(43) + + pformat = pa.dataset.ParquetFileFormat() + msg = "make_write_options\\(\\) takes exactly 0 positional arguments" + with pytest.raises(TypeError, match=msg): + pformat.make_write_options(43) From b09cae7119d00388d70c9054a9fcdeae31caafb2 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 4 Apr 2024 18:02:43 +0200 Subject: [PATCH 2/4] Add comment about the rationale --- python/pyarrow/_dataset_parquet.pyx | 1 + 1 file 
changed, 1 insertion(+) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 007970d8361dd..2523bb2ab826c 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -198,6 +198,7 @@ cdef class ParquetFileFormat(FileFormat): ------- pyarrow.dataset.FileWriteOptions """ + # Safeguard from calling make_write_options as a static class method if not isinstance(self, ParquetFileFormat): raise TypeError("pyarrow.dataset.ParquetFileFormat() must be initiated" " before calling make_write_options()") From d2c6efbe6efda0fa496b12f86fccda3e5319cd8e Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 4 Apr 2024 18:32:10 +0200 Subject: [PATCH 3/4] Update python/pyarrow/_dataset_parquet.pyx Co-authored-by: Antoine Pitrou --- python/pyarrow/_dataset_parquet.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 2523bb2ab826c..a55e889ba8246 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -200,8 +200,8 @@ cdef class ParquetFileFormat(FileFormat): """ # Safeguard from calling make_write_options as a static class method if not isinstance(self, ParquetFileFormat): - raise TypeError("pyarrow.dataset.ParquetFileFormat() must be initiated" - " before calling make_write_options()") + raise TypeError("make_write_options() should be called on " + "an instance of ParquetFileFormat") opts = FileFormat.make_write_options(self) (<ParquetFileWriteOptions> opts).update(**kwargs) return opts From 5e895f2ed9d9a33ce81bf8740212ea9af98d71e8 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 4 Apr 2024 18:58:37 +0200 Subject: [PATCH 4/4] Update test and move to test_dataset.py --- python/pyarrow/tests/test_dataset.py | 13 +++++++++++++ python/pyarrow/tests/test_dataset_encryption.py | 12 ------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/python/pyarrow/tests/test_dataset.py 
b/python/pyarrow/tests/test_dataset.py index 3d77214c174c5..6bba7240c05df 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -5630,3 +5630,16 @@ def test_checksum_write_dataset_read_dataset_to_table(tempdir): corrupted_dir_path, format=pq_read_format_crc ).to_table() + + +def test_make_write_options_error(): + # GH-39440 + msg = ("make_write_options\\(\\) should be called on an " + "instance of ParquetFileFormat") + with pytest.raises(TypeError, match=msg): + pa.dataset.ParquetFileFormat.make_write_options(43) + + pformat = pa.dataset.ParquetFileFormat() + msg = "make_write_options\\(\\) takes exactly 0 positional arguments" + with pytest.raises(TypeError, match=msg): + pformat.make_write_options(43) diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py index f229c249072a8..2a631db9fc0fa 100644 --- a/python/pyarrow/tests/test_dataset_encryption.py +++ b/python/pyarrow/tests/test_dataset_encryption.py @@ -215,15 +215,3 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes: dataset = ds.dataset(path, format=file_format, filesystem=mockfs) new_table = dataset.to_table() assert table == new_table - - -def test_make_write_options_error(): - # GH-39440 - msg = "ParquetFileFormat\\(\\) must be initiated before calling make_write_options" - with pytest.raises(TypeError, match=msg): - pa.dataset.ParquetFileFormat.make_write_options(43) - - pformat = pa.dataset.ParquetFileFormat() - msg = "make_write_options\\(\\) takes exactly 0 positional arguments" - with pytest.raises(TypeError, match=msg): - pformat.make_write_options(43)