From 115a0dfeb70874d8d6fbcddec9d5de3b2c32b36c Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Fri, 2 Aug 2024 15:40:37 -0300 Subject: [PATCH 01/10] feat: implement option 'delete_rows' of argument 'if_exists' in 'DataFrame.to_sql' API. --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/sql.py | 56 ++++++++++++++++++++++++------ pandas/tests/io/test_sql.py | 62 +++++++++++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 102628257d6f2..89139eaea7b75 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -66,6 +66,7 @@ Other enhancements - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) +- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5652d7fab0c7c..818d24965dbe2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -738,7 +738,7 @@ def to_sql( name: str, con, schema: str | None = None, - if_exists: Literal["fail", "replace", "append"] = "fail", + if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail", index: bool = True, index_label: IndexLabel | None = None, chunksize: int | None = None, @@ -764,10 +764,11 @@ def to_sql( schema : str, optional Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). - if_exists : {'fail', 'replace', 'append'}, default 'fail' + if_exists : {'fail', 'replace', 'append', 'delete_rows'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. + - delete_rows: If a table exists, delete all records and insert data. index : bool, default True Write DataFrame index as a column. index_label : str or sequence, optional @@ -818,7 +819,7 @@ def to_sql( `sqlite3 `__ or `SQLAlchemy `__ """ # noqa: E501 - if if_exists not in ("fail", "replace", "append"): + if if_exists not in ("fail", "replace", "append", "delete_rows"): raise ValueError(f"'{if_exists}' is not valid for if_exists") if isinstance(frame, Series): @@ -926,7 +927,7 @@ def __init__( pandas_sql_engine, frame=None, index: bool | str | list[str] | None = True, - if_exists: Literal["fail", "replace", "append"] = "fail", + if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail", prefix: str = "pandas", index_label=None, schema=None, @@ -974,11 +975,13 @@ def create(self) -> None: if self.exists(): if self.if_exists == "fail": raise ValueError(f"Table '{self.name}' already exists.") - if self.if_exists == "replace": + elif self.if_exists == "replace": self.pd_sql.drop_table(self.name, self.schema) self._execute_create() elif self.if_exists == "append": pass + elif self.if_exists == "delete_rows": + self.pd_sql.delete_rows(self.name, self.schema) else: raise ValueError(f"'{self.if_exists}' is not valid for if_exists") else: @@ -1480,7 +1483,7 @@ def to_sql( self, frame, name: str, - if_exists: Literal["fail", "replace", "append"] = "fail", + if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail", index: bool = True, index_label=None, schema=None, @@ -1866,7 +1869,7 @@ def prep_table( self, frame, name: str, - if_exists: Literal["fail", "replace", "append"] = "fail", + if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail", index: bool | str | list[str] | None = True, index_label=None, schema=None, @@ -1943,7 +1946,7 @@ def to_sql( self, frame, name: str, - if_exists: Literal["fail", "replace", "append"] = "fail", + if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail", index: bool = True, index_label=None, schema: str | None = None, @@ -1961,10 +1964,11 @@ def to_sql( frame : DataFrame name : string Name of SQL table. - if_exists : {'fail', 'replace', 'append'}, default 'fail' + if_exists : {'fail', 'replace', 'append', 'delete_rows'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. + - delete_rows: If a table exists, delete all records and insert data. index : boolean, default True Write DataFrame index as a column. index_label : string or sequence, default None @@ -2061,6 +2065,18 @@ def drop_table(self, table_name: str, schema: str | None = None) -> None: self.get_table(table_name, schema).drop(bind=self.con) self.meta.clear() + def delete_rows(self, table_name: str, schema: str | None = None) -> None: + schema = schema or self.meta.schema + if self.has_table(table_name, schema): + self.meta.reflect( + bind=self.con, only=[table_name], schema=schema, views=True + ) + with self.run_transaction() as con: + table = self.get_table(table_name, schema) + con.execute(table.delete()) + + self.meta.clear() + def _create_sql_schema( self, frame: DataFrame, @@ -2296,7 +2312,7 @@ def to_sql( self, frame, name: str, - if_exists: Literal["fail", "replace", "append"] = "fail", + if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail", index: bool = True, index_label=None, schema: str | None = None, @@ -2318,6 +2334,7 @@ def to_sql( - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. + - delete_rows: If a table exists, delete all records and insert data. index : boolean, default True Write DataFrame index as a column. index_label : string or sequence, default None @@ -2335,6 +2352,7 @@ def to_sql( engine : {'auto', 'sqlalchemy'}, default 'auto' Raises NotImplementedError if not set to 'auto' """ + if index_label: raise NotImplementedError( "'index_label' is not implemented for ADBC drivers" @@ -2368,6 +2386,9 @@ def to_sql( cur.execute(f"DROP TABLE {table_name}") elif if_exists == "append": mode = "append" + elif if_exists == "delete_rows": + mode = "append" + self.delete_rows(name, schema) import pyarrow as pa @@ -2402,6 +2423,12 @@ def has_table(self, name: str, schema: str | None = None) -> bool: return False + def delete_rows(self, name: str, schema: str | None = None) -> None: + delete_sql = f"DELETE FROM {schema}.{name}" if schema else f"DELETE FROM {name}" + if self.has_table(name, schema): + with self.con.cursor() as cur: + cur.execute(delete_sql) + def _create_sql_schema( self, frame: DataFrame, @@ -2769,10 +2796,11 @@ def to_sql( frame: DataFrame name: string Name of SQL table. - if_exists: {'fail', 'replace', 'append'}, default 'fail' + if_exists: {'fail', 'replace', 'append', 'delete_rows'}, default 'fail' fail: If table exists, do nothing. replace: If table exists, drop it, recreate it, and insert data. append: If table exists, insert data. Create if it does not exist. + delete_rows: If a table exists, delete all records and insert data. index : bool, default True Write DataFrame index as a column index_label : string or sequence, default None @@ -2848,6 +2876,12 @@ def drop_table(self, name: str, schema: str | None = None) -> None: drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" self.execute(drop_sql) + def delete_rows(self, name: str, schema: str | None = None) -> None: + delete_sql = f"DELETE FROM {_get_valid_sqlite_name(name)}" + if self.has_table(name, schema): + with self.run_transaction() as cur: + cur.execute(delete_sql) + def _create_sql_schema( self, frame, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 7e1220ecee218..0a80959e1ec72 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1068,7 +1068,9 @@ def test_to_sql(conn, method, test_frame1, request): @pytest.mark.parametrize("conn", all_connectable) -@pytest.mark.parametrize("mode, num_row_coef", [("replace", 1), ("append", 2)]) +@pytest.mark.parametrize( + "mode, num_row_coef", [("replace", 1), ("append", 2), ("delete_rows", 1)] +) def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request): conn = request.getfixturevalue(conn) with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: @@ -2698,6 +2700,64 @@ def test_drop_table(conn, request): assert not insp.has_table("temp_frame") +@pytest.mark.parametrize("conn", all_connectable) +def test_delete_rows_success(conn, test_frame1, request): + table_name = "temp_frame" + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(test_frame1, table_name) == test_frame1.shape[0] + + with pandasSQL.run_transaction(): + assert pandasSQL.delete_rows(table_name) is None + + assert count_rows(conn, table_name) == 0 + assert pandasSQL.has_table("temp_frame") + + +@pytest.mark.parametrize("conn", all_connectable) +def test_delete_rows_is_atomic(conn, request): + adbc_driver_manager = pytest.importorskip("adbc_driver_manager") + sqlalchemy = pytest.importorskip("sqlalchemy") + + if "sqlite" in conn: + reason = "This test relies on strict column types, SQLite has a dynamic one" + request.applymarker( + pytest.mark.xfail( + reason=reason, + strict=True, + ) + ) + + table_name = "temp_frame" + original_df = DataFrame({"a": [1, 2, 3]}) + replacing_df = DataFrame({"a": ["a", "b", "c", "d"]}) + + conn = request.getfixturevalue(conn) + pandasSQL = pandasSQL_builder(conn) + + if isinstance(conn, adbc_driver_manager.dbapi.Connection): + expected_exception = adbc_driver_manager.ProgrammingError + else: + expected_exception = sqlalchemy.exc.DataError + + with pandasSQL.run_transaction(): + pandasSQL.to_sql(original_df, table_name, if_exists="fail", index=False) + + # trying to insert strings in an integer column + with pytest.raises(expected_exception): + with pandasSQL.run_transaction(): + pandasSQL.to_sql( + replacing_df, table_name, if_exists="delete_rows", index=False + ) + + # failed "delete_rows" is rolled back preserving original data + with pandasSQL.run_transaction(): + result_df = pandasSQL.read_query(f"SELECT * FROM {table_name}") + tm.assert_frame_equal(result_df, original_df) + + @pytest.mark.parametrize("conn", all_connectable) def test_roundtrip(conn, request, test_frame1): if conn == "sqlite_str": From fde8d29fdb49f06fb602c3aa8e595ee44676008d Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:21:20 -0300 Subject: [PATCH 02/10] Apply suggestions from code review Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/sql.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 818d24965dbe2..4d41fd891463e 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2352,7 +2352,6 @@ def to_sql( engine : {'auto', 'sqlalchemy'}, default 'auto' Raises NotImplementedError if not set to 'auto' """ - if index_label: raise NotImplementedError( "'index_label' is not implemented for ADBC drivers" @@ -2424,10 +2423,10 @@ def has_table(self, name: str, schema: str | None = None) -> bool: return False def delete_rows(self, name: str, schema: str | None = None) -> None: - delete_sql = f"DELETE FROM {schema}.{name}" if schema else f"DELETE FROM {name}" + table_name = f"{schema}.{name}" if schema else name if self.has_table(name, schema): with self.con.cursor() as cur: - cur.execute(delete_sql) + cur.execute(f"DELETE FROM {table_name)") def _create_sql_schema( self, From 97bceafe774aa4a528f60c4c9ffddbf74a6c243a Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Fri, 3 Jan 2025 11:35:04 -0300 Subject: [PATCH 03/10] reworked tests to include sqlite cases --- pandas/io/sql.py | 5 ++--- pandas/tests/io/test_sql.py | 44 ++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 4d41fd891463e..1f8e418172867 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2426,7 +2426,7 @@ def delete_rows(self, name: str, schema: str | None = None) -> None: table_name = f"{schema}.{name}" if schema else name if self.has_table(name, schema): with self.con.cursor() as cur: - cur.execute(f"DELETE FROM {table_name)") + cur.execute(f"DELETE FROM {table_name}") def _create_sql_schema( self, @@ -2878,8 +2878,7 @@ def drop_table(self, name: str, schema: str | None = None) -> None: def delete_rows(self, name: str, schema: str | None = None) -> None: delete_sql = f"DELETE FROM {_get_valid_sqlite_name(name)}" if self.has_table(name, schema): - with self.run_transaction() as cur: - cur.execute(delete_sql) + self.execute(delete_sql) def _create_sql_schema( self, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0a80959e1ec72..06baf3f547c6d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2716,36 +2716,40 @@ def test_delete_rows_success(conn, test_frame1, request): assert pandasSQL.has_table("temp_frame") -@pytest.mark.parametrize("conn", all_connectable) -def test_delete_rows_is_atomic(conn, request): +@pytest.mark.parametrize("conn_name", all_connectable) +def test_delete_rows_is_atomic(conn_name, request): adbc_driver_manager = pytest.importorskip("adbc_driver_manager") sqlalchemy = pytest.importorskip("sqlalchemy") - if "sqlite" in conn: - reason = "This test relies on strict column types, SQLite has a dynamic one" - request.applymarker( - pytest.mark.xfail( - reason=reason, - strict=True, - ) - ) - table_name = "temp_frame" - original_df = DataFrame({"a": [1, 2, 3]}) - replacing_df = DataFrame({"a": ["a", "b", "c", "d"]}) + table_stmt = f"CREATE TABLE {table_name} (a INTEGER, b INTEGER UNIQUE NOT NULL)" - conn = request.getfixturevalue(conn) + if conn_name != "sqlite_buildin" and "adbc" not in conn_name: + table_stmt = sqlalchemy.text(table_stmt) + + # setting dtype is mandatory for adbc related tests + original_df = DataFrame({"a": [1, 2], "b": [3, 4]}, dtype="int32") + replacing_df = DataFrame({"a": [5, 6], "b": [7, 7]}, dtype="int32") + + conn = request.getfixturevalue(conn_name) pandasSQL = pandasSQL_builder(conn) - if isinstance(conn, adbc_driver_manager.dbapi.Connection): + with pandasSQL.run_transaction() as cur: + cur.execute(table_stmt) + + if conn_name != "sqlite_buildin" and "adbc" not in conn_name: + expected_exception = sqlalchemy.exc.IntegrityError + elif "adbc" in conn_name and "sqlite" in conn_name: + expected_exception = adbc_driver_manager.InternalError + elif "adbc" in conn_name and "postgres" in conn_name: expected_exception = adbc_driver_manager.ProgrammingError - else: - expected_exception = sqlalchemy.exc.DataError + elif conn_name == "sqlite_buildin": + expected_exception = sqlite3.IntegrityError with pandasSQL.run_transaction(): - pandasSQL.to_sql(original_df, table_name, if_exists="fail", index=False) + pandasSQL.to_sql(original_df, table_name, if_exists="append", index=False) - # trying to insert strings in an integer column + # inserting duplicated values in a UNIQUE constraint column with pytest.raises(expected_exception): with pandasSQL.run_transaction(): pandasSQL.to_sql( @@ -2754,7 +2758,7 @@ def test_delete_rows_is_atomic(conn, request): # failed "delete_rows" is rolled back preserving original data with pandasSQL.run_transaction(): - result_df = pandasSQL.read_query(f"SELECT * FROM {table_name}") + result_df = pandasSQL.read_query(f"SELECT * FROM {table_name}", dtype="int32") tm.assert_frame_equal(result_df, original_df) From 8d8813265dedd81aff0d846fa6ec9c24ea81e4dd Mon Sep 17 00:00:00 2001 From: avecasey <102306692+avecasey@users.noreply.github.com> Date: Thu, 2 Jan 2025 16:47:32 -0500 Subject: [PATCH 04/10] ENH: Added isascii() string method fixing issue #59091 (#60532) * first * second * Update object_array.py * third * ascii * ascii2 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * style * style * style * style * docs * reset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update doc/source/whatsnew/v3.0.0.rst --------- Co-authored-by: Abby VeCasey Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 89139eaea7b75..2b41dd9559542 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -67,6 +67,7 @@ Other enhancements - :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). +- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) From e68b07052d1900961f29bcbe54787a23765210b2 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Fri, 3 Jan 2025 11:47:21 -0300 Subject: [PATCH 05/10] merged readme --- doc/source/whatsnew/v3.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2b41dd9559542..26c77b62470ab 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -65,7 +65,6 @@ Other enhancements - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) -- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) From 459dcc972a587099181aff724891b61603f68057 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:32:52 -0300 Subject: [PATCH 06/10] docs: add warning in 'to_sql' --- pandas/core/generic.py | 6 ++++++ pandas/io/sql.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index de7fb3682fb4f..8c63ea698661a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2795,6 +2795,12 @@ def to_sql( Databases supported by SQLAlchemy [1]_ are supported. Tables can be newly created, appended to, or overwritten. + .. warning:: + The pandas library does not attempt to sanitize inputs provided via a to_sql call. + Please refer to the documentation for the underlying database driver to see if it + will properly prevent injection, or alternatively be advised of a security risk when + executing arbitrary commands in a to_sql call. + Parameters ---------- name : str diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1f8e418172867..bcfa4f05786f7 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -750,6 +750,12 @@ def to_sql( """ Write records stored in a DataFrame to a SQL database. + .. warning:: + The pandas library does not attempt to sanitize inputs provided via a to_sql call. + Please refer to the documentation for the underlying database driver to see if it + will properly prevent injection, or alternatively be advised of a security risk when + executing arbitrary commands in a to_sql call. + Parameters ---------- frame : DataFrame, Series From 496b30c8229760d14c4e23b13f4012c06fce6af7 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Sun, 5 Jan 2025 17:51:04 -0300 Subject: [PATCH 07/10] refactor: rewrite test --- pandas/tests/io/test_sql.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 06baf3f547c6d..dcd011df4b19b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2700,20 +2700,20 @@ def test_drop_table(conn, request): assert not insp.has_table("temp_frame") -@pytest.mark.parametrize("conn", all_connectable) -def test_delete_rows_success(conn, test_frame1, request): +@pytest.mark.parametrize("conn_name", all_connectable) +def test_delete_rows_success(conn_name, test_frame1, request): table_name = "temp_frame" - conn = request.getfixturevalue(conn) - pandasSQL = pandasSQL_builder(conn) + conn = request.getfixturevalue(conn_name) - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(test_frame1, table_name) == test_frame1.shape[0] + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(test_frame1, table_name) == test_frame1.shape[0] - with pandasSQL.run_transaction(): - assert pandasSQL.delete_rows(table_name) is None + with pandasSQL.run_transaction(): + assert pandasSQL.delete_rows(table_name) is None - assert count_rows(conn, table_name) == 0 - assert pandasSQL.has_table("temp_frame") + assert count_rows(conn, table_name) == 0 + assert pandasSQL.has_table("temp_frame") @pytest.mark.parametrize("conn_name", all_connectable) From 4d6ec631f9224bf02f54b5389d8b7537c71503c8 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Wed, 8 Jan 2025 19:16:14 -0300 Subject: [PATCH 08/10] wip - trying out new solution --- pandas/io/sql.py | 35 +++++++++++++++++++++++------------ pandas/tests/io/test_sql.py | 16 +++------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index bcfa4f05786f7..029cf984b5ce1 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1006,7 +1006,7 @@ def _execute_insert(self, conn, keys: list[str], data_iter) -> int: Each item contains a list of values to be inserted """ data = [dict(zip(keys, row)) for row in data_iter] - result = conn.execute(self.table.insert(), data) + result = self.pd_sql.execute(self.table.insert(), data) return result.rowcount def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: @@ -1023,7 +1023,7 @@ def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: data = [dict(zip(keys, row)) for row in data_iter] stmt = insert(self.table).values(data) - result = conn.execute(stmt) + result = self.pd_sql.execute(stmt) return result.rowcount def insert_data(self) -> tuple[list[str], list[np.ndarray]]: @@ -1662,8 +1662,14 @@ def execute(self, sql: str | Select | TextClause, params=None): """Simple passthrough to SQLAlchemy connectable""" args = [] if params is None else [params] if isinstance(sql, str): - return self.con.exec_driver_sql(sql, *args) - return self.con.execute(sql, *args) + execute_function = self.con.exec_driver_sql + else: + execute_function = self.con.execute + + try: + return execute_function(sql, *args) + except Exception as exc: + raise DatabaseError(f"Execution failed on sql '{sql}': {exc}") from exc def read_table( self, @@ -2077,9 +2083,9 @@ def delete_rows(self, table_name: str, schema: str | None = None) -> None: self.meta.reflect( bind=self.con, only=[table_name], schema=schema, views=True ) - with self.run_transaction() as con: + with self.run_transaction(): table = self.get_table(table_name, schema) - con.execute(table.delete()) + self.execute(table.delete()) self.meta.clear() @@ -2403,9 +2409,12 @@ def to_sql( raise ValueError("datatypes not supported") from exc with self.con.cursor() as cur: - total_inserted = cur.adbc_ingest( - table_name=name, data=tbl, mode=mode, db_schema_name=schema - ) + try: + total_inserted = cur.adbc_ingest( + table_name=name, data=tbl, mode=mode, db_schema_name=schema + ) + except Exception as exc: + raise DatabaseError("Execution failed") from exc self.con.commit() return total_inserted @@ -2431,8 +2440,7 @@ def has_table(self, name: str, schema: str | None = None) -> bool: def delete_rows(self, name: str, schema: str | None = None) -> None: table_name = f"{schema}.{name}" if schema else name if self.has_table(name, schema): - with self.con.cursor() as cur: - cur.execute(f"DELETE FROM {table_name}") + self.execute(f"DELETE FROM {table_name}").close() def _create_sql_schema( self, @@ -2553,7 +2561,10 @@ def insert_statement(self, *, num_rows: int) -> str: def _execute_insert(self, conn, keys, data_iter) -> int: data_list = list(data_iter) - conn.executemany(self.insert_statement(num_rows=1), data_list) + try: + conn.executemany(self.insert_statement(num_rows=1), data_list) + except Exception as exc: + raise DatabaseError("Execution failed") from exc return conn.rowcount def _execute_insert_multi(self, conn, keys, data_iter) -> int: diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index dcd011df4b19b..29aed42f8b973 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2718,7 +2718,6 @@ def test_delete_rows_success(conn_name, test_frame1, request): @pytest.mark.parametrize("conn_name", all_connectable) def test_delete_rows_is_atomic(conn_name, request): - adbc_driver_manager = pytest.importorskip("adbc_driver_manager") sqlalchemy = pytest.importorskip("sqlalchemy") table_name = "temp_frame" @@ -2737,20 +2736,11 @@ def test_delete_rows_is_atomic(conn_name, request): with pandasSQL.run_transaction() as cur: cur.execute(table_stmt) - if conn_name != "sqlite_buildin" and "adbc" not in conn_name: - expected_exception = sqlalchemy.exc.IntegrityError - elif "adbc" in conn_name and "sqlite" in conn_name: - expected_exception = adbc_driver_manager.InternalError - elif "adbc" in conn_name and "postgres" in conn_name: - expected_exception = adbc_driver_manager.ProgrammingError - elif conn_name == "sqlite_buildin": - expected_exception = sqlite3.IntegrityError - with pandasSQL.run_transaction(): pandasSQL.to_sql(original_df, table_name, if_exists="append", index=False) # inserting duplicated values in a UNIQUE constraint column - with pytest.raises(expected_exception): + with pytest.raises(pd.errors.DatabaseError): with pandasSQL.run_transaction(): pandasSQL.to_sql( replacing_df, table_name, if_exists="delete_rows", index=False @@ -3473,8 +3463,8 @@ def test_to_sql_with_negative_npinf(conn, request, input): mark = pytest.mark.xfail(reason="GH 36465") request.applymarker(mark) - msg = "inf cannot be used with MySQL" - with pytest.raises(ValueError, match=msg): + msg = "Execution failed on sql" + with pytest.raises(pd.errors.DatabaseError, match=msg): df.to_sql(name="foobar", con=conn, index=False) else: assert df.to_sql(name="foobar", con=conn, index=False) == 1 From f79e0d36fdab073db3d2b36dd95bd4a854dbdb0b Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:34:19 -0300 Subject: [PATCH 09/10] chore: add a new row to 'replacing_df' so we don't get biased --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 29aed42f8b973..b692bce7e5950 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2728,7 +2728,7 @@ def test_delete_rows_is_atomic(conn_name, request): # setting dtype is mandatory for adbc related tests original_df = DataFrame({"a": [1, 2], "b": [3, 4]}, dtype="int32") - replacing_df = DataFrame({"a": [5, 6], "b": [7, 7]}, dtype="int32") + replacing_df = DataFrame({"a": [5, 6, 7], "b": [8, 8, 8]}, dtype="int32") conn = request.getfixturevalue(conn_name) pandasSQL = pandasSQL_builder(conn) From b0c2eff69abdcd15718ce25980a8f6ac5a4292ff Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Fri, 17 Jan 2025 17:55:30 -0300 Subject: [PATCH 10/10] chore: add Delete to execute typing --- pandas/io/sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 029cf984b5ce1..7b360cbf869e4 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -76,6 +76,7 @@ from sqlalchemy import Table from sqlalchemy.sql.expression import ( + Delete, Select, TextClause, ) @@ -1658,7 +1659,7 @@ def run_transaction(self): else: yield self.con - def execute(self, sql: str | Select | TextClause, params=None): + def execute(self, sql: str | Select | TextClause | Delete, params=None): """Simple passthrough to SQLAlchemy connectable""" args = [] if params is None else [params] if isinstance(sql, str):