From e9da072f87134e4ef02495cc9f39fac267ee8ff0 Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Sun, 26 Jan 2025 18:30:27 +0100 Subject: [PATCH] Add ci check for idempotency in sql scripts To detect the problematic patterns that were part of the 2.18 release we can check the sql scripts against a list of allowed statements. Any non idempotent operation should be in the pre_install scripts and not the scripts that get appended for the update scripts. --- .github/workflows/catalog-updates-check.yaml | 6 + scripts/check_sql_script.py | 138 +++++++++++++++++++ scripts/check_updates_ast.py | 6 + sql/metadata.sql | 4 +- 4 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 scripts/check_sql_script.py diff --git a/.github/workflows/catalog-updates-check.yaml b/.github/workflows/catalog-updates-check.yaml index 893a7a59447..e1f234093da 100644 --- a/.github/workflows/catalog-updates-check.yaml +++ b/.github/workflows/catalog-updates-check.yaml @@ -22,9 +22,15 @@ jobs: run: | python scripts/check_updates_ast.py --latest "sql/updates/latest-dev.sql" + - name: Check for idempotency in SQL scripts + if: always() + run: | + python scripts/check_sql_script.py sql/*.sql + # To allow fixing previous mistakes we run the check against reverse-dev but don't # fail it on errors. - name: Check reverse-dev contents if: always() run: | python scripts/check_updates_ast.py "sql/updates/reverse-dev.sql" || true + diff --git a/scripts/check_sql_script.py b/scripts/check_sql_script.py new file mode 100644 index 00000000000..983c4c5693f --- /dev/null +++ b/scripts/check_sql_script.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python + +# Check SQL script components for problematic patterns. This script is +# intended to be run on the scripts that are added to every update script, +# but not the compiled update script or the pre_install scripts. +# +# This script will find patterns that are not idempotent and therefore +# should be moved to the pre_install part. + +from pglast import parse_sql +from pglast.visitors import Visitor, Skip, Continue +from pglast.stream import RawStream +import sys +import re +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("filename", type=argparse.FileType("r"), nargs="+") +args = parser.parse_args() + + +class SQLVisitor(Visitor): + def __init__(self, file): + self.errors = 0 + self.file = file + super().__init__() + + def error(self, node, hint): + self.errors += 1 + print( + f"Invalid statement found in sql script({self.file}):\n", + RawStream()(node), + ) + print(hint, "\n") + + def visit_RawStmt(self, _ancestors, _node): + # Statements are nested in RawStmt so we need to let the visitor descend + return Continue + + def visit(self, _ancestors, node): + self.error(node, "Consider moving the statement into a pre_install script") + + # We are only interested in checking top-level statements + return Skip + + def visit_CommentStmt(self, _ancestors, _node): + return Skip + + def visit_GrantStmt(self, _ancestors, _node): + return Skip + + def visit_SelectStmt(self, _ancestors, _node): + return Skip + + def visit_InsertStmt(self, _ancestors, _node): + return Skip + + def visit_DeleteStmt(self, _ancestors, _node): + return Skip + + def visit_DoStmt(self, _ancestors, _node): + return Skip + + def visit_CreateEventTrigStmt(self, _ancestors, _node): + return Skip + + def visit_CreateTrigStmt(self, _ancestors, node): + if not node.replace: + self.error(node, "Consider using CREATE OR REPLACE TRIGGER") + + return Skip + + def visit_DefineStmt(self, _ancestors, node): + if not node.replace: + self.error(node, "Consider using CREATE OR REPLACE") + + return Skip + + def visit_DropStmt(self, _ancestors, node): + if not node.missing_ok: + self.error(node, "Consider using DROP IF EXISTS") + + return Skip + + def visit_ViewStmt(self, _ancestors, node): + if not node.replace: + self.error(node, "Consider using CREATE OR REPLACE VIEW") + + return Skip + + def visit_CreateFunctionStmt(self, _ancestors, node): + if not node.replace: + self.error(node, "Consider using CREATE OR REPLACE FUNCTION") + + return Skip + + +# copied from pgspot +def visit_sql(sql, file): + # @extschema@ is placeholder in extension scripts for + # the schema the extension gets installed in + sql = sql.replace("@extschema@", "extschema") + sql = sql.replace("@extowner@", "extowner") + sql = sql.replace("@database_owner@", "database_owner") + # postgres contrib modules are protected by psql meta commands to + # prevent running extension files in psql. + # The SQL parser will error on those since they are not valid + # SQL, so we comment out all psql meta commands before parsing. + sql = re.sub(r"^\\", "-- \\\\", sql, flags=re.MULTILINE) + + visitor = SQLVisitor(file) + for stmt in parse_sql(sql): + visitor(stmt) + return visitor.errors + + +def main(args): + errors = 0 + error_files = [] + for file in args.filename: + sql = file.read() + result = visit_sql(sql, file.name) + if result > 0: + errors += result + error_files.append(file.name) + + if errors > 0: + numbering = "errors" if errors > 1 else "error" + print( + f"{errors} {numbering} detected in {len(error_files)} files({', '.join(error_files)})" + ) + sys.exit(1) + sys.exit(0) + + +if __name__ == "__main__": + main(args) + sys.exit(0) diff --git a/scripts/check_updates_ast.py b/scripts/check_updates_ast.py index b6b7a73ac69..3c505de68aa 100644 --- a/scripts/check_updates_ast.py +++ b/scripts/check_updates_ast.py @@ -1,3 +1,9 @@ +#!/usr/bin/env python + +# Check SQL update script for undesirable patterns. This script is +# intended to be run on the compiled update script or subsets of +# the update script (e.g. latest-dev.sql and reverse-dev.sql) + from pglast import parse_sql from pglast.ast import ColumnDef from pglast.visitors import Visitor diff --git a/sql/metadata.sql b/sql/metadata.sql index eabb4f2702c..39a0b77a71c 100644 --- a/sql/metadata.sql +++ b/sql/metadata.sql @@ -21,9 +21,7 @@ BEGIN END $$ SET search_path TO pg_catalog, pg_temp; --- CREATE OR REPLACE TRIGGER is PG14+ only -DROP TRIGGER IF EXISTS metadata_insert_trigger ON _timescaledb_catalog.metadata; -CREATE TRIGGER metadata_insert_trigger BEFORE INSERT ON _timescaledb_catalog.metadata FOR EACH ROW EXECUTE PROCEDURE _timescaledb_functions.metadata_insert_trigger(); +CREATE OR REPLACE TRIGGER metadata_insert_trigger BEFORE INSERT ON _timescaledb_catalog.metadata FOR EACH ROW EXECUTE PROCEDURE _timescaledb_functions.metadata_insert_trigger(); -- Insert uuid and install_timestamp on database creation since the trigger -- will turn these into UPDATEs on conflicts we can't use ON CONFLICT DO NOTHING.