From 16eed43477cff950462f93ba3351dc271cfbe066 Mon Sep 17 00:00:00 2001 From: Bryce Plunkett <130488190+bplunkett-stripe@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:49:28 -0800 Subject: [PATCH] Remove extraneous `NOT NULL` statement generation. Update README (#89) * Fix not null generation. Add example schema * Reword comment --- README.md | 62 ++++++++++++++----- .../partitioned_index_cases_test.go | 10 +-- pkg/diff/sql_generator.go | 17 +++++ 3 files changed, 65 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index e5246aa..fc1f4a8 100644 --- a/README.md +++ b/README.md @@ -6,36 +6,68 @@ The tooling attempts to use native postgres migration operations to perform onli be lock-free and some might require downtime, but the hazards system will warn you ahead of time when that's the case. Stateful online migration techniques, like shadow tables, aren't yet supported. +### Online index Replacement Your project's diff: ``` $ git diff diff --git a/schema/schema.sql b/schema/schema.sql -index 062816a..08a6a40 100644 +index cc3a14b..cf4b32d 100644 --- a/schema/schema.sql +++ b/schema/schema.sql -@@ -4,4 +4,4 @@ CREATE TABLE foobar( - sender text, - created_at timestamp +@@ -2,5 +2,5 @@ CREATE TABLE foobar( + created_at timestamp, + message text ); -CREATE INDEX message_idx ON foobar(message); +CREATE INDEX message_idx ON foobar(message, created_at); ``` -The generated plan (*online index replacement, i.e., queries using `message_idx` will always have an index backing them*): +The generated plan (*queries using `message_idx` will always have an index backing them, even while the new index is being built*): ``` $ pg-schema-diff plan --dsn "postgres://postgres:postgres@localhost:5432/postgres" --schema-dir ./schema ################################ Generated plan ################################ -1. ALTER INDEX "message_idx" RENAME TO "message_idx_0ef03fcb-2368-4d57-9c81-f44cb58e7325"; - -- Statement Timeout: 3s +1. ALTER INDEX "message_idx" RENAME TO "pgschemadiff_tmpidx_message_idx_IiaKzkvPQtyA7ob9piVqiQ"; + -- Statement Timeout: 3s 2. CREATE INDEX CONCURRENTLY message_idx ON public.foobar USING btree (message, created_at); - -- Statement Timeout: 20m0s - -- Lock Timeout: 3s - -- Hazard INDEX_BUILD: This might affect database performance. Concurrent index builds require a non-trivial amount of CPU, potentially affecting database performance. They also can take a while but do not lock out writes. + -- Statement Timeout: 20m0s + -- Lock Timeout: 3s + -- Hazard INDEX_BUILD: This might affect database performance. Concurrent index builds require a non-trivial amount of CPU, potentially affecting database performance. They also can take a while but do not lock out writes. + +3. DROP INDEX CONCURRENTLY "pgschemadiff_tmpidx_message_idx_IiaKzkvPQtyA7ob9piVqiQ"; + -- Statement Timeout: 20m0s + -- Lock Timeout: 3s + -- Hazard INDEX_DROPPED: Dropping this index means queries that use this index might perform worse because they will no longer will be able to leverage it. +``` +### Online `NOT NULL` constraint creation +Your project's diff: +``` +diff --git a/schema/schema.sql b/schema/schema.sql +index cc3a14b..5a1cec2 100644 +--- a/schema/schema.sql ++++ b/schema/schema.sql +@@ -1,5 +1,5 @@ + CREATE TABLE foobar( +- created_at timestamp, ++ created_at timestamp NOT NULL, + message text + ); + CREATE INDEX message_idx ON foobar(message); +``` +The generated plan (*leverages check constraints to eliminate the need for a long-lived access-exclusive lock on the table*): +``` +$ pg-schema-diff plan --dsn "postgres://postgres:postgres@localhost:5432/postgres" --schema-dir ./schema +################################ Generated plan ################################ +1. ALTER TABLE "public"."foobar" ADD CONSTRAINT "pgschemadiff_tmpnn_BCOxMXqAQwaXlKPCRXoMMg" CHECK("created_at" IS NOT NULL) NOT VALID; + -- Statement Timeout: 3s + +2. ALTER TABLE "public"."foobar" VALIDATE CONSTRAINT "pgschemadiff_tmpnn_BCOxMXqAQwaXlKPCRXoMMg"; + -- Statement Timeout: 3s + +3. ALTER TABLE "public"."foobar" ALTER COLUMN "created_at" SET NOT NULL; + -- Statement Timeout: 3s -3. DROP INDEX CONCURRENTLY "message_idx_0ef03fcb-2368-4d57-9c81-f44cb58e7325"; - -- Statement Timeout: 20m0s - -- Lock Timeout: 3s - -- Hazard INDEX_DROPPED: Dropping this index means queries that use this index might perform worse because they will no longer will be able to leverage it. +4. ALTER TABLE "public"."foobar" DROP CONSTRAINT "pgschemadiff_tmpnn_BCOxMXqAQwaXlKPCRXoMMg"; + -- Statement Timeout: 3s ``` # Key features @@ -45,7 +77,7 @@ $ pg-schema-diff plan --dsn "postgres://postgres:postgres@localhost:5432/postgre * Online index replacement: If some index is changed, the new version will be built before the old version is dropped, preventing a window where no index is backing queries * Online check constraint builds: Check constraints are added as `INVALID` before being validated, eliminating the need for a long access-exclusive lock on the table - * Online `ALTER ... SET NOT NULL` using check constraints to eliminate the need for an access-exclusive lock on the table + * Online `NOT NULL` constraint creation using check constraints to eliminate the need for an access-exclusive lock on the table * Prioritized index builds: Building new indexes is always prioritized over deleting old indexes * A comprehensive set of features to ensure the safety of planned migrations: * Operators warned of dangerous operations. diff --git a/internal/migration_acceptance_tests/partitioned_index_cases_test.go b/internal/migration_acceptance_tests/partitioned_index_cases_test.go index 2b7072c..b745934 100644 --- a/internal/migration_acceptance_tests/partitioned_index_cases_test.go +++ b/internal/migration_acceptance_tests/partitioned_index_cases_test.go @@ -193,7 +193,6 @@ var partitionedIndexAcceptanceTestCases = []acceptanceTestCase{ `, }, expectedHazardTypes: []diff.MigrationHazardType{ - diff.MigrationHazardTypeAcquiresAccessExclusiveLock, diff.MigrationHazardTypeIndexBuild, }, }, @@ -281,7 +280,6 @@ var partitionedIndexAcceptanceTestCases = []acceptanceTestCase{ `, }, expectedHazardTypes: []diff.MigrationHazardType{ - diff.MigrationHazardTypeAcquiresAccessExclusiveLock, diff.MigrationHazardTypeIndexBuild, }, }, @@ -308,7 +306,6 @@ var partitionedIndexAcceptanceTestCases = []acceptanceTestCase{ `, }, expectedHazardTypes: []diff.MigrationHazardType{ - diff.MigrationHazardTypeAcquiresAccessExclusiveLock, diff.MigrationHazardTypeIndexDropped, diff.MigrationHazardTypeIndexBuild, }, @@ -364,7 +361,6 @@ var partitionedIndexAcceptanceTestCases = []acceptanceTestCase{ `, }, expectedHazardTypes: []diff.MigrationHazardType{ - diff.MigrationHazardTypeAcquiresAccessExclusiveLock, diff.MigrationHazardTypeIndexBuild, }, }, @@ -554,9 +550,9 @@ var partitionedIndexAcceptanceTestCases = []acceptanceTestCase{ CREATE INDEX new_foobar_1_some_local_idx ON foobar_1(foo, bar, id); `}, expectedHazardTypes: []diff.MigrationHazardType{ + diff.MigrationHazardTypeAcquiresAccessExclusiveLock, diff.MigrationHazardTypeIndexDropped, diff.MigrationHazardTypeIndexBuild, - diff.MigrationHazardTypeAcquiresAccessExclusiveLock, }, ddl: []string{ "ALTER INDEX \"some_idx\" RENAME TO \"pgschemadiff_tmpidx_some_idx_MDEyMzQ1Rje4OTo7PD0$Pw\"", @@ -569,15 +565,11 @@ var partitionedIndexAcceptanceTestCases = []acceptanceTestCase{ "ALTER TABLE \"public\".\"foobar\" DROP CONSTRAINT \"pgschemadiff_tmpnn_EBESExQVRheYGRobHB0eHw\"", "ALTER TABLE \"public\".\"foobar\" DROP CONSTRAINT \"pgschemadiff_tmpnn_ICEiIyQlRieoKSorLC0uLw\"", "ALTER TABLE ONLY \"public\".\"foobar\" ADD CONSTRAINT \"some_idx\" PRIMARY KEY (foo, id)", - "ALTER TABLE \"public\".\"foobar_1\" ALTER COLUMN \"id\" SET NOT NULL", - "ALTER TABLE \"public\".\"foobar_1\" ALTER COLUMN \"foo\" SET NOT NULL", "CREATE UNIQUE INDEX CONCURRENTLY foobar_1_pkey ON public.foobar_1 USING btree (foo, id)", "ALTER TABLE \"public\".\"foobar_1\" ADD CONSTRAINT \"foobar_1_pkey\" PRIMARY KEY USING INDEX \"foobar_1_pkey\"", "ALTER INDEX \"some_idx\" ATTACH PARTITION \"foobar_1_pkey\"", "CREATE INDEX CONCURRENTLY new_foobar_1_some_local_idx ON public.foobar_1 USING btree (foo, bar, id)", "CREATE INDEX CONCURRENTLY new_idx ON public.foobar_1 USING btree (foo, bar)", - "ALTER TABLE \"public\".\"foobar_2\" ALTER COLUMN \"id\" SET NOT NULL", - "ALTER TABLE \"public\".\"foobar_2\" ALTER COLUMN \"foo\" SET NOT NULL", "CREATE UNIQUE INDEX CONCURRENTLY foobar_2_pkey ON public.foobar_2 USING btree (foo, id)", "ALTER TABLE \"public\".\"foobar_2\" ADD CONSTRAINT \"foobar_2_pkey\" PRIMARY KEY USING INDEX \"foobar_2_pkey\"", "ALTER INDEX \"some_idx\" ATTACH PARTITION \"foobar_2_pkey\"", diff --git a/pkg/diff/sql_generator.go b/pkg/diff/sql_generator.go index 3b9a9d6..bf57931 100644 --- a/pkg/diff/sql_generator.go +++ b/pkg/diff/sql_generator.go @@ -479,6 +479,7 @@ func (schemaSQLGenerator) Alter(diff schemaDiff) ([]Statement, error) { tableGraphs, err := diff.tableDiffs.resolveToSQLGraph(&tableSQLVertexGenerator{ deletedTablesByName: deletedTablesByName, tablesInNewSchemaByName: tablesInNewSchemaByName, + tableDiffsByName: buildDiffByNameMap[schema.Table, tableDiff](diff.tableDiffs.alters), }) if err != nil { return nil, fmt.Errorf("resolving table sql graphs: %w", err) @@ -614,6 +615,7 @@ func buildMap[K comparable, V any](v []V, getKey func(V) K) map[K]V { type tableSQLVertexGenerator struct { deletedTablesByName map[string]schema.Table tablesInNewSchemaByName map[string]schema.Table + tableDiffsByName map[string]tableDiff } var _ sqlVertexGenerator[schema.Table, tableDiff] = &tableSQLVertexGenerator{} @@ -799,6 +801,11 @@ func (t *tableSQLVertexGenerator) alterPartition(diff tableDiff) ([]Statement, e return nil, fmt.Errorf("check constraints on partitions: %w", ErrNotImplemented) } + var alteredParentColumnsByName map[string]columnDiff + if parentDiff, ok := t.tableDiffsByName[diff.new.ParentTableName]; ok { + alteredParentColumnsByName = buildDiffByNameMap[schema.Column, columnDiff](parentDiff.columnsDiff.alters) + } + var stmts []Statement // ColumnsDiff should only have nullability changes. Partitioned tables // aren't concerned about old/new columns added @@ -806,6 +813,16 @@ func (t *tableSQLVertexGenerator) alterPartition(diff tableDiff) ([]Statement, e if colDiff.old.IsNullable == colDiff.new.IsNullable { continue } + + if parentCol, ok := alteredParentColumnsByName[colDiff.new.Name]; ok { + if colDiff.new.IsNullable == parentCol.new.IsNullable && parentCol.new.IsNullable != parentCol.old.IsNullable { + // If the parent column's new nullability matches the child column, and the parent column's nullability + // is being altered, then we don't need to alter the child column's nullability because the DDL required + // to alter the parent's nullability will alter the child's nullability as well. + continue + } + } + alterColumnPrefix := fmt.Sprintf("%s ALTER COLUMN %s", alterTablePrefix(publicSchemaName(diff.new.Name)), schema.EscapeIdentifier(colDiff.new.Name)) if colDiff.new.IsNullable { stmts = append(stmts, Statement{