From 75079efa10974aa27d625eb878a3fd3767e22944 Mon Sep 17 00:00:00 2001 From: Michael Erickson <michae2@cockroachlabs.com> Date: Fri, 17 Jan 2025 16:48:36 -0800 Subject: [PATCH] opt/memo: silence error in factorOutVirtualCols for mutation columns Mutation columns (such as those being dropped by an ALTER TABLE DROP COLUMN statement) exist in table metadata, but might not necessarily have an associated computed column expression. This is because `optbuilder.(*Builder).addComputedColsForTable` usually skips over mutation columns. In `memo.(*statisticsBuilder).factorOutVirtualCols` we gather all the expressions for virtual computed columns of the table. We were failing an assertion if we couldn't find the expression for a column. This is fine, however, because `factorOutVirtualCols` only powers an optimization in statistics builder and is not necessary for correctness. This commit silences the assertion if the column is a mutation column, and futhermore, only checks it in test builds otherwise, because ideally we wouldn't want to miss the optimization for non-mutation columns. Fixes: #129405 Fixes: #138147 Fixes: #138809 Release note (bug fix): Fix a rare bug in which a query might fail with error "could not find computed column expression for column in table" while dropping a virtual computed column from the table. This bug was introduced in v23.2.4. --- .../testdata/logic_test/distsql_stats | 80 ++++++++++++++++++- pkg/sql/opt/memo/statistics_builder.go | 13 ++- 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 8980f7288fed..79aa3f8b93f6 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -3336,8 +3336,14 @@ mutation {crdb_internal_a_shard_3} 10 true statement ok SET CLUSTER SETTING jobs.debug.pausepoints = '' +let $job +SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY + +statement ok +RESUME JOB $job + statement ok -RESUME JOB (SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status = 'paused' FETCH FIRST 1 ROWS ONLY) +SHOW JOB WHEN COMPLETE $job # Test optimizer_use_virtual_computed_column_stats. @@ -3506,6 +3512,78 @@ upper_bound range_rows distinct_range_rows equal_rows 'dispatched' 0 0 1 'delivered' 0 0 1 +# Regression test for #138809: check that we can still use stats on virtual +# computed columns after dropping the column. + +statement ok +CREATE TABLE t138809 ( + a INT PRIMARY KEY, + b INT AS (a + 1) VIRTUAL, + c INT AS (a + 2) VIRTUAL +) + +statement ok +INSERT INTO t138809 (a) VALUES (1), (2) + +statement ok +ANALYZE t138809 + +query TIIIIB colnames +SELECT column_names, row_count, null_count, distinct_count, avg_size, histogram_id IS NOT NULL AS has_histogram +FROM [SHOW STATISTICS FOR TABLE t138809] +ORDER BY column_names::STRING +---- +column_names row_count null_count distinct_count avg_size has_histogram +{a} 2 0 2 8 true +{b} 2 0 2 8 true +{c} 2 0 2 8 true + +query T +EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1 +---- +distribution: full +vectorized: true +· +• group (scalar) +│ estimated row count: 1 +│ +└── • scan + estimated row count: 2 (100% of the table; stats collected <hidden> ago) + table: t138809@t138809_pkey + spans: [/1 - ] + +statement ok +SET CLUSTER SETTING jobs.debug.pausepoints = 'schemachanger.root..1' + +statement error job \d+ was paused before it completed with reason: pause point "schemachanger\.root\.\.1" hit +ALTER TABLE t138809 DROP COLUMN c + +query T +EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1 +---- +distribution: full +vectorized: true +· +• group (scalar) +│ estimated row count: 1 +│ +└── • scan + estimated row count: 2 (100% of the table; stats collected <hidden> ago) + table: t138809@t138809_pkey + spans: [/1 - ] + +statement ok +SET CLUSTER SETTING jobs.debug.pausepoints = DEFAULT + +let $job +SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t138809 DROP COLUMN c' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY + +statement ok +RESUME JOB $job + +statement ok +SHOW JOB WHEN COMPLETE $job + # Regression test for #67050: make sure we skip over enum values that have been # dropped when decoding histograms. diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index c06462695c62..fd2d02080da9 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -5150,9 +5150,16 @@ func (sb *statisticsBuilder) factorOutVirtualCols( } expr, ok := tab.ComputedCols[colID] if !ok { - panic(errors.AssertionFailedf( - "could not find computed column expression for column %v in table %v", colID, tab.Alias, - )) + // If we can't find the computed column expression, this is probably a + // mutation column. Whatever the reason, it's safe to skip: we simply + // won't factor out matching expressions. + if buildutil.CrdbTestBuild && + !tab.Table.Column(tab.MetaID.ColumnOrdinal(colID)).IsMutation() { + panic(errors.AssertionFailedf( + "could not find computed column expression for column %v in table %v", colID, tab.Alias, + )) + } + return } virtExprs = append(virtExprs, virtExpr{colID: colID, expr: expr}) })