From 75079efa10974aa27d625eb878a3fd3767e22944 Mon Sep 17 00:00:00 2001
From: Michael Erickson <michae2@cockroachlabs.com>
Date: Fri, 17 Jan 2025 16:48:36 -0800
Subject: [PATCH] opt/memo: silence error in factorOutVirtualCols for mutation
 columns

Mutation columns (such as those being dropped by an ALTER TABLE DROP
COLUMN statement) exist in table metadata, but might not necessarily
have an associated computed column expression. This is because
`optbuilder.(*Builder).addComputedColsForTable` usually skips over
mutation columns.

In `memo.(*statisticsBuilder).factorOutVirtualCols` we gather all the
expressions for virtual computed columns of the table. We were failing
an assertion if we couldn't find the expression for a column. This is
fine, however, because `factorOutVirtualCols` only powers an
optimization in statistics builder and is not necessary for correctness.

This commit silences the assertion if the column is a mutation column,
and futhermore, only checks it in test builds otherwise, because ideally
we wouldn't want to miss the optimization for non-mutation columns.

Fixes: #129405
Fixes: #138147
Fixes: #138809

Release note (bug fix): Fix a rare bug in which a query might fail with
error "could not find computed column expression for column in table"
while dropping a virtual computed column from the table. This bug was
introduced in v23.2.4.
---
 .../testdata/logic_test/distsql_stats         | 80 ++++++++++++++++++-
 pkg/sql/opt/memo/statistics_builder.go        | 13 ++-
 2 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats
index 8980f7288fed..79aa3f8b93f6 100644
--- a/pkg/sql/logictest/testdata/logic_test/distsql_stats
+++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats
@@ -3336,8 +3336,14 @@ mutation         {crdb_internal_a_shard_3}    10         true
 statement ok
 SET CLUSTER SETTING jobs.debug.pausepoints = ''
 
+let $job
+SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY
+
+statement ok
+RESUME JOB $job
+
 statement ok
-RESUME JOB (SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status = 'paused' FETCH FIRST 1 ROWS ONLY)
+SHOW JOB WHEN COMPLETE $job
 
 # Test optimizer_use_virtual_computed_column_stats.
 
@@ -3506,6 +3512,78 @@ upper_bound   range_rows  distinct_range_rows  equal_rows
 'dispatched'  0           0                    1
 'delivered'   0           0                    1
 
+# Regression test for #138809: check that we can still use stats on virtual
+# computed columns after dropping the column.
+
+statement ok
+CREATE TABLE t138809 (
+  a INT PRIMARY KEY,
+  b INT AS (a + 1) VIRTUAL,
+  c INT AS (a + 2) VIRTUAL
+)
+
+statement ok
+INSERT INTO t138809 (a) VALUES (1), (2)
+
+statement ok
+ANALYZE t138809
+
+query TIIIIB colnames
+SELECT column_names, row_count, null_count, distinct_count, avg_size, histogram_id IS NOT NULL AS has_histogram
+FROM [SHOW STATISTICS FOR TABLE t138809]
+ORDER BY column_names::STRING
+----
+column_names  row_count  null_count  distinct_count  avg_size  has_histogram
+{a}           2          0           2               8         true
+{b}           2          0           2               8         true
+{c}           2          0           2               8         true
+
+query T
+EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1
+----
+distribution: full
+vectorized: true
+·
+• group (scalar)
+│ estimated row count: 1
+│
+└── • scan
+      estimated row count: 2 (100% of the table; stats collected <hidden> ago)
+      table: t138809@t138809_pkey
+      spans: [/1 - ]
+
+statement ok
+SET CLUSTER SETTING jobs.debug.pausepoints = 'schemachanger.root..1'
+
+statement error job \d+ was paused before it completed with reason: pause point "schemachanger\.root\.\.1" hit
+ALTER TABLE t138809 DROP COLUMN c
+
+query T
+EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1
+----
+distribution: full
+vectorized: true
+·
+• group (scalar)
+│ estimated row count: 1
+│
+└── • scan
+      estimated row count: 2 (100% of the table; stats collected <hidden> ago)
+      table: t138809@t138809_pkey
+      spans: [/1 - ]
+
+statement ok
+SET CLUSTER SETTING jobs.debug.pausepoints = DEFAULT
+
+let $job
+SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t138809 DROP COLUMN c' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY
+
+statement ok
+RESUME JOB $job
+
+statement ok
+SHOW JOB WHEN COMPLETE $job
+
 # Regression test for #67050: make sure we skip over enum values that have been
 # dropped when decoding histograms.
 
diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go
index c06462695c62..fd2d02080da9 100644
--- a/pkg/sql/opt/memo/statistics_builder.go
+++ b/pkg/sql/opt/memo/statistics_builder.go
@@ -5150,9 +5150,16 @@ func (sb *statisticsBuilder) factorOutVirtualCols(
 		}
 		expr, ok := tab.ComputedCols[colID]
 		if !ok {
-			panic(errors.AssertionFailedf(
-				"could not find computed column expression for column %v in table %v", colID, tab.Alias,
-			))
+			// If we can't find the computed column expression, this is probably a
+			// mutation column. Whatever the reason, it's safe to skip: we simply
+			// won't factor out matching expressions.
+			if buildutil.CrdbTestBuild &&
+				!tab.Table.Column(tab.MetaID.ColumnOrdinal(colID)).IsMutation() {
+				panic(errors.AssertionFailedf(
+					"could not find computed column expression for column %v in table %v", colID, tab.Alias,
+				))
+			}
+			return
 		}
 		virtExprs = append(virtExprs, virtExpr{colID: colID, expr: expr})
 	})