diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go index 5d70aa071f2c..e459391890a1 100644 --- a/pkg/sql/create_stats.go +++ b/pkg/sql/create_stats.go @@ -17,7 +17,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/featureflag" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" - "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -206,21 +205,9 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro ) } - if tableDesc.GetID() == keys.TableStatisticsTableID { - return nil, pgerror.New( - pgcode.WrongObjectType, "cannot create statistics on system.table_statistics", - ) - } - - if tableDesc.GetID() == keys.LeaseTableID { - return nil, pgerror.New( - pgcode.WrongObjectType, "cannot create statistics on system.lease", - ) - } - - if tableDesc.GetID() == keys.ScheduledJobsTableID { - return nil, pgerror.New( - pgcode.WrongObjectType, "cannot create statistics on system.scheduled_jobs", + if stats.DisallowedOnSystemTable(tableDesc.GetID()) { + return nil, pgerror.Newf( + pgcode.WrongObjectType, "cannot create statistics on system.%s", tableDesc.GetName(), ) } diff --git a/pkg/sql/opt/exec/execbuilder/testdata/stats b/pkg/sql/opt/exec/execbuilder/testdata/stats index 94de4847ef56..141a20ff23af 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/stats +++ b/pkg/sql/opt/exec/execbuilder/testdata/stats @@ -1,7 +1,4 @@ -# LogicTest: 5node - -# Tests that verify we retrieve the stats correctly. Note that we can't create -# statistics if distsql mode is OFF. +# LogicTest: local statement ok CREATE TABLE uv (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u)); @@ -373,3 +370,42 @@ limit │ └── filters │ └── j:1 IS NULL [outer=(1), immutable, constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)] └── 1 + +# Ensure we can run ALTER statements on the system.jobs table. +statement ok +INSERT INTO system.users VALUES ('node', NULL, true, 3); + +statement ok +GRANT node TO root; + +# Ensure that stats on the system.jobs table are being used. +statement ok +ALTER TABLE system.jobs INJECT STATISTICS '[ + { + "avg_size": 7, + "columns": [ + "id" + ], + "created_at": "2024-02-02 22:56:02.854028", + "distinct_count": 19, + "histo_col_type": "INT8", + "histo_version": 3, + "null_count": 0, + "row_count": 19 + } +]'; + +query T +EXPLAIN (OPT, VERBOSE) SELECT * FROM system.jobs; +---- +scan jobs + ├── columns: id:1 status:2 created:3 created_by_type:4 created_by_id:5 claim_session_id:6 claim_instance_id:7 num_runs:8 last_run:9 job_type:10 + ├── partial index predicates + │ └── jobs_run_stats_idx: filters + │ └── status:2 IN ('cancel-requested', 'pause-requested', 'pending', 'reverting', 'running') [outer=(2), constraints=(/2: [/'cancel-requested' - /'cancel-requested'] [/'pause-requested' - /'pause-requested'] [/'pending' - /'pending'] [/'reverting' - /'reverting'] [/'running' - /'running']; tight)] + ├── stats: [rows=19] + ├── cost: 59.49 + ├── key: (1) + ├── fd: (1)-->(2-10) + ├── distribution: test + └── prune: (1-10) diff --git a/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel b/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel index 5ec29998af4a..afffb303a4cb 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel +++ b/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel @@ -12,7 +12,7 @@ go_test( "//build/toolchains:is_heavy": {"test.Pool": "heavy"}, "//conditions:default": {"test.Pool": "large"}, }), - shard_count = 29, + shard_count = 28, tags = [ "cpu:3", ], diff --git a/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go b/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go index 22c4f9f3edf4..013b32c12263 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go +++ b/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go @@ -288,10 +288,3 @@ func TestExecBuild_scan_parallel( defer leaktest.AfterTest(t)() runExecBuildLogicTest(t, "scan_parallel") } - -func TestExecBuild_stats( - t *testing.T, -) { - defer leaktest.AfterTest(t)() - runExecBuildLogicTest(t, "stats") -} diff --git a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go index a3bc7229627a..9190587c2484 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go +++ b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go @@ -590,6 +590,13 @@ func TestExecBuild_srfs( runExecBuildLogicTest(t, "srfs") } +func TestExecBuild_stats( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runExecBuildLogicTest(t, "stats") +} + func TestExecBuild_straight_join( t *testing.T, ) { diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index c666b1025cae..9737d413fc03 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -807,44 +807,34 @@ func TestAnalyzeSystemTables(t *testing.T) { s.InternalDB().(descs.DB), ) require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory))) - var tableNames []string - tableNames = make([]string, 0, 40) - it, err := executor.QueryIterator( + rows, err := executor.QueryBuffered( ctx, "get-system-tables", nil, /* txn */ - "SELECT table_name FROM [SHOW TABLES FROM SYSTEM]", + "SELECT table_name FROM [SHOW TABLES FROM SYSTEM] WHERE type = 'table'", ) if err != nil { t.Fatal(err) } - var ok bool - for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { - if err != nil { - t.Fatal(err) - } - row := it.Cur() - tableName := string(*row[0].(*tree.DOidWrapper).Wrapped.(*tree.DString)) - tableNames = append(tableNames, tableName) - } sqlRun := sqlutils.MakeSQLRunner(sqlDB) - expectZeroRows := false - for _, tableName := range tableNames { - // Stats may not be collected on system.lease and system.table_statistics. - if tableName == "lease" || tableName == "table_statistics" || - tableName == "jobs" || tableName == "scheduled_jobs" || - tableName == "role_id_seq" || - tableName == "tenant_id_seq" || - tableName == "descriptor_id_seq" { + getTableID := func(tableName string) descpb.ID { + var tableID int + row := sqlRun.QueryRow(t, fmt.Sprintf("SELECT 'system.%s'::REGCLASS::OID", tableName)) + row.Scan(&tableID) + return descpb.ID(tableID) + } + for _, row := range rows { + tableName := string(*row[0].(*tree.DOidWrapper).Wrapped.(*tree.DString)) + if DisallowedOnSystemTable(getTableID(tableName)) { continue } - sql := fmt.Sprintf("ANALYZE system.%s", tableName) - sqlRun.Exec(t, sql) - // We're testing that ANALYZE on every system table except the above two - // doesn't error out, and populates system.table_statistics. - if err := compareStatsCountWithZero(ctx, cache, tableName, s, expectZeroRows); err != nil { + sqlRun.Exec(t, fmt.Sprintf("ANALYZE system.%s", tableName)) + // We're testing that ANALYZE on every system table (except a few + // disallowed ones) doesn't error out and populates + // system.table_statistics. + if err = compareStatsCountWithZero(ctx, cache, tableName, s, false /* expectZeroRows */); err != nil { t.Fatal(err) } } diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 59538caa0719..4ddfdadff077 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -233,9 +233,25 @@ func (sc *TableStatisticsCache) GetTableStats( return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast) } -func statsDisallowedSystemTable(tableID descpb.ID) bool { +// DisallowedOnSystemTable returns true if this tableID belongs to a special +// system table on which we want to disallow stats collection and stats usage. +func DisallowedOnSystemTable(tableID descpb.ID) bool { switch tableID { - case keys.TableStatisticsTableID, keys.LeaseTableID, keys.JobsTableID, keys.ScheduledJobsTableID: + // Disable stats on system.table_statistics because it can lead to deadlocks + // around the stats cache (which issues an internal query in + // getTableStatsFromDB to fetch statistics for a single table, and that + // query in turn will want table stats on system.table_statistics to come up + // with a plan). + // + // Disable stats on system.lease since it's known to cause hangs. + // TODO(yuzefovich): check whether it's still a problem. + // + // Disable stats on system.scheduled_jobs because the table is mutated too + // frequently and would trigger too many stats collections. The potential + // benefit is not worth the potential performance hit. + // TODO(yuzefovich): re-evaluate this assumption. Perhaps we could at + // least enable manual collection on this table. + case keys.TableStatisticsTableID, keys.LeaseTableID, keys.ScheduledJobsTableID: return true } return false @@ -245,12 +261,7 @@ func statsDisallowedSystemTable(tableID descpb.ID) bool { // used by the query optimizer. func statsUsageAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool { if catalog.IsSystemDescriptor(table) { - // Disable stats usage on system.table_statistics and system.lease. Looking - // up stats on system.lease is known to cause hangs, and the same could - // happen with system.table_statistics. Stats on system.jobs and - // system.scheduled_jobs are also disallowed because autostats are disabled - // on them. - if statsDisallowedSystemTable(table.GetID()) { + if DisallowedOnSystemTable(table.GetID()) { return false } // Return whether the optimizer is allowed to use stats on system tables. @@ -265,14 +276,7 @@ func autostatsCollectionAllowed( table catalog.TableDescriptor, clusterSettings *cluster.Settings, ) bool { if catalog.IsSystemDescriptor(table) { - // Disable autostats on system.table_statistics and system.lease. Looking - // up stats on system.lease is known to cause hangs, and the same could - // happen with system.table_statistics. No need to collect stats if we - // cannot use them. Stats on system.jobs and system.scheduled_jobs - // are also disallowed because they are mutated too frequently and would - // trigger too many stats collections. The potential benefit is not worth - // the potential performance hit. - if statsDisallowedSystemTable(table.GetID()) { + if DisallowedOnSystemTable(table.GetID()) { return false } // Return whether autostats collection is allowed on system tables,