From 9a03da3b1fa017585207cdf370ad7fa2c075ab4f Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Fri, 2 Feb 2024 14:52:06 -0800 Subject: [PATCH] sql: allow usage of table stats on system.jobs In 23.1 (in a6e2818085a8c9097a2a1251d88f0d80533106b6, also fixed in fe2e2508ab6f34d82a53cc3ad599538c3a9b5a62) we allowed stats collection on `system.jobs` table. However, we forgot to update another place where the jobs table ID was mentioned - whether auto collection on the jobs table is allowed and whether usage (by the optimizer) of the table stats on jobs table is allowed. This is now fixed. Additionally, this commit performs a minor cleanup of tests around this. In particular, `stats` execbuilder test now runs in the local mode (previously it was using 5node due to some peculiar historical reasons). Epic: None Release note: None --- pkg/sql/create_stats.go | 19 ++------ pkg/sql/opt/exec/execbuilder/testdata/stats | 44 +++++++++++++++++-- .../exec/execbuilder/tests/5node/BUILD.bazel | 2 +- .../execbuilder/tests/5node/generated_test.go | 7 --- .../execbuilder/tests/local/generated_test.go | 7 +++ pkg/sql/stats/automatic_stats_test.go | 42 +++++++----------- pkg/sql/stats/stats_cache.go | 36 ++++++++------- 7 files changed, 87 insertions(+), 70 deletions(-) diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go index 5d70aa071f2c..e459391890a1 100644 --- a/pkg/sql/create_stats.go +++ b/pkg/sql/create_stats.go @@ -17,7 +17,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/featureflag" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" - "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -206,21 +205,9 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro ) } - if tableDesc.GetID() == keys.TableStatisticsTableID { - return nil, pgerror.New( - pgcode.WrongObjectType, "cannot create statistics on system.table_statistics", - ) - } - - if tableDesc.GetID() == keys.LeaseTableID { - return nil, pgerror.New( - pgcode.WrongObjectType, "cannot create statistics on system.lease", - ) - } - - if tableDesc.GetID() == keys.ScheduledJobsTableID { - return nil, pgerror.New( - pgcode.WrongObjectType, "cannot create statistics on system.scheduled_jobs", + if stats.DisallowedOnSystemTable(tableDesc.GetID()) { + return nil, pgerror.Newf( + pgcode.WrongObjectType, "cannot create statistics on system.%s", tableDesc.GetName(), ) } diff --git a/pkg/sql/opt/exec/execbuilder/testdata/stats b/pkg/sql/opt/exec/execbuilder/testdata/stats index 94de4847ef56..141a20ff23af 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/stats +++ b/pkg/sql/opt/exec/execbuilder/testdata/stats @@ -1,7 +1,4 @@ -# LogicTest: 5node - -# Tests that verify we retrieve the stats correctly. Note that we can't create -# statistics if distsql mode is OFF. +# LogicTest: local statement ok CREATE TABLE uv (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u)); @@ -373,3 +370,42 @@ limit │ └── filters │ └── j:1 IS NULL [outer=(1), immutable, constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)] └── 1 + +# Ensure we can run ALTER statements on the system.jobs table. +statement ok +INSERT INTO system.users VALUES ('node', NULL, true, 3); + +statement ok +GRANT node TO root; + +# Ensure that stats on the system.jobs table are being used. +statement ok +ALTER TABLE system.jobs INJECT STATISTICS '[ + { + "avg_size": 7, + "columns": [ + "id" + ], + "created_at": "2024-02-02 22:56:02.854028", + "distinct_count": 19, + "histo_col_type": "INT8", + "histo_version": 3, + "null_count": 0, + "row_count": 19 + } +]'; + +query T +EXPLAIN (OPT, VERBOSE) SELECT * FROM system.jobs; +---- +scan jobs + ├── columns: id:1 status:2 created:3 created_by_type:4 created_by_id:5 claim_session_id:6 claim_instance_id:7 num_runs:8 last_run:9 job_type:10 + ├── partial index predicates + │ └── jobs_run_stats_idx: filters + │ └── status:2 IN ('cancel-requested', 'pause-requested', 'pending', 'reverting', 'running') [outer=(2), constraints=(/2: [/'cancel-requested' - /'cancel-requested'] [/'pause-requested' - /'pause-requested'] [/'pending' - /'pending'] [/'reverting' - /'reverting'] [/'running' - /'running']; tight)] + ├── stats: [rows=19] + ├── cost: 59.49 + ├── key: (1) + ├── fd: (1)-->(2-10) + ├── distribution: test + └── prune: (1-10) diff --git a/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel b/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel index 5ec29998af4a..afffb303a4cb 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel +++ b/pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel @@ -12,7 +12,7 @@ go_test( "//build/toolchains:is_heavy": {"test.Pool": "heavy"}, "//conditions:default": {"test.Pool": "large"}, }), - shard_count = 29, + shard_count = 28, tags = [ "cpu:3", ], diff --git a/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go b/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go index 22c4f9f3edf4..013b32c12263 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go +++ b/pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go @@ -288,10 +288,3 @@ func TestExecBuild_scan_parallel( defer leaktest.AfterTest(t)() runExecBuildLogicTest(t, "scan_parallel") } - -func TestExecBuild_stats( - t *testing.T, -) { - defer leaktest.AfterTest(t)() - runExecBuildLogicTest(t, "stats") -} diff --git a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go index a3bc7229627a..9190587c2484 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go +++ b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go @@ -590,6 +590,13 @@ func TestExecBuild_srfs( runExecBuildLogicTest(t, "srfs") } +func TestExecBuild_stats( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runExecBuildLogicTest(t, "stats") +} + func TestExecBuild_straight_join( t *testing.T, ) { diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index c666b1025cae..9737d413fc03 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -807,44 +807,34 @@ func TestAnalyzeSystemTables(t *testing.T) { s.InternalDB().(descs.DB), ) require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory))) - var tableNames []string - tableNames = make([]string, 0, 40) - it, err := executor.QueryIterator( + rows, err := executor.QueryBuffered( ctx, "get-system-tables", nil, /* txn */ - "SELECT table_name FROM [SHOW TABLES FROM SYSTEM]", + "SELECT table_name FROM [SHOW TABLES FROM SYSTEM] WHERE type = 'table'", ) if err != nil { t.Fatal(err) } - var ok bool - for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) { - if err != nil { - t.Fatal(err) - } - row := it.Cur() - tableName := string(*row[0].(*tree.DOidWrapper).Wrapped.(*tree.DString)) - tableNames = append(tableNames, tableName) - } sqlRun := sqlutils.MakeSQLRunner(sqlDB) - expectZeroRows := false - for _, tableName := range tableNames { - // Stats may not be collected on system.lease and system.table_statistics. - if tableName == "lease" || tableName == "table_statistics" || - tableName == "jobs" || tableName == "scheduled_jobs" || - tableName == "role_id_seq" || - tableName == "tenant_id_seq" || - tableName == "descriptor_id_seq" { + getTableID := func(tableName string) descpb.ID { + var tableID int + row := sqlRun.QueryRow(t, fmt.Sprintf("SELECT 'system.%s'::REGCLASS::OID", tableName)) + row.Scan(&tableID) + return descpb.ID(tableID) + } + for _, row := range rows { + tableName := string(*row[0].(*tree.DOidWrapper).Wrapped.(*tree.DString)) + if DisallowedOnSystemTable(getTableID(tableName)) { continue } - sql := fmt.Sprintf("ANALYZE system.%s", tableName) - sqlRun.Exec(t, sql) - // We're testing that ANALYZE on every system table except the above two - // doesn't error out, and populates system.table_statistics. - if err := compareStatsCountWithZero(ctx, cache, tableName, s, expectZeroRows); err != nil { + sqlRun.Exec(t, fmt.Sprintf("ANALYZE system.%s", tableName)) + // We're testing that ANALYZE on every system table (except a few + // disallowed ones) doesn't error out and populates + // system.table_statistics. + if err = compareStatsCountWithZero(ctx, cache, tableName, s, false /* expectZeroRows */); err != nil { t.Fatal(err) } } diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index 59538caa0719..4ddfdadff077 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -233,9 +233,25 @@ func (sc *TableStatisticsCache) GetTableStats( return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast) } -func statsDisallowedSystemTable(tableID descpb.ID) bool { +// DisallowedOnSystemTable returns true if this tableID belongs to a special +// system table on which we want to disallow stats collection and stats usage. +func DisallowedOnSystemTable(tableID descpb.ID) bool { switch tableID { - case keys.TableStatisticsTableID, keys.LeaseTableID, keys.JobsTableID, keys.ScheduledJobsTableID: + // Disable stats on system.table_statistics because it can lead to deadlocks + // around the stats cache (which issues an internal query in + // getTableStatsFromDB to fetch statistics for a single table, and that + // query in turn will want table stats on system.table_statistics to come up + // with a plan). + // + // Disable stats on system.lease since it's known to cause hangs. + // TODO(yuzefovich): check whether it's still a problem. + // + // Disable stats on system.scheduled_jobs because the table is mutated too + // frequently and would trigger too many stats collections. The potential + // benefit is not worth the potential performance hit. + // TODO(yuzefovich): re-evaluate this assumption. Perhaps we could at + // least enable manual collection on this table. + case keys.TableStatisticsTableID, keys.LeaseTableID, keys.ScheduledJobsTableID: return true } return false @@ -245,12 +261,7 @@ func statsDisallowedSystemTable(tableID descpb.ID) bool { // used by the query optimizer. func statsUsageAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool { if catalog.IsSystemDescriptor(table) { - // Disable stats usage on system.table_statistics and system.lease. Looking - // up stats on system.lease is known to cause hangs, and the same could - // happen with system.table_statistics. Stats on system.jobs and - // system.scheduled_jobs are also disallowed because autostats are disabled - // on them. - if statsDisallowedSystemTable(table.GetID()) { + if DisallowedOnSystemTable(table.GetID()) { return false } // Return whether the optimizer is allowed to use stats on system tables. @@ -265,14 +276,7 @@ func autostatsCollectionAllowed( table catalog.TableDescriptor, clusterSettings *cluster.Settings, ) bool { if catalog.IsSystemDescriptor(table) { - // Disable autostats on system.table_statistics and system.lease. Looking - // up stats on system.lease is known to cause hangs, and the same could - // happen with system.table_statistics. No need to collect stats if we - // cannot use them. Stats on system.jobs and system.scheduled_jobs - // are also disallowed because they are mutated too frequently and would - // trigger too many stats collections. The potential benefit is not worth - // the potential performance hit. - if statsDisallowedSystemTable(table.GetID()) { + if DisallowedOnSystemTable(table.GetID()) { return false } // Return whether autostats collection is allowed on system tables,