Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
118693: sql: allow usage of table stats on system.jobs r=yuzefovich a=yuzefovich

In 23.1 (in a6e2818, also fixed in fe2e250) we allowed stats collection on `system.jobs` table. However, we forgot to update another place where the jobs table ID was mentioned - whether auto collection on the jobs table is allowed and whether usage (by the optimizer) of the table stats on jobs table is allowed. This is now fixed.

Additionally, this commit performs a minor cleanup of tests around this. In particular, `stats` execbuilder test now runs in the local mode (previously it was using 5node due to some peculiar historical reasons).

Epic: None

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
craig[bot] and yuzefovich committed Feb 6, 2024
2 parents 715628a + 9a03da3 commit a57ec22
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 70 deletions.
19 changes: 3 additions & 16 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/featureflag"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
Expand Down Expand Up @@ -206,21 +205,9 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
)
}

if tableDesc.GetID() == keys.TableStatisticsTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.table_statistics",
)
}

if tableDesc.GetID() == keys.LeaseTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.lease",
)
}

if tableDesc.GetID() == keys.ScheduledJobsTableID {
return nil, pgerror.New(
pgcode.WrongObjectType, "cannot create statistics on system.scheduled_jobs",
if stats.DisallowedOnSystemTable(tableDesc.GetID()) {
return nil, pgerror.Newf(
pgcode.WrongObjectType, "cannot create statistics on system.%s", tableDesc.GetName(),
)
}

Expand Down
44 changes: 40 additions & 4 deletions pkg/sql/opt/exec/execbuilder/testdata/stats
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# LogicTest: 5node

# Tests that verify we retrieve the stats correctly. Note that we can't create
# statistics if distsql mode is OFF.
# LogicTest: local

statement ok
CREATE TABLE uv (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
Expand Down Expand Up @@ -373,3 +370,42 @@ limit
│ └── filters
│ └── j:1 IS NULL [outer=(1), immutable, constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)]
└── 1

# Ensure we can run ALTER statements on the system.jobs table.
statement ok
INSERT INTO system.users VALUES ('node', NULL, true, 3);

statement ok
GRANT node TO root;

# Ensure that stats on the system.jobs table are being used.
statement ok
ALTER TABLE system.jobs INJECT STATISTICS '[
{
"avg_size": 7,
"columns": [
"id"
],
"created_at": "2024-02-02 22:56:02.854028",
"distinct_count": 19,
"histo_col_type": "INT8",
"histo_version": 3,
"null_count": 0,
"row_count": 19
}
]';

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM system.jobs;
----
scan jobs
├── columns: id:1 status:2 created:3 created_by_type:4 created_by_id:5 claim_session_id:6 claim_instance_id:7 num_runs:8 last_run:9 job_type:10
├── partial index predicates
│ └── jobs_run_stats_idx: filters
│ └── status:2 IN ('cancel-requested', 'pause-requested', 'pending', 'reverting', 'running') [outer=(2), constraints=(/2: [/'cancel-requested' - /'cancel-requested'] [/'pause-requested' - /'pause-requested'] [/'pending' - /'pending'] [/'reverting' - /'reverting'] [/'running' - /'running']; tight)]
├── stats: [rows=19]
├── cost: 59.49
├── key: (1)
├── fd: (1)-->(2-10)
├── distribution: test
└── prune: (1-10)
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ go_test(
"//build/toolchains:is_heavy": {"test.Pool": "heavy"},
"//conditions:default": {"test.Pool": "large"},
}),
shard_count = 29,
shard_count = 28,
tags = [
"cpu:3",
],
Expand Down
7 changes: 0 additions & 7 deletions pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 16 additions & 26 deletions pkg/sql/stats/automatic_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -807,44 +807,34 @@ func TestAnalyzeSystemTables(t *testing.T) {
s.InternalDB().(descs.DB),
)
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
var tableNames []string
tableNames = make([]string, 0, 40)

it, err := executor.QueryIterator(
rows, err := executor.QueryBuffered(
ctx,
"get-system-tables",
nil, /* txn */
"SELECT table_name FROM [SHOW TABLES FROM SYSTEM]",
"SELECT table_name FROM [SHOW TABLES FROM SYSTEM] WHERE type = 'table'",
)
if err != nil {
t.Fatal(err)
}

var ok bool
for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) {
if err != nil {
t.Fatal(err)
}
row := it.Cur()
tableName := string(*row[0].(*tree.DOidWrapper).Wrapped.(*tree.DString))
tableNames = append(tableNames, tableName)
}
sqlRun := sqlutils.MakeSQLRunner(sqlDB)
expectZeroRows := false
for _, tableName := range tableNames {
// Stats may not be collected on system.lease and system.table_statistics.
if tableName == "lease" || tableName == "table_statistics" ||
tableName == "jobs" || tableName == "scheduled_jobs" ||
tableName == "role_id_seq" ||
tableName == "tenant_id_seq" ||
tableName == "descriptor_id_seq" {
getTableID := func(tableName string) descpb.ID {
var tableID int
row := sqlRun.QueryRow(t, fmt.Sprintf("SELECT 'system.%s'::REGCLASS::OID", tableName))
row.Scan(&tableID)
return descpb.ID(tableID)
}
for _, row := range rows {
tableName := string(*row[0].(*tree.DOidWrapper).Wrapped.(*tree.DString))
if DisallowedOnSystemTable(getTableID(tableName)) {
continue
}
sql := fmt.Sprintf("ANALYZE system.%s", tableName)
sqlRun.Exec(t, sql)
// We're testing that ANALYZE on every system table except the above two
// doesn't error out, and populates system.table_statistics.
if err := compareStatsCountWithZero(ctx, cache, tableName, s, expectZeroRows); err != nil {
sqlRun.Exec(t, fmt.Sprintf("ANALYZE system.%s", tableName))
// We're testing that ANALYZE on every system table (except a few
// disallowed ones) doesn't error out and populates
// system.table_statistics.
if err = compareStatsCountWithZero(ctx, cache, tableName, s, false /* expectZeroRows */); err != nil {
t.Fatal(err)
}
}
Expand Down
36 changes: 20 additions & 16 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,25 @@ func (sc *TableStatisticsCache) GetTableStats(
return sc.getTableStatsFromCache(ctx, table.GetID(), &forecast)
}

func statsDisallowedSystemTable(tableID descpb.ID) bool {
// DisallowedOnSystemTable returns true if this tableID belongs to a special
// system table on which we want to disallow stats collection and stats usage.
func DisallowedOnSystemTable(tableID descpb.ID) bool {
switch tableID {
case keys.TableStatisticsTableID, keys.LeaseTableID, keys.JobsTableID, keys.ScheduledJobsTableID:
// Disable stats on system.table_statistics because it can lead to deadlocks
// around the stats cache (which issues an internal query in
// getTableStatsFromDB to fetch statistics for a single table, and that
// query in turn will want table stats on system.table_statistics to come up
// with a plan).
//
// Disable stats on system.lease since it's known to cause hangs.
// TODO(yuzefovich): check whether it's still a problem.
//
// Disable stats on system.scheduled_jobs because the table is mutated too
// frequently and would trigger too many stats collections. The potential
// benefit is not worth the potential performance hit.
// TODO(yuzefovich): re-evaluate this assumption. Perhaps we could at
// least enable manual collection on this table.
case keys.TableStatisticsTableID, keys.LeaseTableID, keys.ScheduledJobsTableID:
return true
}
return false
Expand All @@ -245,12 +261,7 @@ func statsDisallowedSystemTable(tableID descpb.ID) bool {
// used by the query optimizer.
func statsUsageAllowed(table catalog.TableDescriptor, clusterSettings *cluster.Settings) bool {
if catalog.IsSystemDescriptor(table) {
// Disable stats usage on system.table_statistics and system.lease. Looking
// up stats on system.lease is known to cause hangs, and the same could
// happen with system.table_statistics. Stats on system.jobs and
// system.scheduled_jobs are also disallowed because autostats are disabled
// on them.
if statsDisallowedSystemTable(table.GetID()) {
if DisallowedOnSystemTable(table.GetID()) {
return false
}
// Return whether the optimizer is allowed to use stats on system tables.
Expand All @@ -265,14 +276,7 @@ func autostatsCollectionAllowed(
table catalog.TableDescriptor, clusterSettings *cluster.Settings,
) bool {
if catalog.IsSystemDescriptor(table) {
// Disable autostats on system.table_statistics and system.lease. Looking
// up stats on system.lease is known to cause hangs, and the same could
// happen with system.table_statistics. No need to collect stats if we
// cannot use them. Stats on system.jobs and system.scheduled_jobs
// are also disallowed because they are mutated too frequently and would
// trigger too many stats collections. The potential benefit is not worth
// the potential performance hit.
if statsDisallowedSystemTable(table.GetID()) {
if DisallowedOnSystemTable(table.GetID()) {
return false
}
// Return whether autostats collection is allowed on system tables,
Expand Down

0 comments on commit a57ec22

Please sign in to comment.