Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
103106: sql/catalog: avoid fetching descriptors when fetching comments r=fqazi a=fqazi

Previously, we refactored the code to fetch comments, descriptors, and zone config together in all cases. A side effect of this change was that the crdb_internal.kv_system_comments table was substantially slower for larger tables leading to big regressions. To address this, this patch adds a method of only fetching comments within the collections. 

Additionally, we added many builtins into both functions, which meant our existing virtual index look-up could end up falling back too frequently to full scans. Also, a virtual index is added on the kv_catalog_comments table for fast point look-ups when referencing descriptors.


Informs: cockroachdb#102851
Fixes: cockroachdb#102613

Release note (bug fix): Optimize over-head of
pg_catalog.pg_description and pg_catalog.pg_shdescription, which can lead to performance regression relative to 22.2

Co-authored-by: Faizan Qazi <[email protected]>
  • Loading branch information
craig[bot] and fqazi committed May 13, 2023
2 parents 8b1b640 + 6a66439 commit 63eb6b8
Show file tree
Hide file tree
Showing 17 changed files with 457 additions and 130 deletions.
45 changes: 45 additions & 0 deletions pkg/bench/rttanalysis/orm_queries_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,30 @@ WHERE c.relkind IN ('f', 'm', 'p', 'r', 'v')
AND pg_catalog.pg_table_is_visible(c.oid)`,
},

{
Name: "django comment introspection with comments",
Setup: `CREATE TABLE t1(a int primary key, b int);
CREATE TABLE t2(a int primary key, b int);
CREATE TABLE t3(a int primary key, b int);
COMMENT ON TABLE t1 is 't1';
COMMENT ON TABLE t2 is 't2';
COMMENT ON TABLE t3 is 't1';
`,
Stmt: `SELECT
c.relname,
CASE
WHEN c.relispartition THEN 'p'
WHEN c.relkind IN ('m', 'v') THEN 'v'
ELSE 't'
END,
obj_description(c.oid, 'pg_class')
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind IN ('f', 'm', 'p', 'r', 'v')
AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
AND pg_catalog.pg_table_is_visible(c.oid);`,
},

{
Name: "activerecord type introspection query",
Stmt: `SELECT
Expand Down Expand Up @@ -204,6 +228,27 @@ WHERE
Stmt: `SELECT * FROM pg_attribute`,
},

{
Name: "introspection description join",
Setup: `CREATE TABLE t1(a int primary key, b int);`,
Stmt: `SELECT
n.nspname, relname, d.description
FROM
pg_description AS d
INNER JOIN pg_class AS c ON d.objoid = c.oid
INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace
WHERE
d.objsubid = 0
AND n.nspname
NOT IN (
'gp_toolkit':::STRING:::NAME,
'information_schema':::STRING:::NAME,
'pgagent':::STRING:::NAME,
'bench':::STRING:::NAME
)
AND n.nspname NOT LIKE 'pg_%';`,
},

{
Name: "has_schema_privilege",
Setup: `CREATE SCHEMA s`,
Expand Down
6 changes: 4 additions & 2 deletions pkg/bench/rttanalysis/testdata/benchmark_expectations
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ exp,benchmark
4,ORMQueries/django_column_introspection_1_table
4,ORMQueries/django_column_introspection_4_tables
4,ORMQueries/django_column_introspection_8_tables
5,ORMQueries/django_table_introspection_1_table
5,ORMQueries/django_table_introspection_8_tables
6,ORMQueries/django_comment_introspection_with_comments
6,ORMQueries/django_table_introspection_1_table
6,ORMQueries/django_table_introspection_8_tables
0,ORMQueries/has_column_privilege_using_attnum
0,ORMQueries/has_column_privilege_using_column_name
0,ORMQueries/has_schema_privilege
Expand All @@ -76,6 +77,7 @@ exp,benchmark
12,ORMQueries/hasura_column_descriptions_8_tables
5,ORMQueries/hasura_column_descriptions_modified
4,ORMQueries/information_schema._pg_index_position
134,ORMQueries/introspection_description_join
4,ORMQueries/pg_attribute
4,ORMQueries/pg_class
6,ORMQueries/pg_is_other_temp_schema
Expand Down
2 changes: 2 additions & 0 deletions pkg/cli/clisqlshell/testdata/describe
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ pg_catalog,pg_db_role_setting,table,admin,NULL
pg_catalog,pg_default_acl,table,admin,NULL
pg_catalog,pg_depend,table,admin,NULL
pg_catalog,pg_description,table,admin,NULL
pg_catalog,pg_description_objoid_idx,index,admin,NULL
pg_catalog,pg_enum,table,admin,NULL
pg_catalog,pg_event_trigger,table,admin,NULL
pg_catalog,pg_extension,table,admin,NULL
Expand Down Expand Up @@ -563,6 +564,7 @@ pg_catalog,pg_depend,table,admin,NULL,permanent,prefix,"dependency relationships
https://www.postgresql.org/docs/9.5/catalog-pg-depend.html"
pg_catalog,pg_description,table,admin,NULL,permanent,prefix,"object comments
https://www.postgresql.org/docs/9.5/catalog-pg-description.html"
pg_catalog,pg_description_objoid_idx,index,admin,NULL,permanent,prefix,
pg_catalog,pg_enum,table,admin,NULL,permanent,prefix,"enum types and labels (empty - feature does not exist)
https://www.postgresql.org/docs/9.5/catalog-pg-enum.html"
pg_catalog,pg_event_trigger,table,admin,NULL,permanent,prefix,"event triggers (empty - feature does not exist)
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/catalog/descs/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,23 @@ func (tc *Collection) GetAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog,
return ret.Catalog, nil
}

// GetAllComments gets all comments for all descriptors. This method never
// returns the underlying catalog, since it will be incomplete and only
// contain comments.
func (tc *Collection) GetAllComments(
ctx context.Context, txn *kv.Txn,
) (nstree.CommentCatalog, error) {
kvComments, err := tc.cr.ScanAllComments(ctx, txn)
if err != nil {
return nil, err
}
comments, err := tc.aggregateAllLayers(ctx, txn, kvComments)
if err != nil {
return nil, err
}
return comments, nil
}

// GetAllFromStorageUnvalidated delegates to an uncached catkv.CatalogReader's
// ScanAll method. Nothing is cached, validated or hydrated. This is to be used
// sparingly and only in situations which warrant it, where an unmediated view
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/catalog/internal/catkv/catalog_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ type CatalogReader interface {
// ScanAll scans the entirety of the descriptor and namespace tables.
ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error)

// ScanAllComments scans only the entirety of the comments table.
ScanAllComments(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error)

// ScanNamespaceForDatabases scans the portion of the namespace table which
// contains all database name entries.
ScanNamespaceForDatabases(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error)
Expand Down Expand Up @@ -159,6 +162,20 @@ func (cr catalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalo
return mc.Catalog, nil
}

// ScanAllComments is part of the CatalogReader interface.
func (cr catalogReader) ScanAllComments(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) {
var mc nstree.MutableCatalog
cq := catalogQuery{codec: cr.codec}
err := cq.query(ctx, txn, &mc, func(codec keys.SQLCodec, b *kv.Batch) {
scan(ctx, b, codec.IndexPrefix(keys.NamespaceTableID, catconstants.NamespaceTablePrimaryIndexID))
scan(ctx, b, catalogkeys.CommentsMetadataPrefix(codec))
})
if err != nil {
return nstree.Catalog{}, err
}
return mc.Catalog, nil
}

func (cr catalogReader) scanNamespace(
ctx context.Context, txn *kv.Txn, prefix roachpb.Key,
) (nstree.Catalog, error) {
Expand Down
27 changes: 27 additions & 0 deletions pkg/sql/catalog/internal/catkv/catalog_reader_cached.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type cachedCatalogReader struct {
// has* indicates previously completed lookups. When set, we
// know the corresponding catalog data is in the cache.
hasScanAll bool
hasScanAllComments bool
hasScanNamespaceForDatabases bool

// memAcc is the actual account of an injected, upstream monitor
Expand Down Expand Up @@ -149,6 +150,31 @@ func (c *cachedCatalogReader) Reset(ctx context.Context) {
}
}

// ScanAllComments is part of the CatalogReader interface.
func (c *cachedCatalogReader) ScanAllComments(
ctx context.Context, txn *kv.Txn,
) (nstree.Catalog, error) {
if c.hasScanAllComments {
return c.cache.Catalog, nil
}
// Scan all catalog comments.
read, err := c.cr.ScanAllComments(ctx, txn)
if err != nil {
return nstree.Catalog{}, err
}
// We don't wipe out anything we already read when
// updating the cache. So add the comments in and then
// add back any descriptors + comments we read earlier.
mergedCatalog := nstree.MutableCatalog{}
mergedCatalog.AddAll(read)
mergedCatalog.AddAll(c.cache.Catalog)
if err := c.ensure(ctx, mergedCatalog.Catalog); err != nil {
return nstree.Catalog{}, err
}
c.hasScanAllComments = true
return read, nil
}

// ScanAll is part of the CatalogReader interface.
func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) {
if c.hasScanAll {
Expand All @@ -173,6 +199,7 @@ func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.
}
c.hasScanAll = true
c.hasScanNamespaceForDatabases = true
c.hasScanAllComments = true
for id, s := range c.byIDState {
s.hasScanNamespaceForDatabaseEntries = true
s.hasScanNamespaceForDatabaseSchemas = true
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/catalog/internal/catkv/catalog_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ func TestDataDriven(t *testing.T) {
}
return h.doCatalogQuery(ctx, q)

case "scan_all_comments":
q := func(ctx context.Context, txn *kv.Txn, cr catkv.CatalogReader) (nstree.Catalog, error) {
return cr.ScanAllComments(ctx, txn)
}
return h.doCatalogQuery(ctx, q)

case "scan_namespace_for_databases":
q := func(ctx context.Context, txn *kv.Txn, cr catkv.CatalogReader) (nstree.Catalog, error) {
return cr.ScanNamespaceForDatabases(ctx, txn)
Expand Down
149 changes: 149 additions & 0 deletions pkg/sql/catalog/internal/catkv/testdata/testdata
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,152 @@ trace:
- Get /NamespaceTable/30/1/123/456/"foo"/4/1
cached:
- Get /NamespaceTable/30/1/123/456/"foo"/4/1

# Reset to clear any caching
reset
----

# Scanning all comments after resetting should only
# give us comments
scan_all_comments
----
catalog:
"001":
namespace: (0, 0, "system")
"003":
namespace: (1, 29, "descriptor")
"004":
namespace: (1, 29, "users")
"005":
namespace: (1, 29, "zones")
"006":
namespace: (1, 29, "settings")
"007":
namespace: (1, 29, "descriptor_id_seq")
"008":
namespace: (1, 29, "tenants")
"011":
namespace: (1, 29, "lease")
"012":
namespace: (1, 29, "eventlog")
"013":
namespace: (1, 29, "rangelog")
"014":
namespace: (1, 29, "ui")
"015":
namespace: (1, 29, "jobs")
"019":
namespace: (1, 29, "web_sessions")
"020":
namespace: (1, 29, "table_statistics")
"021":
namespace: (1, 29, "locations")
"023":
namespace: (1, 29, "role_members")
"024":
namespace: (1, 29, "comments")
"025":
namespace: (1, 29, "replication_constraint_stats")
"026":
namespace: (1, 29, "replication_critical_localities")
"027":
namespace: (1, 29, "replication_stats")
"028":
namespace: (1, 29, "reports_meta")
"029":
namespace: (1, 0, "public")
"030":
namespace: (1, 29, "namespace")
"031":
namespace: (1, 29, "protected_ts_meta")
"032":
namespace: (1, 29, "protected_ts_records")
"033":
namespace: (1, 29, "role_options")
"034":
namespace: (1, 29, "statement_bundle_chunks")
"035":
namespace: (1, 29, "statement_diagnostics_requests")
"036":
namespace: (1, 29, "statement_diagnostics")
"037":
namespace: (1, 29, "scheduled_jobs")
"039":
namespace: (1, 29, "sqlliveness")
"040":
namespace: (1, 29, "migrations")
"041":
namespace: (1, 29, "join_tokens")
"042":
namespace: (1, 29, "statement_statistics")
"043":
namespace: (1, 29, "transaction_statistics")
"044":
namespace: (1, 29, "database_role_settings")
"045":
namespace: (1, 29, "tenant_usage")
"046":
namespace: (1, 29, "sql_instances")
"047":
namespace: (1, 29, "span_configurations")
"048":
namespace: (1, 29, "role_id_seq")
"050":
namespace: (1, 29, "tenant_settings")
"051":
namespace: (1, 29, "privileges")
"052":
namespace: (1, 29, "external_connections")
"053":
namespace: (1, 29, "job_info")
"054":
namespace: (1, 29, "span_stats_unique_keys")
"055":
namespace: (1, 29, "span_stats_buckets")
"056":
namespace: (1, 29, "span_stats_samples")
"057":
namespace: (1, 29, "span_stats_tenant_boundaries")
"058":
namespace: (1, 29, "task_payloads")
"059":
namespace: (1, 29, "tenant_tasks")
"060":
namespace: (1, 29, "statement_activity")
"061":
namespace: (1, 29, "transaction_activity")
"062":
namespace: (1, 29, "tenant_id_seq")
"100":
comments:
database: this is the default database
namespace: (0, 0, "defaultdb")
"101":
comments:
schema: this is the public schema
namespace: (100, 0, "public")
"102":
namespace: (0, 0, "postgres")
"103":
namespace: (102, 0, "public")
"104":
comments:
schema: this is a schema
namespace: (100, 0, "sc")
"105":
namespace: (100, 104, "greeting")
"106":
namespace: (100, 104, "_greeting")
"108":
comments:
constraint_1: this is a primary key constraint
constraint_2: this is a check constraint
table: this is a table
namespace: (100, 101, "kv")
"109":
comments:
index_2: this is an index
namespace: (100, 101, "mv")
trace:
- Scan /NamespaceTable/30/1
- Scan /Table/24/1
12 changes: 12 additions & 0 deletions pkg/sql/catalog/nstree/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@ type Catalog struct {
byteSize int64
}

// CommentCatalog is a limited interface wrapper, which is used for partial
// immutable catalogs that are incomeplete and only contain comment information.
type CommentCatalog interface {
ForEachComment(fn func(key catalogkeys.CommentKey, cmt string) error) error
ForEachCommentOnDescriptor(
id descpb.ID, fn func(key catalogkeys.CommentKey, cmt string) error) error
LookupComment(key catalogkeys.CommentKey) (_ string, found bool)
}

// Sanity: Catalog implements a comment catalog.
var _ CommentCatalog = Catalog{}

// ForEachDescriptor iterates over all descriptor table entries in an
// ordered fashion.
func (c Catalog) ForEachDescriptor(fn func(desc catalog.Descriptor) error) error {
Expand Down
Loading

0 comments on commit 63eb6b8

Please sign in to comment.