Skip to content

Commit

Permalink
sql/catalog: avoid fetching descriptors when fetching comments
Browse files Browse the repository at this point in the history
Previously, we refactored the code to fetch comments, descriptors,
and zone config together in all cases. A side effect of this change
was that the crdb_internal.kv_system_comments table was substantially
slower for larger tables leading to big regressions. To address this,
this patch adds a method of only fetching comments within the
collections.

Informs: cockroachdb#102851
Fixes: cockroachdb#102613

Release note (bug fix): Optimize over-head of
pg_catalog.pg_description and pg_catalog.pg_shdescription, which
can lead to performance regression relative to 22.2
  • Loading branch information
fqazi committed May 10, 2023
1 parent 8f54e7e commit 48efd9f
Show file tree
Hide file tree
Showing 10 changed files with 271 additions and 19 deletions.
24 changes: 24 additions & 0 deletions pkg/bench/rttanalysis/orm_queries_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,30 @@ WHERE c.relkind IN ('f', 'm', 'p', 'r', 'v')
AND pg_catalog.pg_table_is_visible(c.oid)`,
},

{
Name: "django comment introspection with comments",
Setup: `CREATE TABLE t1(a int primary key, b int);
CREATE TABLE t2(a int primary key, b int);
CREATE TABLE t3(a int primary key, b int);
COMMENT ON TABLE t1 is 't1';
COMMENT ON TABLE t2 is 't2';
COMMENT ON TABLE t3 is 't1';
`,
Stmt: `SELECT
c.relname,
CASE
WHEN c.relispartition THEN 'p'
WHEN c.relkind IN ('m', 'v') THEN 'v'
ELSE 't'
END,
obj_description(c.oid, 'pg_class')
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind IN ('f', 'm', 'p', 'r', 'v')
AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
AND pg_catalog.pg_table_is_visible(c.oid);`,
},

{
Name: "activerecord type introspection query",
Stmt: `SELECT
Expand Down
1 change: 1 addition & 0 deletions pkg/bench/rttanalysis/testdata/benchmark_expectations
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ exp,benchmark
4,ORMQueries/django_column_introspection_1_table
4,ORMQueries/django_column_introspection_4_tables
4,ORMQueries/django_column_introspection_8_tables
5,ORMQueries/django_comment_introspection_with_comments
5,ORMQueries/django_table_introspection_1_table
5,ORMQueries/django_table_introspection_8_tables
0,ORMQueries/has_column_privilege_using_attnum
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/catalog/descs/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,23 @@ func (tc *Collection) GetAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog,
return ret.Catalog, nil
}

// GetAllComments gets all comments for all descriptors. This method never
// returns the underlying catalog, since it will be incomplete and only
// contain comments.
func (tc *Collection) GetAllComments(
ctx context.Context, txn *kv.Txn,
) (nstree.CommentCatalog, error) {
kvComments, err := tc.cr.ScanAllComments(ctx, txn)
if err != nil {
return nil, err
}
comments, err := tc.aggregateAllLayers(ctx, txn, kvComments)
if err != nil {
return nil, err
}
return comments, nil
}

// GetAllFromStorageUnvalidated delegates to an uncached catkv.CatalogReader's
// ScanAll method. Nothing is cached, validated or hydrated. This is to be used
// sparingly and only in situations which warrant it, where an unmediated view
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/catalog/internal/catkv/catalog_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ type CatalogReader interface {
// ScanAll scans the entirety of the descriptor and namespace tables.
ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error)

// ScanAllComments scans only the entirety of the comments table.
ScanAllComments(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error)

// ScanNamespaceForDatabases scans the portion of the namespace table which
// contains all database name entries.
ScanNamespaceForDatabases(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error)
Expand Down Expand Up @@ -159,6 +162,20 @@ func (cr catalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalo
return mc.Catalog, nil
}

// ScanAllComments is part of the CatalogReader interface.
func (cr catalogReader) ScanAllComments(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) {
var mc nstree.MutableCatalog
cq := catalogQuery{codec: cr.codec}
err := cq.query(ctx, txn, &mc, func(codec keys.SQLCodec, b *kv.Batch) {
scan(ctx, b, codec.IndexPrefix(keys.NamespaceTableID, catconstants.NamespaceTablePrimaryIndexID))
scan(ctx, b, catalogkeys.CommentsMetadataPrefix(codec))
})
if err != nil {
return nstree.Catalog{}, err
}
return mc.Catalog, nil
}

func (cr catalogReader) scanNamespace(
ctx context.Context, txn *kv.Txn, prefix roachpb.Key,
) (nstree.Catalog, error) {
Expand Down
27 changes: 27 additions & 0 deletions pkg/sql/catalog/internal/catkv/catalog_reader_cached.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type cachedCatalogReader struct {
// has* indicates previously completed lookups. When set, we
// know the corresponding catalog data is in the cache.
hasScanAll bool
hasScanAllComments bool
hasScanNamespaceForDatabases bool

// memAcc is the actual account of an injected, upstream monitor
Expand Down Expand Up @@ -149,6 +150,31 @@ func (c *cachedCatalogReader) Reset(ctx context.Context) {
}
}

// ScanAllComments is part of the CatalogReader interface.
func (c *cachedCatalogReader) ScanAllComments(
ctx context.Context, txn *kv.Txn,
) (nstree.Catalog, error) {
if c.hasScanAllComments {
return c.cache.Catalog, nil
}
// Scan all catalog comments.
read, err := c.cr.ScanAllComments(ctx, txn)
if err != nil {
return nstree.Catalog{}, err
}
// We don't wipe out anything we already read when
// updating the cache. So add the comments in and then
// add back any descriptors + comments we read earlier.
mergedCatalog := nstree.MutableCatalog{}
mergedCatalog.AddAll(read)
mergedCatalog.AddAll(c.cache.Catalog)
if err := c.ensure(ctx, mergedCatalog.Catalog); err != nil {
return nstree.Catalog{}, err
}
c.hasScanAllComments = true
return read, nil
}

// ScanAll is part of the CatalogReader interface.
func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) {
if c.hasScanAll {
Expand All @@ -173,6 +199,7 @@ func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.
}
c.hasScanAll = true
c.hasScanNamespaceForDatabases = true
c.hasScanAllComments = true
for id, s := range c.byIDState {
s.hasScanNamespaceForDatabaseEntries = true
s.hasScanNamespaceForDatabaseSchemas = true
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/catalog/internal/catkv/catalog_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ func TestDataDriven(t *testing.T) {
}
return h.doCatalogQuery(ctx, q)

case "scan_all_comments":
q := func(ctx context.Context, txn *kv.Txn, cr catkv.CatalogReader) (nstree.Catalog, error) {
return cr.ScanAllComments(ctx, txn)
}
return h.doCatalogQuery(ctx, q)

case "scan_namespace_for_databases":
q := func(ctx context.Context, txn *kv.Txn, cr catkv.CatalogReader) (nstree.Catalog, error) {
return cr.ScanNamespaceForDatabases(ctx, txn)
Expand Down
149 changes: 149 additions & 0 deletions pkg/sql/catalog/internal/catkv/testdata/testdata
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,152 @@ trace:
- Get /NamespaceTable/30/1/123/456/"foo"/4/1
cached:
- Get /NamespaceTable/30/1/123/456/"foo"/4/1

# Reset to clear any caching
reset
----

# Scanning all comments after resetting should only
# give us comments
scan_all_comments
----
catalog:
"001":
namespace: (0, 0, "system")
"003":
namespace: (1, 29, "descriptor")
"004":
namespace: (1, 29, "users")
"005":
namespace: (1, 29, "zones")
"006":
namespace: (1, 29, "settings")
"007":
namespace: (1, 29, "descriptor_id_seq")
"008":
namespace: (1, 29, "tenants")
"011":
namespace: (1, 29, "lease")
"012":
namespace: (1, 29, "eventlog")
"013":
namespace: (1, 29, "rangelog")
"014":
namespace: (1, 29, "ui")
"015":
namespace: (1, 29, "jobs")
"019":
namespace: (1, 29, "web_sessions")
"020":
namespace: (1, 29, "table_statistics")
"021":
namespace: (1, 29, "locations")
"023":
namespace: (1, 29, "role_members")
"024":
namespace: (1, 29, "comments")
"025":
namespace: (1, 29, "replication_constraint_stats")
"026":
namespace: (1, 29, "replication_critical_localities")
"027":
namespace: (1, 29, "replication_stats")
"028":
namespace: (1, 29, "reports_meta")
"029":
namespace: (1, 0, "public")
"030":
namespace: (1, 29, "namespace")
"031":
namespace: (1, 29, "protected_ts_meta")
"032":
namespace: (1, 29, "protected_ts_records")
"033":
namespace: (1, 29, "role_options")
"034":
namespace: (1, 29, "statement_bundle_chunks")
"035":
namespace: (1, 29, "statement_diagnostics_requests")
"036":
namespace: (1, 29, "statement_diagnostics")
"037":
namespace: (1, 29, "scheduled_jobs")
"039":
namespace: (1, 29, "sqlliveness")
"040":
namespace: (1, 29, "migrations")
"041":
namespace: (1, 29, "join_tokens")
"042":
namespace: (1, 29, "statement_statistics")
"043":
namespace: (1, 29, "transaction_statistics")
"044":
namespace: (1, 29, "database_role_settings")
"045":
namespace: (1, 29, "tenant_usage")
"046":
namespace: (1, 29, "sql_instances")
"047":
namespace: (1, 29, "span_configurations")
"048":
namespace: (1, 29, "role_id_seq")
"050":
namespace: (1, 29, "tenant_settings")
"051":
namespace: (1, 29, "privileges")
"052":
namespace: (1, 29, "external_connections")
"053":
namespace: (1, 29, "job_info")
"054":
namespace: (1, 29, "span_stats_unique_keys")
"055":
namespace: (1, 29, "span_stats_buckets")
"056":
namespace: (1, 29, "span_stats_samples")
"057":
namespace: (1, 29, "span_stats_tenant_boundaries")
"058":
namespace: (1, 29, "task_payloads")
"059":
namespace: (1, 29, "tenant_tasks")
"060":
namespace: (1, 29, "statement_activity")
"061":
namespace: (1, 29, "transaction_activity")
"062":
namespace: (1, 29, "tenant_id_seq")
"100":
comments:
database: this is the default database
namespace: (0, 0, "defaultdb")
"101":
comments:
schema: this is the public schema
namespace: (100, 0, "public")
"102":
namespace: (0, 0, "postgres")
"103":
namespace: (102, 0, "public")
"104":
comments:
schema: this is a schema
namespace: (100, 0, "sc")
"105":
namespace: (100, 104, "greeting")
"106":
namespace: (100, 104, "_greeting")
"108":
comments:
constraint_1: this is a primary key constraint
constraint_2: this is a check constraint
table: this is a table
namespace: (100, 101, "kv")
"109":
comments:
index_2: this is an index
namespace: (100, 101, "mv")
trace:
- Scan /NamespaceTable/30/1
- Scan /Table/24/1
10 changes: 10 additions & 0 deletions pkg/sql/catalog/nstree/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ type Catalog struct {
byteSize int64
}

// CommentCatalog is a limited interface wrapper, which is used for partial
// immutable catalogs that are incomeplete and only contain comment information.
type CommentCatalog interface {
ForEachComment(fn func(key catalogkeys.CommentKey, cmt string) error) error
LookupComment(key catalogkeys.CommentKey) (_ string, found bool)
}

// Sanity: Catalog implements a comment catalog.
var _ CommentCatalog = Catalog{}

// ForEachDescriptor iterates over all descriptor table entries in an
// ordered fashion.
func (c Catalog) ForEachDescriptor(fn func(desc catalog.Descriptor) error) error {
Expand Down
10 changes: 6 additions & 4 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -5411,13 +5411,16 @@ CREATE TABLE crdb_internal.kv_catalog_comments (
populate: func(
ctx context.Context, p *planner, dbContext catalog.DatabaseDescriptor, addRow func(...tree.Datum) error,
) error {
all, err := p.Descriptors().GetAll(ctx, p.Txn())
all, err := p.Descriptors().GetAllComments(ctx, p.Txn())
if err != nil {
return err
}
// Delegate privilege check to system table.
{
sysTable := all.LookupDescriptor(systemschema.CommentsTable.GetID())
sysTable, err := p.Descriptors().ByIDWithLeased(p.txn).Get().Table(ctx, systemschema.CommentsTable.GetID())
if err != nil {
return err
}
if ok, err := p.HasPrivilege(ctx, sysTable, privilege.SELECT, p.User()); err != nil {
return err
} else if !ok {
Expand All @@ -5439,8 +5442,7 @@ CREATE TABLE crdb_internal.kv_catalog_comments (
dct,
tree.NewDInt(tree.DInt(int64(key.ObjectID))),
tree.NewDInt(tree.DInt(int64(key.SubID))),
tree.NewDString(cmt),
)
tree.NewDString(cmt))
}); err != nil {
return err
}
Expand Down
29 changes: 14 additions & 15 deletions pkg/sql/pg_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -1579,20 +1579,20 @@ func getComments(ctx context.Context, p *planner) ([]tree.Datums, error) {
p.Txn(),
sessiondata.NodeUserSessionDataOverride,
`SELECT
object_id,
sub_id,
comment,
CASE type
WHEN 'DatabaseCommentType' THEN 0
WHEN 'TableCommentType' THEN 1
WHEN 'ColumnCommentType' THEN 2
WHEN 'IndexCommentType' THEN 3
WHEN 'SchemaCommentType' THEN 4
WHEN 'ConstraintCommentType' THEN 5
END
AS type
FROM
"".crdb_internal.kv_catalog_comments;`)
object_id,
sub_id,
comment,
CASE type
WHEN 'DatabaseCommentType' THEN 0
WHEN 'TableCommentType' THEN 1
WHEN 'ColumnCommentType' THEN 2
WHEN 'IndexCommentType' THEN 3
WHEN 'SchemaCommentType' THEN 4
WHEN 'ConstraintCommentType' THEN 5
END
AS type
FROM
"".crdb_internal.kv_catalog_comments;`)
}

var pgCatalogDescriptionTable = virtualSchemaTable{
Expand Down Expand Up @@ -1682,7 +1682,6 @@ https://www.postgresql.org/docs/9.5/catalog-pg-description.html`,
}
}
}

return nil
},
}
Expand Down

0 comments on commit 48efd9f

Please sign in to comment.