From 48efd9fb2377af208bab310550a8bbed02d363cf Mon Sep 17 00:00:00 2001 From: Faizan Qazi Date: Wed, 10 May 2023 14:11:47 +0000 Subject: [PATCH] sql/catalog: avoid fetching descriptors when fetching comments Previously, we refactored the code to fetch comments, descriptors, and zone config together in all cases. A side effect of this change was that the crdb_internal.kv_system_comments table was substantially slower for larger tables leading to big regressions. To address this, this patch adds a method of only fetching comments within the collections. Informs: #102851 Fixes: #102613 Release note (bug fix): Optimize over-head of pg_catalog.pg_description and pg_catalog.pg_shdescription, which can lead to performance regression relative to 22.2 --- .../rttanalysis/orm_queries_bench_test.go | 24 +++ .../testdata/benchmark_expectations | 1 + pkg/sql/catalog/descs/collection.go | 17 ++ .../catalog/internal/catkv/catalog_reader.go | 17 ++ .../internal/catkv/catalog_reader_cached.go | 27 ++++ .../internal/catkv/catalog_reader_test.go | 6 + .../catalog/internal/catkv/testdata/testdata | 149 ++++++++++++++++++ pkg/sql/catalog/nstree/catalog.go | 10 ++ pkg/sql/crdb_internal.go | 10 +- pkg/sql/pg_catalog.go | 29 ++-- 10 files changed, 271 insertions(+), 19 deletions(-) diff --git a/pkg/bench/rttanalysis/orm_queries_bench_test.go b/pkg/bench/rttanalysis/orm_queries_bench_test.go index 421368cc42da..b5535d79123f 100644 --- a/pkg/bench/rttanalysis/orm_queries_bench_test.go +++ b/pkg/bench/rttanalysis/orm_queries_bench_test.go @@ -125,6 +125,30 @@ WHERE c.relkind IN ('f', 'm', 'p', 'r', 'v') AND pg_catalog.pg_table_is_visible(c.oid)`, }, + { + Name: "django comment introspection with comments", + Setup: `CREATE TABLE t1(a int primary key, b int); +CREATE TABLE t2(a int primary key, b int); +CREATE TABLE t3(a int primary key, b int); +COMMENT ON TABLE t1 is 't1'; +COMMENT ON TABLE t2 is 't2'; +COMMENT ON TABLE t3 is 't1'; +`, + Stmt: `SELECT + c.relname, + CASE + WHEN c.relispartition THEN 'p' + WHEN c.relkind IN ('m', 'v') THEN 'v' + ELSE 't' + END, + obj_description(c.oid, 'pg_class') + FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace + WHERE c.relkind IN ('f', 'm', 'p', 'r', 'v') + AND n.nspname NOT IN ('pg_catalog', 'pg_toast') + AND pg_catalog.pg_table_is_visible(c.oid);`, + }, + { Name: "activerecord type introspection query", Stmt: `SELECT diff --git a/pkg/bench/rttanalysis/testdata/benchmark_expectations b/pkg/bench/rttanalysis/testdata/benchmark_expectations index 2d9cdc018a92..e6e7501b9004 100644 --- a/pkg/bench/rttanalysis/testdata/benchmark_expectations +++ b/pkg/bench/rttanalysis/testdata/benchmark_expectations @@ -65,6 +65,7 @@ exp,benchmark 4,ORMQueries/django_column_introspection_1_table 4,ORMQueries/django_column_introspection_4_tables 4,ORMQueries/django_column_introspection_8_tables +5,ORMQueries/django_comment_introspection_with_comments 5,ORMQueries/django_table_introspection_1_table 5,ORMQueries/django_table_introspection_8_tables 0,ORMQueries/has_column_privilege_using_attnum diff --git a/pkg/sql/catalog/descs/collection.go b/pkg/sql/catalog/descs/collection.go index afc5a05bc5b7..3f4af2020898 100644 --- a/pkg/sql/catalog/descs/collection.go +++ b/pkg/sql/catalog/descs/collection.go @@ -720,6 +720,23 @@ func (tc *Collection) GetAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, return ret.Catalog, nil } +// GetAllComments gets all comments for all descriptors. This method never +// returns the underlying catalog, since it will be incomplete and only +// contain comments. +func (tc *Collection) GetAllComments( + ctx context.Context, txn *kv.Txn, +) (nstree.CommentCatalog, error) { + kvComments, err := tc.cr.ScanAllComments(ctx, txn) + if err != nil { + return nil, err + } + comments, err := tc.aggregateAllLayers(ctx, txn, kvComments) + if err != nil { + return nil, err + } + return comments, nil +} + // GetAllFromStorageUnvalidated delegates to an uncached catkv.CatalogReader's // ScanAll method. Nothing is cached, validated or hydrated. This is to be used // sparingly and only in situations which warrant it, where an unmediated view diff --git a/pkg/sql/catalog/internal/catkv/catalog_reader.go b/pkg/sql/catalog/internal/catkv/catalog_reader.go index 8fa96654493c..79bfd05a2e5c 100644 --- a/pkg/sql/catalog/internal/catkv/catalog_reader.go +++ b/pkg/sql/catalog/internal/catkv/catalog_reader.go @@ -54,6 +54,9 @@ type CatalogReader interface { // ScanAll scans the entirety of the descriptor and namespace tables. ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) + // ScanAllComments scans only the entirety of the comments table. + ScanAllComments(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) + // ScanNamespaceForDatabases scans the portion of the namespace table which // contains all database name entries. ScanNamespaceForDatabases(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) @@ -159,6 +162,20 @@ func (cr catalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalo return mc.Catalog, nil } +// ScanAllComments is part of the CatalogReader interface. +func (cr catalogReader) ScanAllComments(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) { + var mc nstree.MutableCatalog + cq := catalogQuery{codec: cr.codec} + err := cq.query(ctx, txn, &mc, func(codec keys.SQLCodec, b *kv.Batch) { + scan(ctx, b, codec.IndexPrefix(keys.NamespaceTableID, catconstants.NamespaceTablePrimaryIndexID)) + scan(ctx, b, catalogkeys.CommentsMetadataPrefix(codec)) + }) + if err != nil { + return nstree.Catalog{}, err + } + return mc.Catalog, nil +} + func (cr catalogReader) scanNamespace( ctx context.Context, txn *kv.Txn, prefix roachpb.Key, ) (nstree.Catalog, error) { diff --git a/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go b/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go index 0f4e0e2b0d4b..30f385596c83 100644 --- a/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go +++ b/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go @@ -40,6 +40,7 @@ type cachedCatalogReader struct { // has* indicates previously completed lookups. When set, we // know the corresponding catalog data is in the cache. hasScanAll bool + hasScanAllComments bool hasScanNamespaceForDatabases bool // memAcc is the actual account of an injected, upstream monitor @@ -149,6 +150,31 @@ func (c *cachedCatalogReader) Reset(ctx context.Context) { } } +// ScanAllComments is part of the CatalogReader interface. +func (c *cachedCatalogReader) ScanAllComments( + ctx context.Context, txn *kv.Txn, +) (nstree.Catalog, error) { + if c.hasScanAllComments { + return c.cache.Catalog, nil + } + // Scan all catalog comments. + read, err := c.cr.ScanAllComments(ctx, txn) + if err != nil { + return nstree.Catalog{}, err + } + // We don't wipe out anything we already read when + // updating the cache. So add the comments in and then + // add back any descriptors + comments we read earlier. + mergedCatalog := nstree.MutableCatalog{} + mergedCatalog.AddAll(read) + mergedCatalog.AddAll(c.cache.Catalog) + if err := c.ensure(ctx, mergedCatalog.Catalog); err != nil { + return nstree.Catalog{}, err + } + c.hasScanAllComments = true + return read, nil +} + // ScanAll is part of the CatalogReader interface. func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree.Catalog, error) { if c.hasScanAll { @@ -173,6 +199,7 @@ func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree. } c.hasScanAll = true c.hasScanNamespaceForDatabases = true + c.hasScanAllComments = true for id, s := range c.byIDState { s.hasScanNamespaceForDatabaseEntries = true s.hasScanNamespaceForDatabaseSchemas = true diff --git a/pkg/sql/catalog/internal/catkv/catalog_reader_test.go b/pkg/sql/catalog/internal/catkv/catalog_reader_test.go index 0ebd7dce48be..022c5307614e 100644 --- a/pkg/sql/catalog/internal/catkv/catalog_reader_test.go +++ b/pkg/sql/catalog/internal/catkv/catalog_reader_test.go @@ -120,6 +120,12 @@ func TestDataDriven(t *testing.T) { } return h.doCatalogQuery(ctx, q) + case "scan_all_comments": + q := func(ctx context.Context, txn *kv.Txn, cr catkv.CatalogReader) (nstree.Catalog, error) { + return cr.ScanAllComments(ctx, txn) + } + return h.doCatalogQuery(ctx, q) + case "scan_namespace_for_databases": q := func(ctx context.Context, txn *kv.Txn, cr catkv.CatalogReader) (nstree.Catalog, error) { return cr.ScanNamespaceForDatabases(ctx, txn) diff --git a/pkg/sql/catalog/internal/catkv/testdata/testdata b/pkg/sql/catalog/internal/catkv/testdata/testdata index eba5741ee293..f58bde2eb2a7 100644 --- a/pkg/sql/catalog/internal/catkv/testdata/testdata +++ b/pkg/sql/catalog/internal/catkv/testdata/testdata @@ -454,3 +454,152 @@ trace: - Get /NamespaceTable/30/1/123/456/"foo"/4/1 cached: - Get /NamespaceTable/30/1/123/456/"foo"/4/1 + +# Reset to clear any caching +reset +---- + +# Scanning all comments after resetting should only +# give us comments +scan_all_comments +---- +catalog: + "001": + namespace: (0, 0, "system") + "003": + namespace: (1, 29, "descriptor") + "004": + namespace: (1, 29, "users") + "005": + namespace: (1, 29, "zones") + "006": + namespace: (1, 29, "settings") + "007": + namespace: (1, 29, "descriptor_id_seq") + "008": + namespace: (1, 29, "tenants") + "011": + namespace: (1, 29, "lease") + "012": + namespace: (1, 29, "eventlog") + "013": + namespace: (1, 29, "rangelog") + "014": + namespace: (1, 29, "ui") + "015": + namespace: (1, 29, "jobs") + "019": + namespace: (1, 29, "web_sessions") + "020": + namespace: (1, 29, "table_statistics") + "021": + namespace: (1, 29, "locations") + "023": + namespace: (1, 29, "role_members") + "024": + namespace: (1, 29, "comments") + "025": + namespace: (1, 29, "replication_constraint_stats") + "026": + namespace: (1, 29, "replication_critical_localities") + "027": + namespace: (1, 29, "replication_stats") + "028": + namespace: (1, 29, "reports_meta") + "029": + namespace: (1, 0, "public") + "030": + namespace: (1, 29, "namespace") + "031": + namespace: (1, 29, "protected_ts_meta") + "032": + namespace: (1, 29, "protected_ts_records") + "033": + namespace: (1, 29, "role_options") + "034": + namespace: (1, 29, "statement_bundle_chunks") + "035": + namespace: (1, 29, "statement_diagnostics_requests") + "036": + namespace: (1, 29, "statement_diagnostics") + "037": + namespace: (1, 29, "scheduled_jobs") + "039": + namespace: (1, 29, "sqlliveness") + "040": + namespace: (1, 29, "migrations") + "041": + namespace: (1, 29, "join_tokens") + "042": + namespace: (1, 29, "statement_statistics") + "043": + namespace: (1, 29, "transaction_statistics") + "044": + namespace: (1, 29, "database_role_settings") + "045": + namespace: (1, 29, "tenant_usage") + "046": + namespace: (1, 29, "sql_instances") + "047": + namespace: (1, 29, "span_configurations") + "048": + namespace: (1, 29, "role_id_seq") + "050": + namespace: (1, 29, "tenant_settings") + "051": + namespace: (1, 29, "privileges") + "052": + namespace: (1, 29, "external_connections") + "053": + namespace: (1, 29, "job_info") + "054": + namespace: (1, 29, "span_stats_unique_keys") + "055": + namespace: (1, 29, "span_stats_buckets") + "056": + namespace: (1, 29, "span_stats_samples") + "057": + namespace: (1, 29, "span_stats_tenant_boundaries") + "058": + namespace: (1, 29, "task_payloads") + "059": + namespace: (1, 29, "tenant_tasks") + "060": + namespace: (1, 29, "statement_activity") + "061": + namespace: (1, 29, "transaction_activity") + "062": + namespace: (1, 29, "tenant_id_seq") + "100": + comments: + database: this is the default database + namespace: (0, 0, "defaultdb") + "101": + comments: + schema: this is the public schema + namespace: (100, 0, "public") + "102": + namespace: (0, 0, "postgres") + "103": + namespace: (102, 0, "public") + "104": + comments: + schema: this is a schema + namespace: (100, 0, "sc") + "105": + namespace: (100, 104, "greeting") + "106": + namespace: (100, 104, "_greeting") + "108": + comments: + constraint_1: this is a primary key constraint + constraint_2: this is a check constraint + table: this is a table + namespace: (100, 101, "kv") + "109": + comments: + index_2: this is an index + namespace: (100, 101, "mv") +trace: +- Scan /NamespaceTable/30/1 +- Scan /Table/24/1 diff --git a/pkg/sql/catalog/nstree/catalog.go b/pkg/sql/catalog/nstree/catalog.go index 6c932cd269d4..1c307b1a6d19 100644 --- a/pkg/sql/catalog/nstree/catalog.go +++ b/pkg/sql/catalog/nstree/catalog.go @@ -31,6 +31,16 @@ type Catalog struct { byteSize int64 } +// CommentCatalog is a limited interface wrapper, which is used for partial +// immutable catalogs that are incomeplete and only contain comment information. +type CommentCatalog interface { + ForEachComment(fn func(key catalogkeys.CommentKey, cmt string) error) error + LookupComment(key catalogkeys.CommentKey) (_ string, found bool) +} + +// Sanity: Catalog implements a comment catalog. +var _ CommentCatalog = Catalog{} + // ForEachDescriptor iterates over all descriptor table entries in an // ordered fashion. func (c Catalog) ForEachDescriptor(fn func(desc catalog.Descriptor) error) error { diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index 11a9788e93fa..9b1c23ffed9d 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -5411,13 +5411,16 @@ CREATE TABLE crdb_internal.kv_catalog_comments ( populate: func( ctx context.Context, p *planner, dbContext catalog.DatabaseDescriptor, addRow func(...tree.Datum) error, ) error { - all, err := p.Descriptors().GetAll(ctx, p.Txn()) + all, err := p.Descriptors().GetAllComments(ctx, p.Txn()) if err != nil { return err } // Delegate privilege check to system table. { - sysTable := all.LookupDescriptor(systemschema.CommentsTable.GetID()) + sysTable, err := p.Descriptors().ByIDWithLeased(p.txn).Get().Table(ctx, systemschema.CommentsTable.GetID()) + if err != nil { + return err + } if ok, err := p.HasPrivilege(ctx, sysTable, privilege.SELECT, p.User()); err != nil { return err } else if !ok { @@ -5439,8 +5442,7 @@ CREATE TABLE crdb_internal.kv_catalog_comments ( dct, tree.NewDInt(tree.DInt(int64(key.ObjectID))), tree.NewDInt(tree.DInt(int64(key.SubID))), - tree.NewDString(cmt), - ) + tree.NewDString(cmt)) }); err != nil { return err } diff --git a/pkg/sql/pg_catalog.go b/pkg/sql/pg_catalog.go index 83dbcd2e9b30..426606ee506c 100644 --- a/pkg/sql/pg_catalog.go +++ b/pkg/sql/pg_catalog.go @@ -1579,20 +1579,20 @@ func getComments(ctx context.Context, p *planner) ([]tree.Datums, error) { p.Txn(), sessiondata.NodeUserSessionDataOverride, `SELECT - object_id, - sub_id, - comment, - CASE type - WHEN 'DatabaseCommentType' THEN 0 - WHEN 'TableCommentType' THEN 1 - WHEN 'ColumnCommentType' THEN 2 - WHEN 'IndexCommentType' THEN 3 - WHEN 'SchemaCommentType' THEN 4 - WHEN 'ConstraintCommentType' THEN 5 - END - AS type -FROM - "".crdb_internal.kv_catalog_comments;`) + object_id, + sub_id, + comment, + CASE type + WHEN 'DatabaseCommentType' THEN 0 + WHEN 'TableCommentType' THEN 1 + WHEN 'ColumnCommentType' THEN 2 + WHEN 'IndexCommentType' THEN 3 + WHEN 'SchemaCommentType' THEN 4 + WHEN 'ConstraintCommentType' THEN 5 + END + AS type + FROM + "".crdb_internal.kv_catalog_comments;`) } var pgCatalogDescriptionTable = virtualSchemaTable{ @@ -1682,7 +1682,6 @@ https://www.postgresql.org/docs/9.5/catalog-pg-description.html`, } } } - return nil }, }