[yugabyte#15660] YSQL: ANALYZE catalog tables as part of initdb
Summary:
With this diff, we analyze catalog tables as part of initdb.

ANALYZE is run only once, for the template1 database.
Collected stats are copied from template1 to the template0, postgres, and yugabyte databases because they use template1 as their template database.
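As an illustration (not part of this commit), a hypothetical way to confirm that a database created from template1 received the copied stats:

  -- hypothetical spot check, run while connected to e.g. the yugabyte database
  SELECT count(*) AS catalog_stat_rows
  FROM pg_statistic s
  JOIN pg_class c ON c.oid = s.starelid
  WHERE c.relnamespace = 'pg_catalog'::regnamespace;
  -- a non-zero count indicates the catalog stats copied from template1 are present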

The PG system cache is not invalidated (i.e., it becomes stale) after a direct UPDATE on catalog tables when the cache uses the OID as part of its hash value calculation.
As a workaround for this stale-cache issue, during initdb ANALYZE now reads tuples directly from the pg_class table instead of from the RELOID system cache (see the vacuum.c change below).

A YSQL upgrade script is not provided for this initdb change because we don't know the state (number of tables, etc.) of existing clusters.
Accordingly, TestYsqlUpgrade is changed to skip checking the state of `pg_statistic`.

In addition, a few regression tests are updated because of the change in initial catalog statistics.

Jira: DB-5026

Test Plan:
After `./yb_build.sh reinitdb`, manually verified that catalog stats in `pg_class` and `pg_statistic` are collected.
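For reference, queries of roughly this form can be used for such a manual check (illustrative only, not taken from this diff):

  -- reltuples in pg_class should now be populated for analyzed catalog tables
  SELECT relname, reltuples FROM pg_class
  WHERE relnamespace = 'pg_catalog'::regnamespace AND relname IN ('pg_class', 'pg_attribute');
  -- pg_statistic should contain per-column stats rows for catalog tables
  SELECT starelid::regclass AS rel, count(*) AS stat_rows
  FROM pg_statistic GROUP BY starelid ORDER BY stat_rows DESC LIMIT 10;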

./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressYbStat#testYbStat'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressYbBitmapScans#testPgRegressYbBitmapScans'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPushdownKey#testPgRegressPushdownKey'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPgStat#testPgStat'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressMisc#testPgRegressMiscSerial3'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressJson#testPgRegressJson'
./yb_build.sh --cxx-test pgwrapper_pg_catalog_version-test --gtest_filter PgCatalogVersionTest.FixCatalogVersionTable
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressTablegroup#testPgRegressTablegroup'
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressIndex#testPgRegressIndex'
./yb_build.sh --java-test 'org.yb.pgsql.TestYsqlDump'
./yb_build.sh --java-test 'org.yb.pgsql.TestYsqlUpgrade#migratingIsEquivalentToReinitdb'
./yb_build.sh --java-test 'org.yb.pgsql.TestYsqlUpgrade#upgradeIsIdempotentSingleConn'
./yb_build.sh --java-test 'org.yb.pgsql.TestYsqlUpgrade#upgradeIsIdempotent'

Reviewers: mihnea, kfranz, telgersma, amartsinchyk

Reviewed By: kfranz, amartsinchyk

Subscribers: jason, yql

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D34576
yifanguan committed Jun 15, 2024
1 parent 033ba28 commit ccc3f8e
Showing 22 changed files with 98 additions and 58 deletions.
@@ -33,7 +33,7 @@ public static String generate(File f1, File f2) throws IOException {
// you're in a terminal with 8-column tab stops, this causes the RHS to begin at a tab stop
// which will make the LHS and the RHS identical when the input files have embedded tabs.
// Embedded tabs aren't expanded even with the --expand-tabs flag.
String diffCmd = String.format("sdiff --width=157 --expand-tabs '%s' '%s'", f1, f2);
String diffCmd = String.format("sdiff -Z --width=157 --expand-tabs '%s' '%s'", f1, f2);
CommandResult commandResult = CommandUtil.runShellCommand(diffCmd);
List<String> stdoutLines = commandResult.getStdoutLines();
return StringUtil.joinLinesForLoggingNoPrefix(stdoutLines);
1 change: 0 additions & 1 deletion java/yb-pgsql/src/test/java/org/yb/pgsql/TestYsqlDump.java
@@ -18,7 +18,6 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
7 changes: 7 additions & 0 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/TestYsqlUpgrade.java
@@ -1496,6 +1496,13 @@ private SysCatalogSnapshot takeSysCatalogSnapshot(Statement stmt) throws Excepti

for (Row tableInfoRow : tablesInfo) {
String tableName = tableInfoRow.getString(0);
// Different runs of ANALYZE on catalog tables can result in different statistics,
// and we don't know the state of existing clusters,
// so we don't provide YSQL migration scripts for catalog statistics.
// Thus, exclude the pg_statistic catalog when capturing the "snapshot".
if (tableName.equals("pg_statistic")) {
continue;
}
String query;
// Filter out stuff created for shared entities.
switch (tableName) {
29 changes: 27 additions & 2 deletions src/postgres/src/backend/commands/vacuum.c
@@ -52,6 +52,8 @@
#include "utils/syscache.h"
#include "utils/tqual.h"

/* YB includes. */
#include "access/sysattr.h"
#include "pg_yb_utils.h"

/*
@@ -894,8 +896,31 @@ vac_update_relstats(Relation relation,

rd = heap_open(RelationRelationId, RowExclusiveLock);

/* Fetch a copy of the tuple to scribble on */
ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
if (IsYugaByteEnabled() && YBIsInitDbModeEnvVarSet())
{
/*
* YB: workaround for stale cache issue #13500 during initdb.
* Instead of fetching a tuple from sys cache,
* read the tuple from pg_class directly.
*/
HeapScanDesc pg_class_scan;
ScanKeyData key[1];

ScanKeyInit(&key[0],
ObjectIdAttributeNumber,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));

pg_class_scan = heap_beginscan_catalog(rd, 1, key);
ctup = heap_getnext(pg_class_scan, ForwardScanDirection);
ctup = heap_copytuple(ctup);
heap_endscan(pg_class_scan);
}
else
{
/* Fetch a copy of the tuple to scribble on */
ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
}
if (!HeapTupleIsValid(ctup))
elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
relid);
17 changes: 12 additions & 5 deletions src/postgres/src/bin/initdb/initdb.c
@@ -3234,11 +3234,18 @@ initialize_data_directory(void)
/* Enable pg_stat_statements */
enable_pg_stat_statements(cmdfd);

if (!IsYugaByteGlobalClusterInitdb())
{
/* Do not need to vacuum in YB */
vacuum_db(cmdfd);
}
/*
* YB: we used to skip the call to vacuum_db() because we don't need
* to vacuum in YB. As of 04/30/2024, we want to ANALYZE catalog tables
* to get initial stats.
* vacuum_db() runs both ANALYZE and VACUUM. Since VACUUM is a no-op in
* YB, it is safe to just call vacuum_db().
* All other databases (template0, postgres, yugabyte) use template1 as the
* template database. The initial stats of their catalog tables are copied
* from template1's stats, so we don't need to do extra work to run ANALYZE
* for each of them.
*/
vacuum_db(cmdfd);

make_template0(cmdfd);

@@ -39,7 +39,7 @@ SELECT * FROM pg_authid WHERE rolname LIKE 'pg_%' OR rolname LIKE 'yb_%' ORDER B
yb_fdw | f | t | f | f | f | f | f | -1 | |
(11 rows)

/*+ BitmapScan(pg_roles) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF) SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
/*+ BitmapScan(pg_authid) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF) SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
SELECT oid FROM pg_roles WHERE rolname = 'postgres' OR rolname LIKE 'pg_%' OR rolname LIKE 'yb_%');
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------
@@ -60,14 +60,14 @@ SELECT * FROM pg_authid WHERE rolname LIKE 'pg_%' OR rolname LIKE 'yb_%' ORDER B
Storage Index Filter: ((rolname = 'postgres'::name) OR (rolname ~~ 'pg_%'::text) OR (rolname ~~ 'yb_%'::text))
(15 rows)

/*+ BitmapScan(pg_roles) */ SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
/*+ BitmapScan(pg_authid) */ SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
SELECT oid FROM pg_roles WHERE rolname = 'postgres' OR rolname LIKE 'pg_%' OR rolname LIKE 'yb_%');
spcname
---------
(0 rows)

SET yb_enable_expression_pushdown = false;
/*+ BitmapScan(pg_roles) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF) SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
/*+ BitmapScan(pg_authid) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF) SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
SELECT oid FROM pg_roles WHERE rolname = 'postgres' OR rolname LIKE 'pg_%' OR rolname LIKE 'yb_%');
QUERY PLAN
------------------------------------------------------------------------------------------------------------
@@ -86,7 +86,7 @@ SET yb_enable_expression_pushdown = false;
Index Cond: ((rolname >= 'yb'::name) AND (rolname < 'yc'::name))
(13 rows)

/*+ BitmapScan(pg_roles) */ SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
/*+ BitmapScan(pg_authid) */ SELECT spcname FROM pg_tablespace WHERE spcowner NOT IN (
SELECT oid FROM pg_roles WHERE rolname = 'postgres' OR rolname LIKE 'pg_%' OR rolname LIKE 'yb_%');
spcname
---------
@@ -190,7 +190,7 @@ UNION
SELECT 'index' AS object_type, relname AS obj_name, obj_description(oid) AS comments FROM pg_class WHERE oid IN (SELECT indexrelid FROM pg_index WHERE indrelid=$1::regclass::oid)
UNION
SELECT 'constraint' AS object_type, conname AS obj_name, obj_description(oid) AS comments FROM pg_constraint WHERE conrelid=$1::regclass::oid
ORDER BY obj_name
ORDER BY obj_name, object_type
$$ LANGUAGE SQL;
-- Without specifying INCLUDING COMMENTS, comments are not copied.
CREATE TABLE comments1 (LIKE liketest1 INCLUDING INDEXES, LIKE liketest2 INCLUDING CONSTRAINTS);
@@ -202,8 +202,8 @@ SELECT * FROM get_comments('comments1');
column | c |
index | comments1_b_idx |
index | comments1_expr_idx |
index | comments1_pkey |
constraint | comments1_pkey |
index | comments1_pkey |
column | d |
constraint | liketest2_c_check |
(9 rows)
@@ -232,8 +232,8 @@ SELECT * FROM get_comments('comments3');
column | b | B
index | comments3_b_idx | index b
index | comments3_expr_idx |
index | comments3_pkey | index pkey
constraint | comments3_pkey |
index | comments3_pkey | index pkey
constraint | liketest1_a_check | a_check
(7 rows)

12 changes: 6 additions & 6 deletions src/postgres/src/test/regress/expected/yb_dml_systable_scan.out
@@ -121,40 +121,40 @@ return null;
END;
$$ LANGUAGE plpgsql STABLE;
-- Expect pushdown in all cases.
EXPLAIN SELECT * FROM pg_database WHERE datname = test_null_pushdown();
EXPLAIN /*+IndexScan(pg_database)*/ SELECT * FROM pg_database WHERE datname = test_null_pushdown();
QUERY PLAN
-----------------------------------------------------------------------------------------------
Index Scan using pg_database_datname_index on pg_database (cost=0.00..4.37 rows=1 width=254)
Index Cond: (datname = test_null_pushdown())
(2 rows)

EXPLAIN SELECT * FROM pg_database WHERE datname IN (test_null_pushdown());
EXPLAIN /*+IndexScan(pg_database)*/ SELECT * FROM pg_database WHERE datname IN (test_null_pushdown());
QUERY PLAN
-----------------------------------------------------------------------------------------------
Index Scan using pg_database_datname_index on pg_database (cost=0.00..4.37 rows=1 width=254)
Index Cond: (datname = test_null_pushdown())
(2 rows)

EXPLAIN SELECT * FROM pg_database WHERE datname IN ('template1', test_null_pushdown(), 'template0');
EXPLAIN /*+IndexScan(pg_database)*/ SELECT * FROM pg_database WHERE datname IN ('template1', test_null_pushdown(), 'template0');
QUERY PLAN
---------------------------------------------------------------------------------------------------
Index Scan using pg_database_datname_index on pg_database (cost=0.00..4.37 rows=1 width=254)
Index Cond: (datname = ANY (ARRAY['template1'::name, test_null_pushdown(), 'template0'::name]))
(2 rows)

-- Test execution.
SELECT * FROM pg_database WHERE datname = test_null_pushdown();
/*+IndexScan(pg_database)*/ SELECT * FROM pg_database WHERE datname = test_null_pushdown();
datname | datdba | encoding | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | datacl
---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------
(0 rows)

SELECT * FROM pg_database WHERE datname IN (test_null_pushdown());
/*+IndexScan(pg_database)*/ SELECT * FROM pg_database WHERE datname IN (test_null_pushdown());
datname | datdba | encoding | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | datacl
---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------
(0 rows)

-- Test null mixed with valid (existing) options.
SELECT * FROM pg_database WHERE datname IN ('template1', test_null_pushdown(), 'template0');
/*+IndexScan(pg_database)*/ SELECT * FROM pg_database WHERE datname IN ('template1', test_null_pushdown(), 'template0');
datname | datdba | encoding | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | datacl
-----------+--------+----------+------------+-------------+---------------+--------------+--------------+---------------+--------------+------------+---------------+-------------------------------------
template0 | 10 | 6 | C | en_US.UTF-8 | t | f | -1 | 0 | 0 | 1 | 1663 | {=c/postgres,postgres=CTc/postgres}
7 changes: 4 additions & 3 deletions src/postgres/src/test/regress/expected/yb_pg_json.out
@@ -387,9 +387,10 @@ SELECT row_to_json(row((select array_agg(x) as d from generate_series(5,10) x)),
select to_json(histogram_bounds) histogram_bounds
from pg_stats
where attname = 'tmplname' and tablename = 'pg_pltemplate';
histogram_bounds
------------------
(0 rows)
histogram_bounds
---------------------------------------------------------------------------------------
["plperl","plperlu","plpgsql","plpython2u","plpython3u","plpythonu","pltcl","pltclu"]
(1 row)

-- to_json, timestamps
select to_json(timestamp '2014-05-28 12:22:35.614298');
4 changes: 2 additions & 2 deletions src/postgres/src/test/regress/expected/yb_pg_stat_backend.out
@@ -8,12 +8,12 @@
-- this connection, and the java test.
SELECT datname, usename, state, query, backend_type,
catalog_version IS NOT null AS has_catalog_snapshot
FROM pg_stat_activity;
FROM pg_stat_activity ORDER BY usename;
datname | usename | state | query | backend_type | has_catalog_snapshot
----------+---------------+--------+------------------------------------------------------------+----------------+----------------------
yugabyte | yugabyte | active | SELECT datname, usename, state, query, backend_type, +| client backend | t
| | | catalog_version IS NOT null AS has_catalog_snapshot+| |
| | | FROM pg_stat_activity; | |
| | | FROM pg_stat_activity ORDER BY usename; | |
yugabyte | yugabyte_test | idle | COMMIT | client backend | f
| | | | checkpointer | f
(3 rows)
2 changes: 1 addition & 1 deletion src/postgres/src/test/regress/expected/yb_reindex.out
@@ -485,7 +485,7 @@ INFO: index "yb_j_idx" was reindexed
REINDEX INDEX pg_database_datname_index; -- fail
ERROR: cannot reindex shared system indexes
-- make sure index isn't broken after failure
/*+IndexOnlyScan(pg_database_datname_index)*/
/*+IndexOnlyScan(pg_database pg_database_datname_index)*/
SELECT datname from pg_database WHERE datname LIKE 'template%';
datname
-----------
8 changes: 4 additions & 4 deletions src/postgres/src/test/regress/expected/yb_stat.out
@@ -211,17 +211,17 @@ show temp_file_limit;

SELECT * FROM generate_series(0, 1234567);
ERROR: temporary file size exceeds temp_file_limit (0kB)
SELECT databasename, termination_reason, query_text FROM yb_terminated_queries;
SELECT databasename, termination_reason, query_text FROM yb_terminated_queries ORDER BY databasename;
databasename | termination_reason | query_text
--------------------+---------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------
yugabyte | temporary file size exceeds temp_file_limit (0kB) | SELECT * FROM generate_series(0, 1000000);
yugabyte | temporary file size exceeds temp_file_limit (0kB) | SELECT 'bob' FROM generate_series(0, 1000000);
yugabyte | temporary file size exceeds temp_file_limit (1048576kB) | SELECT * FROM generate_series(0, 100000002);
db2 | temporary file size exceeds temp_file_limit (0kB) | SELECT * FROM generate_series(0, 1000001);
db2 | temporary file size exceeds temp_file_limit (0kB) | SELECT 'We were taught in this modern age that science and mathematics is the pinnacle of human achievement.' +
| | 'Yet, in our complacency, we began to neglect the very thing which our ancestors had once done: to challenge the process.'+
| | 'We need to stand back
test_user_database | temporary file size exceeds temp_file_limit (0kB) | SELECT * FROM generate_series(0, 1234567);
yugabyte | temporary file size exceeds temp_file_limit (0kB) | SELECT * FROM generate_series(0, 1000000);
yugabyte | temporary file size exceeds temp_file_limit (0kB) | SELECT 'bob' FROM generate_series(0, 1000000);
yugabyte | temporary file size exceeds temp_file_limit (1048576kB) | SELECT * FROM generate_series(0, 100000002);
(6 rows)

-- Drop the superuser privilege as we want to see if we would only see the terminated query
4 changes: 2 additions & 2 deletions src/postgres/src/test/regress/expected/yb_tablegroup.out
@@ -409,13 +409,13 @@ CREATE TABLE tgroup_test6 (col1 int, col2 int) TABLEGROUP grp3;
Group Name | Group Owner | Access privileges | Group Description | Group Tablespace | Group Options | Name | Type | Owner | Rel Description | Size
------------------+-------------+-------------------+-------------------+------------------+---------------+--------------------------+-------+----------+-----------------+------
grp3 | yugabyte | | | tblspc | | tgroup_test6 | table | yugabyte | |
tgroup2 | yugabyte | | | | | tgroup_test3 | table | yugabyte | |
tgroup2 | yugabyte | | | | | tgroup_test3_col1_idx | index | yugabyte | |
tgroup2 | yugabyte | | | | | tgroup_test4 | table | yugabyte | |
tgroup2 | yugabyte | | | | | tgroup_test4_col1_idx | index | yugabyte | |
tgroup2 | yugabyte | | | | | tgroup_test3 | table | yugabyte | |
tgroup2 | yugabyte | | | | | tgroup_test4_col2_idx | index | yugabyte | |
tgroup_describe1 | yugabyte | | | | | tgroup_describe | table | yugabyte | |
tgroup_describe1 | yugabyte | | | | | tgroup_describe_col1_idx | index | yugabyte | |
tgroup_describe1 | yugabyte | | | | | tgroup_describe | table | yugabyte | |
(8 rows)

--