Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sqlstats: add retries to stats test on locked table #119977

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/sql/sqlstats/persistedsqlstats/cluster_settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ var sqlStatsLimitTableSizeEnabled = settings.RegisterBoolSetting(
true,
)

// sqlStatsLimitTableCheckInterval is the cluster setting the controls what
// SQLStatsLimitTableCheckInterval is the cluster setting the controls what
// interval the check is done if the statement and transaction statistics
// tables have grown past the sql.stats.persisted_rows.max.
var sqlStatsLimitTableCheckInterval = settings.RegisterDurationSetting(
var SQLStatsLimitTableCheckInterval = settings.RegisterDurationSetting(
settings.ApplicationLevel,
"sql.stats.limit_table_size_check.interval",
"controls what interval the check is done if the statement and "+
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sqlstats/persistedsqlstats/flush.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (s *PersistedSQLStats) Flush(ctx context.Context, stopper *stop.Stopper) {
func (s *PersistedSQLStats) StmtsLimitSizeReached(ctx context.Context) (bool, error) {
// Doing a count check on every flush for every node adds a lot of overhead.
// To reduce the overhead only do the check once an hour by default.
intervalToCheck := sqlStatsLimitTableCheckInterval.Get(&s.cfg.Settings.SV)
intervalToCheck := SQLStatsLimitTableCheckInterval.Get(&s.cfg.Settings.SV)
if !s.lastSizeCheck.IsZero() && s.lastSizeCheck.Add(intervalToCheck).After(timeutil.Now()) {
log.Infof(ctx, "PersistedSQLStats.StmtsLimitSizeReached skipped with last check at: %s and check interval: %s", s.lastSizeCheck, intervalToCheck)
return false, nil
Expand Down
40 changes: 23 additions & 17 deletions pkg/sql/sqlstats/persistedsqlstats/flush_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -682,8 +682,7 @@ func TestSQLStatsReadLimitSizeOnLockedTable(t *testing.T) {
// Ensure we have some rows in system.statement_statistics
require.GreaterOrEqual(t, stmtStatsCountFlush, minNumExpectedStmts)

// Set sql.stats.persisted_rows.max
sqlConn.Exec(t, fmt.Sprintf("SET CLUSTER SETTING sql.stats.persisted_rows.max=%d", maxNumPersistedRows))
persistedsqlstats.SQLStatsMaxPersistedRows.Override(ctx, &s.ClusterSettings().SV, maxNumPersistedRows)

// We need SucceedsSoon here for the follower read timestamp to catch up
// enough for this state to be reached.
Expand All @@ -705,7 +704,7 @@ func TestSQLStatsReadLimitSizeOnLockedTable(t *testing.T) {

// Set table size check interval to .0000001 second. So the next check doesn't
// use the cached value.
sqlConn.Exec(t, "SET CLUSTER SETTING sql.stats.limit_table_size_check.interval='.0000001s'")
persistedsqlstats.SQLStatsLimitTableCheckInterval.Override(ctx, &s.ClusterSettings().SV, time.Nanosecond)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did we need to override this instead of using the set statement above?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new code does the same thing. Override just sets the setting value directly.
This is an unnecessary change but one that simplifies test execution by removing the need for a full SQL parse/exec loop when we have the ability to set the value directly.


// Begin a transaction.
sqlConn.Exec(t, "BEGIN")
Expand All @@ -714,28 +713,35 @@ func TestSQLStatsReadLimitSizeOnLockedTable(t *testing.T) {

// Ensure that we can read from the table despite it being locked, due to the follower read (AOST).
// Expect that the number of statements in the table exceeds sql.stats.persisted_rows.max * 1.5
// (meaning that the limit will be reached) and no error. Loop to make sure that
// checking it multiple times still returns the correct value.
// (meaning that the limit will be reached) and no error. Every iteration picks a random shard, and we
// loop 3 times to make sure that we find at least one shard with a count over the limit. In the wild,
// we've observed individual shards only having a single statement recorded which makes this check fail
// otherwise.
foundLimit := false
for i := 0; i < 3; i++ {
limitReached, err = pss.StmtsLimitSizeReached(ctx)
require.NoError(t, err)
if !limitReached {
readStmt := `SELECT crdb_internal_aggregated_ts_app_name_fingerprint_id_node_id_plan_hash_transaction_fingerprint_id_shard_8, count(*)
if limitReached {
foundLimit = true
}
}

if !foundLimit {
readStmt := `SELECT crdb_internal_aggregated_ts_app_name_fingerprint_id_node_id_plan_hash_transaction_fingerprint_id_shard_8, count(*)
FROM system.statement_statistics
AS OF SYSTEM TIME follower_read_timestamp()
GROUP BY crdb_internal_aggregated_ts_app_name_fingerprint_id_node_id_plan_hash_transaction_fingerprint_id_shard_8`

sqlConn2 := sqlutils.MakeSQLRunner(s.SQLConn(t))
rows := sqlConn2.Query(t, readStmt)
shard := make([]int64, 8)
count := make([]int64, 8)
for j := 0; rows.Next(); {
err := rows.Scan(&shard[j], &count[j])
require.NoError(t, err)
j += 1
}
t.Fatalf("limitReached should be true. loop: %d; shards: %d counts: %d", i, shard, count)
sqlConn2 := sqlutils.MakeSQLRunner(s.SQLConn(t))
rows := sqlConn2.Query(t, readStmt)
shard := make([]int64, 8)
count := make([]int64, 8)
for j := 0; rows.Next(); {
err := rows.Scan(&shard[j], &count[j])
require.NoError(t, err)
j += 1
}
t.Fatalf("limitReached should be true. shards: %d counts: %d", shard, count)
}

// Close the transaction.
Expand Down
Loading