Skip to content

Commit

Permalink
stmtdiagnostics: fix rare stress flake
Browse files Browse the repository at this point in the history
For reasons I do not want to think about, the server startup took more than
10s.

```
[07:46:17][Step 2/2] I210317 07:46:07.208310 3685 sql/temporary_schema.go:610  [n1] 209  temporary object cleaner next scheduled to run at 2021-03-17 08:16:07.205507825 +0000 UTC
[07:46:17][Step 2/2] I210317 07:46:07.210153 3695 sql/sqlliveness/slstorage/slstorage.go:352  [n1] 210  inserted sqlliveness session e8bd7965952f4a739b406f76c84bd0dd
[07:46:17][Step 2/2] I210317 07:46:07.210209 3695 sql/sqlliveness/slinstance/slinstance.go:144  [n1] 211  created new SQL liveness session e8bd7965952f4a739b406f76c84bd0dd
[07:46:17][Step 2/2] I210317 07:46:07.210499 3683 1@util/log/event_log.go:32  [n1] 212 ={"Timestamp":1615967167203026366,"EventType":"node_join","NodeID":1,"StartedAt":1615967167137956829,"LastUp":1615967167137956829}
[07:46:17][Step 2/2] I210317 07:46:08.981572 3392 1@gossip/gossip.go:1505  [n1] 213  node has connected to cluster via gossip
[07:46:17][Step 2/2] I210317 07:46:08.981683 3392 kv/kvserver/stores.go:269  [n1] 214  wrote 0 node addresses to persistent storage
[07:46:17][Step 2/2] I210317 07:46:12.148614 3997 kv/kvserver/replica_consistency.go:255  [n1,consistencyChecker,s1,r4/1:/System{/tsd-tse}] 215  triggering stats recomputation to resolve delta of {ContainsEstimates:1672 LastUpdateNanos:1615967167145269126 IntentAge:0 GCBytesAge:0 LiveBytes:-55936 LiveCount:-836 KeyBytes:-42560 KeyCount:-836 ValBytes:-13376 ValCount:-836 IntentBytes:0 IntentCount:0 SeparatedIntentCount:0 SysBytes:0 SysCount:0 AbortSpanBytes:0}
[07:46:17][Step 2/2] I210317 07:46:17.143751 3623 2@server/status/runtime.go:553  [n1] 216  runtime stats: 152 MiB RSS, 202 goroutines (stacks: 3.9 MiB), 33 MiB/67 MiB Go alloc/total (heap fragmentation: 6.8 MiB, heap reserved: 13 MiB, heap released: 7.0 MiB), 2.0 MiB/6.6 MiB CGO alloc/total (0.0 CGO/sec), 0.0/0.0 %(u/s)time, 0.0 %gc (0x), 14 MiB/14 MiB (r/w)net
[07:46:17][Step 2/2] I210317 07:46:17.161245 3603 3@vendor/github.com/cockroachdb/pebble/db.go:1449  [n1,pebble] 217  [JOB 4] WAL created 000005
[07:46:17][Step 2/2] W210317 07:46:17.291363 3716 sql/set_cluster_setting.go:320  [n1,intExec=optInToDiagnosticsStatReporting] 218  SET CLUSTER SETTING "diagnostics.reporting.enabled" timed out waiting for value "true", observed "false"
[07:46:17][Step 2/2] W210317 07:46:17.291930 3152 sqlmigrations/migrations.go:436  [n1] 219  failed to run SET CLUSTER SETTING diagnostics.reporting.enabled = true: optInToDiagnosticsStatReporting: setting updated but timed out waiting to read new value
[07:46:17][Step 2/2] I210317 07:46:17.342969 4046 util/log/event_log.go:32  [n1,intExec=optInToDiagnosticsStatReporting] 220 ={"Timestamp":1615967177339253721,"EventType":"set_cluster_setting","Statement":"SET CLUSTER SETTING \"diagnostics.reporting.enabled\" = true","User":"root","ApplicationName":"$ internal-optInToDiagnosticsStatReporting","SettingName":"diagnostics.reporting.enabled","Value":"true"}
[07:46:17][Step 2/2] I210317 07:46:17.347724 4091 migration/migrationmanager/manager.go:102  [n1,intExec=set-setting,migration-mgr] 221  no need to migrate, cluster already at newest version
[07:46:17][Step 2/2] I210317 07:46:17.353450 4091 util/log/event_log.go:32  [n1,intExec=set-setting] 222 ={"Timestamp":1615967177345048785,"EventType":"set_cluster_setting","Statement":"SET CLUSTER SETTING version = $1","User":"root","ApplicationName":"$ internal-set-setting","PlaceholderValues":["'20.2-48'"],"SettingName":"version","Value":"20.2-48"}
[07:46:17][Step 2/2] I210317 07:46:17.365911 4566 util/log/event_log.go:32  [n1,intExec=initializeClusterSecret] 223 ={"Timestamp":1615967177363067087,"EventType":"set_cluster_setting","Statement":"SET CLUSTER SETTING \"cluster.secret\" = gen_random_uuid()::STRING","User":"root","ApplicationName":"$ internal-initializeClusterSecret","SettingName":"cluster.secret","Value":"3dfbfa15-a35d-415c-a7d7-eccb33839f0b"}
[07:46:17][Step 2/2] I210317 07:46:17.369968 4166 5@util/log/event_log.go:32  [n1,intExec=create-default-DB] 224 ={"Timestamp":1615967177367703431,"EventType":"create_database","Statement":"CREATE DATABASE IF NOT EXISTS defaultdb","User":"root","DescriptorID":50,"ApplicationName":"$ internal-create-default-DB","DatabaseName":"defaultdb"}
[07:46:17][Step 2/2] I210317 07:46:17.372657 4576 5@util/log/event_log.go:32  [n1,intExec=create-default-DB] 225 ={"Timestamp":1615967177370176064,"EventType":"create_database","Statement":"CREATE DATABASE IF NOT EXISTS postgres","User":"root","DescriptorID":51,"ApplicationName":"$ internal-create-default-DB","DatabaseName":"postgres"}
[07:46:17][Step 2/2] I210317 07:46:17.389150 3152 server/server_sql.go:840  [n1] 226  done ensuring all necessary startup migrations have run
[07:46:17][Step 2/2] I210317 07:46:17.389222 3152 1@server/server.go:2061  [n1] 227  serving sql connections
[07:46:17][Step 2/2] I210317 07:46:17.389252 4840 jobs/job_scheduler.go:360  [n1] 228  waiting 2m0s before scheduled jobs daemon start
[07:46:17][Step 2/2]     statement_diagnostics_test.go:209:
[07:46:17][Step 2/2]         	Error Trace:	statement_diagnostics_test.go:209
[07:46:17][Step 2/2]         	Error:      	Not equal:
[07:46:17][Step 2/2]         	            	expected: 1
[07:46:17][Step 2/2]         	            	actual  : 2
[07:46:17][Step 2/2]         	Test:       	TestChangePollInterval
```

Fixes #62144.

Release note: None
  • Loading branch information
ajwerner committed Mar 18, 2021
1 parent e0b59da commit 61f1c9b
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/sql/stmtdiagnostics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ go_test(
"//pkg/security",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/settings/cluster",
"//pkg/sql",
"//pkg/testutils",
"//pkg/testutils/serverutils",
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/stmtdiagnostics/stament_diagnostics_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ func (r *Registry) InsertRequestInternal(ctx context.Context, fprint string) (in
id, err := r.insertRequestInternal(ctx, fprint)
return int64(id), err
}

// PollingInterval is exposed to override in tests.
var PollingInterval = pollingInterval
8 changes: 8 additions & 0 deletions pkg/sql/stmtdiagnostics/statement_diagnostics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ import (
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/stmtdiagnostics"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -184,7 +186,13 @@ func TestChangePollInterval(t *testing.T) {
})
return seen
}
settings := cluster.MakeTestingClusterSettings()

// Set an extremely long initial polling interval to not hit flakes due to
// server startup taking more than 10s.
stmtdiagnostics.PollingInterval.Override(&settings.SV, time.Hour)
args := base.TestServerArgs{
Settings: settings,
Knobs: base.TestingKnobs{
Store: &kvserver.StoreTestingKnobs{
TestingRequestFilter: func(ctx context.Context, request roachpb.BatchRequest) *roachpb.Error {
Expand Down

0 comments on commit 61f1c9b

Please sign in to comment.