Skip to content

Commit

Permalink
cli: don't cpu profile local cluster in zip test
Browse files Browse the repository at this point in the history
We were CPU profiling two servers in parallel, but they were both
running in-mem in the same process, so only one of them could
succeed. This introduces randomness, which is undesired.

Remove all nonzero cpu profiling durations from the multi-node zip tests
and put a nonzero one into the single-node one.

Fixes #51538

Release justification: test flake fix
Release note: None
  • Loading branch information
tbg committed Aug 28, 2020
1 parent 8803452 commit 2efe1fa
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 22 deletions.
8 changes: 1 addition & 7 deletions pkg/cli/testdata/zip/partial1
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
zip
----
debug zip --cpu-profile-duration=1s /dev/null
debug zip --cpu-profile-duration=0s /dev/null
establishing RPC connection to ...
retrieving the node status to get the SQL address...
using SQL address: ...
Expand All @@ -26,12 +26,6 @@ retrieving SQL data for crdb_internal.partitions... writing: debug/crdb_internal
retrieving SQL data for crdb_internal.zones... writing: debug/crdb_internal.zones.txt
requesting nodes... writing: debug/nodes.json
requesting liveness... writing: debug/liveness.json
requesting CPU profiles... ok
writing: debug/nodes/1/cpu.pprof
writing: debug/nodes/2/cpu.pprof.err.txt
^- resulted in ...
writing: debug/nodes/3/cpu.pprof.err.txt
^- resulted in ...
writing: debug/nodes/1/status.json
using SQL connection URL for node 1: postgresql://...
retrieving SQL data for crdb_internal.feature_usage... writing: debug/nodes/1/crdb_internal.feature_usage.txt
Expand Down
8 changes: 1 addition & 7 deletions pkg/cli/testdata/zip/partial1_excluded
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
zip
----
debug zip /dev/null --exclude-nodes=2 --cpu-profile-duration=1s
debug zip /dev/null --exclude-nodes=2 --cpu-profile-duration=0
establishing RPC connection to ...
retrieving the node status to get the SQL address...
using SQL address: ...
Expand All @@ -26,12 +26,6 @@ retrieving SQL data for crdb_internal.partitions... writing: debug/crdb_internal
retrieving SQL data for crdb_internal.zones... writing: debug/crdb_internal.zones.txt
requesting nodes... writing: debug/nodes.json
requesting liveness... writing: debug/liveness.json
requesting CPU profiles... ok
writing: debug/nodes/1/cpu.pprof
writing: debug/nodes/2/cpu.pprof.err.txt
^- resulted in ...
writing: debug/nodes/3/cpu.pprof.err.txt
^- resulted in ...
writing: debug/nodes/1/status.json
using SQL connection URL for node 1: postgresql://...
retrieving SQL data for crdb_internal.feature_usage... writing: debug/nodes/1/crdb_internal.feature_usage.txt
Expand Down
4 changes: 3 additions & 1 deletion pkg/cli/testdata/zip/testzip
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
zip
----
debug zip --cpu-profile-duration=0 /dev/null
debug zip --cpu-profile-duration=1s /dev/null
establishing RPC connection to ...
retrieving the node status to get the SQL address...
using SQL address: ...
Expand All @@ -26,6 +26,8 @@ retrieving SQL data for crdb_internal.partitions... writing: debug/crdb_internal
retrieving SQL data for crdb_internal.zones... writing: debug/crdb_internal.zones.txt
requesting nodes... writing: debug/nodes.json
requesting liveness... writing: debug/liveness.json
requesting CPU profiles... ok
writing: debug/nodes/1/cpu.pprof
writing: debug/nodes/1/status.json
using SQL connection URL for node 1: postgresql://...
retrieving SQL data for crdb_internal.feature_usage... writing: debug/nodes/1/crdb_internal.feature_usage.txt
Expand Down
10 changes: 3 additions & 7 deletions pkg/cli/zip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func TestZip(t *testing.T) {
})
defer c.cleanup()

out, err := c.RunWithCapture("debug zip --cpu-profile-duration=0 " + os.DevNull)
out, err := c.RunWithCapture("debug zip --cpu-profile-duration=1s " + os.DevNull)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -283,8 +283,6 @@ func TestPartialZip(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.WithIssue(t, 51538)

// We want a low timeout so that the test doesn't take forever;
// however low timeouts make race runs flaky with false positives.
skip.UnderShort(t)
Expand All @@ -308,9 +306,7 @@ func TestPartialZip(t *testing.T) {
defer func(prevStderr *os.File) { stderr = prevStderr }(stderr)
stderr = os.Stdout

// NB: we spend a second profiling here to make sure it actually tries the
// down nodes (and fails only there, succeeding on the available node).
out, err := c.RunWithCapture("debug zip --cpu-profile-duration=1s " + os.DevNull)
out, err := c.RunWithCapture("debug zip --cpu-profile-duration=0s " + os.DevNull)
if err != nil {
t.Fatal(err)
}
Expand All @@ -324,7 +320,7 @@ func TestPartialZip(t *testing.T) {
})

// Now do it again and exclude the down node explicitly.
out, err = c.RunWithCapture("debug zip " + os.DevNull + " --exclude-nodes=2 --cpu-profile-duration=1s")
out, err = c.RunWithCapture("debug zip " + os.DevNull + " --exclude-nodes=2 --cpu-profile-duration=0")
if err != nil {
t.Fatal(err)
}
Expand Down

0 comments on commit 2efe1fa

Please sign in to comment.