roachtest: refactor tpchvec a bit

This commit refactors `tpchvec` roachtest so that queries run in the query-major order rather than the config-major order. Previously, we would perform the cluster setup, run all queries on that setup, then perform the setup for the second test config, run all queries again, and then analyze the results. However, I believe for perf-oriented tests it's better to run each query on all configs right away (so that the chance of range movement was relatively low), and this commit makes such a change. This required the removal of `perf_no_stats` test config (which probably wasn't adding much value). Release note: None
cockroachdb · Jun 22, 2022 · b5b9699 · b5b9699
1 parent d1e8487
commit b5b9699
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 83 deletions.
diff --git a/pkg/cmd/roachtest/tests/tpc_utils.go b/pkg/cmd/roachtest/tests/tpc_utils.go
@@ -105,25 +105,15 @@ func scatterTables(t test.Test, conn *gosql.DB, tableNames []string) {
 	}
 }
 
-// disableAutoStats disables automatic collection of statistics on the cluster.
-func disableAutoStats(t test.Test, conn *gosql.DB) {
-	t.Status("disabling automatic collection of stats")
-	if _, err := conn.Exec(
-		`SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false;`,
-	); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// createStatsFromTables runs "CREATE STATISTICS" statement for every table in
-// tableNames. It assumes that conn is already using the target database. If an
-// error is encountered, the test is failed.
+// createStatsFromTables runs ANALYZE statement for every table in tableNames.
+// It assumes that conn is already using the target database. If an error is
+// encountered, the test is failed.
 func createStatsFromTables(t test.Test, conn *gosql.DB, tableNames []string) {
 	t.Status("collecting stats")
 	for _, tableName := range tableNames {
 		t.Status(fmt.Sprintf("creating statistics from table %q", tableName))
 		if _, err := conn.Exec(
-			fmt.Sprintf(`CREATE STATISTICS %s FROM %s;`, tableName, tableName),
+			fmt.Sprintf(`ANALYZE %s;`, tableName),
 		); err != nil {
 			t.Fatal(err)
 		}

diff --git a/pkg/cmd/roachtest/tests/tpcdsvec.go b/pkg/cmd/roachtest/tests/tpcdsvec.go
@@ -62,7 +62,12 @@ func registerTPCDSVec(r registry.Registry) {
 		c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings())
 
 		clusterConn := c.Conn(ctx, t.L(), 1)
-		disableAutoStats(t, clusterConn)
+		t.Status("disabling automatic collection of stats")
+		if _, err := clusterConn.Exec(
+			`SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false;`,
+		); err != nil {
+			t.Fatal(err)
+		}
 		t.Status("restoring TPCDS dataset for Scale Factor 1")
 		if _, err := clusterConn.Exec(
 			`RESTORE DATABASE tpcds FROM 'gs://cockroach-fixtures/workload/tpcds/scalefactor=1/backup?AUTH=implicit';`,

diff --git a/pkg/cmd/roachtest/tests/tpchvec.go b/pkg/cmd/roachtest/tests/tpchvec.go
@@ -71,10 +71,8 @@ func performClusterSetup(t test.Test, conn *gosql.DB, clusterSetup []string) {
 type tpchVecTestCase interface {
 	// getRunConfig returns the configuration of tpchvec test run.
 	getRunConfig() tpchVecTestRunConfig
-	// preTestRunHook is called before any tpch query is run. Can be used to
-	// perform any setup that cannot be expressed as a modification to
-	// cluster-wide settings (those should go into tpchVecTestRunConfig).
-	preTestRunHook(ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string)
+	// preQueryRunHook is called before each tpch query is run.
+	preQueryRunHook(t test.Test, conn *gosql.DB, clusterSetup []string)
 	// postQueryRunHook is called after each tpch query is run with the output and
 	// the index of the setup it was run in.
 	postQueryRunHook(t test.Test, output []byte, setupIdx int)
@@ -102,13 +100,8 @@ func (b tpchVecTestCaseBase) getRunConfig() tpchVecTestRunConfig {
 	return runConfig
 }
 
-func (b tpchVecTestCaseBase) preTestRunHook(
-	t test.Test, conn *gosql.DB, clusterSetup []string, createStats bool,
-) {
+func (b tpchVecTestCaseBase) preQueryRunHook(t test.Test, conn *gosql.DB, clusterSetup []string) {
 	performClusterSetup(t, conn, clusterSetup)
-	if createStats {
-		createStatsFromTables(t, conn, tpchTables)
-	}
 }
 
 func (b tpchVecTestCaseBase) postQueryRunHook(test.Test, []byte, int) {}
@@ -161,30 +154,22 @@ type tpchVecPerfTest struct {
 	tpchVecTestCaseBase
 	*tpchVecPerfHelper
 
-	disableStatsCreation bool
-	settingName          string
-	slownessThreshold    float64
+	settingName       string
+	slownessThreshold float64
 }
 
 var _ tpchVecTestCase = &tpchVecPerfTest{}
 
-func newTpchVecPerfTest(
-	disableStatsCreation bool, settingName string, slownessThreshold float64,
-) *tpchVecPerfTest {
+func newTpchVecPerfTest(settingName string, slownessThreshold float64) *tpchVecPerfTest {
 	return &tpchVecPerfTest{
-		tpchVecPerfHelper:    newTpchVecPerfHelper(2 /* numSetups */),
-		disableStatsCreation: disableStatsCreation,
-		settingName:          settingName,
-		slownessThreshold:    slownessThreshold,
+		tpchVecPerfHelper: newTpchVecPerfHelper(2 /* numSetups */),
+		settingName:       settingName,
+		slownessThreshold: slownessThreshold,
 	}
 }
 
 func (p tpchVecPerfTest) getRunConfig() tpchVecTestRunConfig {
 	runConfig := p.tpchVecTestCaseBase.getRunConfig()
-	if p.disableStatsCreation {
-		// Query 9 takes too long without stats, so we'll skip it.
-		runConfig.queriesToRun = append(runConfig.queriesToRun[:8], runConfig.queriesToRun[9:]...)
-	}
 	runConfig.numRunsPerQuery = 3
 	// Make a copy of the default configuration setup and add different setting
 	// updates.
@@ -205,12 +190,6 @@ func (p tpchVecPerfTest) getRunConfig() tpchVecTestRunConfig {
 	return runConfig
 }
 
-func (p tpchVecPerfTest) preTestRunHook(
-	ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string,
-) {
-	p.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, !p.disableStatsCreation /* createStats */)
-}
-
 func (p *tpchVecPerfTest) postQueryRunHook(t test.Test, output []byte, setupIdx int) {
 	p.parseQueryOutput(t, output, setupIdx)
 }
@@ -369,12 +348,6 @@ func (b tpchVecBenchTest) getRunConfig() tpchVecTestRunConfig {
 	return runConfig
 }
 
-func (b tpchVecBenchTest) preTestRunHook(
-	_ context.Context, t test.Test, _ cluster.Cluster, conn *gosql.DB, clusterSetup []string,
-) {
-	b.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, true /* createStats */)
-}
-
 func (b *tpchVecBenchTest) postQueryRunHook(t test.Test, output []byte, setupIdx int) {
 	b.tpchVecPerfHelper.parseQueryOutput(t, output, setupIdx)
 }
@@ -438,10 +411,9 @@ type tpchVecDiskTest struct {
 	tpchVecTestCaseBase
 }
 
-func (d tpchVecDiskTest) preTestRunHook(
-	ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string,
-) {
-	d.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, true /* createStats */)
+func (d tpchVecDiskTest) getRunConfig() tpchVecTestRunConfig {
+	runConfig := d.tpchVecTestCaseBase.getRunConfig()
+
 	// In order to stress the disk spilling of the vectorized engine, we will
 	// set workmem limit to a random value in range [650KiB, 2000KiB).
 	//
@@ -455,26 +427,25 @@ func (d tpchVecDiskTest) preTestRunHook(
 	// of disk queues (limiting us to use at most 2 input partitions).
 	rng, _ := randutil.NewTestRand()
 	workmemInKiB := 650 + rng.Intn(1350)
-	workmem := fmt.Sprintf("%dKiB", workmemInKiB)
-	t.Status(fmt.Sprintf("setting workmem='%s'", workmem))
-	if _, err := conn.Exec(fmt.Sprintf("SET CLUSTER SETTING sql.distsql.temp_storage.workmem='%s'", workmem)); err != nil {
-		t.Fatal(err)
+	workmemQuery := fmt.Sprintf("SET CLUSTER SETTING sql.distsql.temp_storage.workmem='%dKiB'", workmemInKiB)
+	for i := range runConfig.clusterSetups {
+		runConfig.clusterSetups[i] = append(runConfig.clusterSetups[i], workmemQuery)
 	}
+	return runConfig
 }
 
 func baseTestRun(
 	ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, tc tpchVecTestCase,
 ) {
 	firstNode := c.Node(1)
 	runConfig := tc.getRunConfig()
-	for setupIdx, setup := range runConfig.clusterSetups {
-		t.Status(fmt.Sprintf("running setup=%s", runConfig.setupNames[setupIdx]))
-		tc.preTestRunHook(ctx, t, c, conn, setup)
-		for _, queryNum := range runConfig.queriesToRun {
+	for _, queryNum := range runConfig.queriesToRun {
+		for setupIdx, setup := range runConfig.clusterSetups {
+			tc.preQueryRunHook(t, conn, setup)
 			// Note that we use --default-vectorize flag which tells tpch
 			// workload to use the current cluster setting
 			// sql.defaults.vectorize which must have been set correctly in
-			// preTestRunHook.
+			// preQueryRunHook.
 			cmd := fmt.Sprintf("./workload run tpch --concurrency=1 --db=tpch "+
 				"--default-vectorize --max-ops=%d --queries=%d {pgurl:1} --enable-checks=true",
 				runConfig.numRunsPerQuery, queryNum)
@@ -497,10 +468,10 @@ type tpchVecSmithcmpTest struct {
 
 const tpchVecSmithcmp = "smithcmp"
 
-func (s tpchVecSmithcmpTest) preTestRunHook(
+func smithcmpPreTestRunHook(
 	ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string,
 ) {
-	s.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, true /* createStats */)
+	performClusterSetup(t, conn, clusterSetup)
 	const smithcmpSHA = "a3f41f5ba9273249c5ecfa6348ea8ee3ac4b77e3"
 	node := c.Node(1)
 	if c.IsLocal() && runtime.GOOS != "linux" {
@@ -527,7 +498,7 @@ func smithcmpTestRun(
 	ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, tc tpchVecTestCase,
 ) {
 	runConfig := tc.getRunConfig()
-	tc.preTestRunHook(ctx, t, c, conn, runConfig.clusterSetups[0])
+	smithcmpPreTestRunHook(ctx, t, c, conn, runConfig.clusterSetups[0])
 	const (
 		configFile = `tpchvec_smithcmp.toml`
 		configURL  = `https://raw.githubusercontent.com/cockroachdb/cockroach/master/pkg/cmd/roachtest/tests/` + configFile
@@ -555,7 +526,6 @@ func runTPCHVec(
 	c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings())
 
 	conn := c.Conn(ctx, t.L(), 1)
-	disableAutoStats(t, conn)
 	t.Status("restoring TPCH dataset for Scale Factor 1")
 	if err := loadTPCHDataset(
 		ctx, t, c, 1 /* sf */, c.NewMonitor(ctx), c.All(), true, /* disableMergeQueue */
@@ -570,6 +540,7 @@ func runTPCHVec(
 	t.Status("waiting for full replication")
 	err := WaitFor3XReplication(ctx, t, conn)
 	require.NoError(t, err)
+	createStatsFromTables(t, conn, tpchTables)
 
 	testRun(ctx, t, c, conn, testCase)
 	testCase.postTestRunHook(ctx, t, c, conn)
@@ -584,7 +555,6 @@ func registerTPCHVec(r registry.Registry) {
 		Cluster: r.MakeClusterSpec(tpchVecNodeCount),
 		Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
 			runTPCHVec(ctx, t, c, newTpchVecPerfTest(
-				false,                    /* disableStatsCreation */
 				"sql.defaults.vectorize", /* settingName */
 				1.5,                      /* slownessThreshold */
 			), baseTestRun)
@@ -610,26 +580,12 @@ func registerTPCHVec(r registry.Registry) {
 		},
 	})
 
-	r.Add(registry.TestSpec{
-		Name:    "tpchvec/perf_no_stats",
-		Owner:   registry.OwnerSQLQueries,
-		Cluster: r.MakeClusterSpec(tpchVecNodeCount),
-		Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
-			runTPCHVec(ctx, t, c, newTpchVecPerfTest(
-				true,                     /* disableStatsCreation */
-				"sql.defaults.vectorize", /* settingName */
-				1.5,                      /* slownessThreshold */
-			), baseTestRun)
-		},
-	})
-
 	r.Add(registry.TestSpec{
 		Name:    "tpchvec/streamer",
 		Owner:   registry.OwnerSQLQueries,
 		Cluster: r.MakeClusterSpec(tpchVecNodeCount),
 		Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
 			runTPCHVec(ctx, t, c, newTpchVecPerfTest(
-				false,                              /* disableStatsCreation */
 				"sql.distsql.use_streamer.enabled", /* settingName */
 				// TODO(yuzefovich): reduce the threshold over time.
 				3.0, /* slownessThreshold */