Skip to content

Commit

Permalink
roachtest: refactor tpchvec a bit
Browse files Browse the repository at this point in the history
This commit refactors the `tpchvec` roachtest so that queries run in
query-major order rather than config-major order. Previously, we
would perform the cluster setup, run all queries on that setup, then
perform the setup for the second test config, run all queries again,
and then analyze the results. However, I believe that for perf-oriented
tests it's better to run each query on all configs right away (so
that the chance of range movement is relatively low), and this
commit makes such a change. This required the removal of the
`perf_no_stats` test config (which probably wasn't adding much value).

Release note: None
  • Loading branch information
yuzefovich committed Jun 22, 2022
1 parent d1e8487 commit b5b9699
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 83 deletions.
18 changes: 4 additions & 14 deletions pkg/cmd/roachtest/tests/tpc_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,25 +105,15 @@ func scatterTables(t test.Test, conn *gosql.DB, tableNames []string) {
}
}

// disableAutoStats disables automatic collection of statistics on the cluster.
func disableAutoStats(t test.Test, conn *gosql.DB) {
t.Status("disabling automatic collection of stats")
if _, err := conn.Exec(
`SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false;`,
); err != nil {
t.Fatal(err)
}
}

// createStatsFromTables runs "CREATE STATISTICS" statement for every table in
// tableNames. It assumes that conn is already using the target database. If an
// error is encountered, the test is failed.
// createStatsFromTables runs ANALYZE statement for every table in tableNames.
// It assumes that conn is already using the target database. If an error is
// encountered, the test is failed.
func createStatsFromTables(t test.Test, conn *gosql.DB, tableNames []string) {
t.Status("collecting stats")
for _, tableName := range tableNames {
t.Status(fmt.Sprintf("creating statistics from table %q", tableName))
if _, err := conn.Exec(
fmt.Sprintf(`CREATE STATISTICS %s FROM %s;`, tableName, tableName),
fmt.Sprintf(`ANALYZE %s;`, tableName),
); err != nil {
t.Fatal(err)
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/cmd/roachtest/tests/tpcdsvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ func registerTPCDSVec(r registry.Registry) {
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings())

clusterConn := c.Conn(ctx, t.L(), 1)
disableAutoStats(t, clusterConn)
t.Status("disabling automatic collection of stats")
if _, err := clusterConn.Exec(
`SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false;`,
); err != nil {
t.Fatal(err)
}
t.Status("restoring TPCDS dataset for Scale Factor 1")
if _, err := clusterConn.Exec(
`RESTORE DATABASE tpcds FROM 'gs://cockroach-fixtures/workload/tpcds/scalefactor=1/backup?AUTH=implicit';`,
Expand Down
92 changes: 24 additions & 68 deletions pkg/cmd/roachtest/tests/tpchvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,8 @@ func performClusterSetup(t test.Test, conn *gosql.DB, clusterSetup []string) {
type tpchVecTestCase interface {
// getRunConfig returns the configuration of tpchvec test run.
getRunConfig() tpchVecTestRunConfig
// preTestRunHook is called before any tpch query is run. Can be used to
// perform any setup that cannot be expressed as a modification to
// cluster-wide settings (those should go into tpchVecTestRunConfig).
preTestRunHook(ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string)
// preQueryRunHook is called before each tpch query is run.
preQueryRunHook(t test.Test, conn *gosql.DB, clusterSetup []string)
// postQueryRunHook is called after each tpch query is run with the output and
// the index of the setup it was run in.
postQueryRunHook(t test.Test, output []byte, setupIdx int)
Expand Down Expand Up @@ -102,13 +100,8 @@ func (b tpchVecTestCaseBase) getRunConfig() tpchVecTestRunConfig {
return runConfig
}

func (b tpchVecTestCaseBase) preTestRunHook(
t test.Test, conn *gosql.DB, clusterSetup []string, createStats bool,
) {
func (b tpchVecTestCaseBase) preQueryRunHook(t test.Test, conn *gosql.DB, clusterSetup []string) {
performClusterSetup(t, conn, clusterSetup)
if createStats {
createStatsFromTables(t, conn, tpchTables)
}
}

func (b tpchVecTestCaseBase) postQueryRunHook(test.Test, []byte, int) {}
Expand Down Expand Up @@ -161,30 +154,22 @@ type tpchVecPerfTest struct {
tpchVecTestCaseBase
*tpchVecPerfHelper

disableStatsCreation bool
settingName string
slownessThreshold float64
settingName string
slownessThreshold float64
}

var _ tpchVecTestCase = &tpchVecPerfTest{}

func newTpchVecPerfTest(
disableStatsCreation bool, settingName string, slownessThreshold float64,
) *tpchVecPerfTest {
func newTpchVecPerfTest(settingName string, slownessThreshold float64) *tpchVecPerfTest {
return &tpchVecPerfTest{
tpchVecPerfHelper: newTpchVecPerfHelper(2 /* numSetups */),
disableStatsCreation: disableStatsCreation,
settingName: settingName,
slownessThreshold: slownessThreshold,
tpchVecPerfHelper: newTpchVecPerfHelper(2 /* numSetups */),
settingName: settingName,
slownessThreshold: slownessThreshold,
}
}

func (p tpchVecPerfTest) getRunConfig() tpchVecTestRunConfig {
runConfig := p.tpchVecTestCaseBase.getRunConfig()
if p.disableStatsCreation {
// Query 9 takes too long without stats, so we'll skip it.
runConfig.queriesToRun = append(runConfig.queriesToRun[:8], runConfig.queriesToRun[9:]...)
}
runConfig.numRunsPerQuery = 3
// Make a copy of the default configuration setup and add different setting
// updates.
Expand All @@ -205,12 +190,6 @@ func (p tpchVecPerfTest) getRunConfig() tpchVecTestRunConfig {
return runConfig
}

func (p tpchVecPerfTest) preTestRunHook(
ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string,
) {
p.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, !p.disableStatsCreation /* createStats */)
}

func (p *tpchVecPerfTest) postQueryRunHook(t test.Test, output []byte, setupIdx int) {
p.parseQueryOutput(t, output, setupIdx)
}
Expand Down Expand Up @@ -369,12 +348,6 @@ func (b tpchVecBenchTest) getRunConfig() tpchVecTestRunConfig {
return runConfig
}

func (b tpchVecBenchTest) preTestRunHook(
_ context.Context, t test.Test, _ cluster.Cluster, conn *gosql.DB, clusterSetup []string,
) {
b.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, true /* createStats */)
}

func (b *tpchVecBenchTest) postQueryRunHook(t test.Test, output []byte, setupIdx int) {
b.tpchVecPerfHelper.parseQueryOutput(t, output, setupIdx)
}
Expand Down Expand Up @@ -438,10 +411,9 @@ type tpchVecDiskTest struct {
tpchVecTestCaseBase
}

func (d tpchVecDiskTest) preTestRunHook(
ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string,
) {
d.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, true /* createStats */)
func (d tpchVecDiskTest) getRunConfig() tpchVecTestRunConfig {
runConfig := d.tpchVecTestCaseBase.getRunConfig()

// In order to stress the disk spilling of the vectorized engine, we will
// set workmem limit to a random value in range [650KiB, 2000KiB).
//
Expand All @@ -455,26 +427,25 @@ func (d tpchVecDiskTest) preTestRunHook(
// of disk queues (limiting us to use at most 2 input partitions).
rng, _ := randutil.NewTestRand()
workmemInKiB := 650 + rng.Intn(1350)
workmem := fmt.Sprintf("%dKiB", workmemInKiB)
t.Status(fmt.Sprintf("setting workmem='%s'", workmem))
if _, err := conn.Exec(fmt.Sprintf("SET CLUSTER SETTING sql.distsql.temp_storage.workmem='%s'", workmem)); err != nil {
t.Fatal(err)
workmemQuery := fmt.Sprintf("SET CLUSTER SETTING sql.distsql.temp_storage.workmem='%dKiB'", workmemInKiB)
for i := range runConfig.clusterSetups {
runConfig.clusterSetups[i] = append(runConfig.clusterSetups[i], workmemQuery)
}
return runConfig
}

func baseTestRun(
ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, tc tpchVecTestCase,
) {
firstNode := c.Node(1)
runConfig := tc.getRunConfig()
for setupIdx, setup := range runConfig.clusterSetups {
t.Status(fmt.Sprintf("running setup=%s", runConfig.setupNames[setupIdx]))
tc.preTestRunHook(ctx, t, c, conn, setup)
for _, queryNum := range runConfig.queriesToRun {
for _, queryNum := range runConfig.queriesToRun {
for setupIdx, setup := range runConfig.clusterSetups {
tc.preQueryRunHook(t, conn, setup)
// Note that we use --default-vectorize flag which tells tpch
// workload to use the current cluster setting
// sql.defaults.vectorize which must have been set correctly in
// preTestRunHook.
// preQueryRunHook.
cmd := fmt.Sprintf("./workload run tpch --concurrency=1 --db=tpch "+
"--default-vectorize --max-ops=%d --queries=%d {pgurl:1} --enable-checks=true",
runConfig.numRunsPerQuery, queryNum)
Expand All @@ -497,10 +468,10 @@ type tpchVecSmithcmpTest struct {

const tpchVecSmithcmp = "smithcmp"

func (s tpchVecSmithcmpTest) preTestRunHook(
func smithcmpPreTestRunHook(
ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, clusterSetup []string,
) {
s.tpchVecTestCaseBase.preTestRunHook(t, conn, clusterSetup, true /* createStats */)
performClusterSetup(t, conn, clusterSetup)
const smithcmpSHA = "a3f41f5ba9273249c5ecfa6348ea8ee3ac4b77e3"
node := c.Node(1)
if c.IsLocal() && runtime.GOOS != "linux" {
Expand All @@ -527,7 +498,7 @@ func smithcmpTestRun(
ctx context.Context, t test.Test, c cluster.Cluster, conn *gosql.DB, tc tpchVecTestCase,
) {
runConfig := tc.getRunConfig()
tc.preTestRunHook(ctx, t, c, conn, runConfig.clusterSetups[0])
smithcmpPreTestRunHook(ctx, t, c, conn, runConfig.clusterSetups[0])
const (
configFile = `tpchvec_smithcmp.toml`
configURL = `https://raw.githubusercontent.com/cockroachdb/cockroach/master/pkg/cmd/roachtest/tests/` + configFile
Expand Down Expand Up @@ -555,7 +526,6 @@ func runTPCHVec(
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings())

conn := c.Conn(ctx, t.L(), 1)
disableAutoStats(t, conn)
t.Status("restoring TPCH dataset for Scale Factor 1")
if err := loadTPCHDataset(
ctx, t, c, 1 /* sf */, c.NewMonitor(ctx), c.All(), true, /* disableMergeQueue */
Expand All @@ -570,6 +540,7 @@ func runTPCHVec(
t.Status("waiting for full replication")
err := WaitFor3XReplication(ctx, t, conn)
require.NoError(t, err)
createStatsFromTables(t, conn, tpchTables)

testRun(ctx, t, c, conn, testCase)
testCase.postTestRunHook(ctx, t, c, conn)
Expand All @@ -584,7 +555,6 @@ func registerTPCHVec(r registry.Registry) {
Cluster: r.MakeClusterSpec(tpchVecNodeCount),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runTPCHVec(ctx, t, c, newTpchVecPerfTest(
false, /* disableStatsCreation */
"sql.defaults.vectorize", /* settingName */
1.5, /* slownessThreshold */
), baseTestRun)
Expand All @@ -610,26 +580,12 @@ func registerTPCHVec(r registry.Registry) {
},
})

r.Add(registry.TestSpec{
Name: "tpchvec/perf_no_stats",
Owner: registry.OwnerSQLQueries,
Cluster: r.MakeClusterSpec(tpchVecNodeCount),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runTPCHVec(ctx, t, c, newTpchVecPerfTest(
true, /* disableStatsCreation */
"sql.defaults.vectorize", /* settingName */
1.5, /* slownessThreshold */
), baseTestRun)
},
})

r.Add(registry.TestSpec{
Name: "tpchvec/streamer",
Owner: registry.OwnerSQLQueries,
Cluster: r.MakeClusterSpec(tpchVecNodeCount),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runTPCHVec(ctx, t, c, newTpchVecPerfTest(
false, /* disableStatsCreation */
"sql.distsql.use_streamer.enabled", /* settingName */
// TODO(yuzefovich): reduce the threshold over time.
3.0, /* slownessThreshold */
Expand Down

0 comments on commit b5b9699

Please sign in to comment.