diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index d28d635a3d53..9f1e61413cf8 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -26,6 +26,7 @@ go_library( "autoupgrade.go", "awsdms.go", "backup.go", + "backup_fixtures.go", "build_info.go", "canary.go", "cancel.go", diff --git a/pkg/cmd/roachtest/tests/backup_fixtures.go b/pkg/cmd/roachtest/tests/backup_fixtures.go new file mode 100644 index 000000000000..1f4fa16479ab --- /dev/null +++ b/pkg/cmd/roachtest/tests/backup_fixtures.go @@ -0,0 +1,303 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "context" + "fmt" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/errors" + "github.com/stretchr/testify/require" +) + +func makeBackupFixtureSpecs(override scheduledBackupSpecs) scheduledBackupSpecs { + backupSpecs := makeBackupSpecs(override.backupSpecs, defaultBackupFixtureSpecs.backupSpecs) + specs := scheduledBackupSpecs{ + backupSpecs: backupSpecs, + crontab: defaultBackupFixtureSpecs.crontab, + } + if override.crontab != "" { + specs.crontab = override.crontab + } + specs.ignoreExistingBackups = override.ignoreExistingBackups + // TODO(msbutler): validate the crdb version roachtest will use. We don't want to create a 23.1.0 + // backup with a master binary, for example. + return specs +} + +// defaultBackupFixtureSpecs defines the default scheduled backup used to create a fixture. +var defaultBackupFixtureSpecs = scheduledBackupSpecs{ + crontab: "*/5 * * * *", + backupSpecs: backupSpecs{ + version: "23.1.0", + cloud: spec.AWS, + fullBackupDir: "LATEST", + backupsIncluded: 24, + workload: tpceRestore{ + customers: 25000, + }, + }, +} + +type scheduledBackupSpecs struct { + backupSpecs + + // ignoreExistingBackups, if set to true, allows a new backup chain + // to get written to an already existing backup collection. The default option + // of false prevents roachtest users from overwriting the latest backup in a + // collection, which may be used in restore roachtests. + ignoreExistingBackups bool + crontab string +} + +func (sbs scheduledBackupSpecs) scheduledBackupCmd() string { + // This backup schedule will first run a full backup immediately and then the + // incremental backups at the given crontab cadence until the user cancels the + // backup schedules.
To ensure that only one full backup chain gets created, + // begin the backup schedule at the beginning of the week, as a new full + // backup will get created on Sunday at Midnight ;) + var ignoreExistingBackupsOpt string + if sbs.ignoreExistingBackups { + ignoreExistingBackupsOpt = "ignore_existing_backups" + } + backupCmd := fmt.Sprintf(`BACKUP INTO %s WITH revision_history`, sbs.backupCollection()) + cmd := fmt.Sprintf(`CREATE SCHEDULE schedule_cluster FOR %s RECURRING '%s' FULL BACKUP '@weekly' WITH SCHEDULE OPTIONS first_run = 'now', %s`, + backupCmd, sbs.crontab, ignoreExistingBackupsOpt) + return cmd +} + +type backupFixtureSpecs struct { + // hardware specifies the roachprod specs to create the backup fixture on. + hardware hardwareSpecs + + // backup specifies the scheduled backup fixture which will be created. + backup scheduledBackupSpecs + + // initFromBackupSpecs, if specified, initializes the cluster via restore of an older fixture. + // The fields specified here will override any fields specified in the backup field above. + initFromBackupSpecs backupSpecs + + timeout time.Duration + tags map[string]struct{} + testName string + + // If non-empty, the test will be skipped with the supplied reason. + skip string +} + +func (bf *backupFixtureSpecs) initTestName() { + bf.testName = "backupFixture/" + bf.backup.workload.String() + "/" + bf.backup.cloud +} + +func makeBackupDriver(t test.Test, c cluster.Cluster, sp backupFixtureSpecs) backupDriver { + return backupDriver{ + t: t, + c: c, + sp: sp, + } +} + +type backupDriver struct { + sp backupFixtureSpecs + + t test.Test + c cluster.Cluster +} + +func (bd *backupDriver) prepareCluster(ctx context.Context) { + + if bd.c.Spec().Cloud != bd.sp.backup.cloud { + // For now, only run the test on the cloud provider that also stores the backup. + bd.t.Skipf("test configured to run on %s", bd.sp.backup.cloud) + } + + bd.c.Put(ctx, bd.t.Cockroach(), "./cockroach") + bd.c.Start(ctx, bd.t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(), bd.sp.hardware.getCRDBNodes()) + if !bd.sp.backup.ignoreExistingBackups { + // This check allows the roachtest to fail fast, instead of failing only once the + // scheduled backup cmd is issued. + require.False(bd.t, bd.checkForExistingBackupCollection(ctx)) + } +} + +// checkForExistingBackupCollection returns true if there exists a backup in the collection path. +func (bd *backupDriver) checkForExistingBackupCollection(ctx context.Context) bool { + collectionQuery := fmt.Sprintf(`SELECT count(*) FROM [SHOW BACKUPS IN %s]`, + bd.sp.backup.backupCollection()) + conn := bd.c.Conn(ctx, bd.t.L(), 1) + sql := sqlutils.MakeSQLRunner(conn) + var collectionCount int + sql.QueryRow(bd.t, collectionQuery).Scan(&collectionCount) + return collectionCount > 0 +} + +func (bd *backupDriver) initWorkload(ctx context.Context) { + if bd.sp.initFromBackupSpecs.version == "" { + bd.t.L().Printf(`Initializing workload via ./workload init`) + bd.sp.backup.workload.initWorkload(ctx, bd.t, bd.c, bd.sp.hardware) + return + } + bd.t.L().Printf(`Initializing workload via restore`) + restoreDriver := makeRestoreDriver(bd.t, bd.c, restoreSpecs{ + hardware: bd.sp.hardware, + backup: makeBackupSpecs(bd.sp.initFromBackupSpecs, bd.sp.backup.backupSpecs), + }) + restoreDriver.getAOST(ctx) + // Only restore the database because a cluster restore will also restore the + // scheduled_jobs system table, which would automatically resume any backed up + // backup schedules, complicating fixture generation.
+ target := fmt.Sprintf("DATABASE %s", restoreDriver.sp.backup.workload.DatabaseName()) + require.NoError(bd.t, restoreDriver.run(ctx, target)) +} + +func (bd *backupDriver) runWorkload(ctx context.Context) error { + return bd.sp.backup.workload.foregroundRun(ctx, bd.t, bd.c, bd.sp.hardware) +} + +// scheduleBackups begins the backup schedule. +func (bd *backupDriver) scheduleBackups(ctx context.Context) { + conn := bd.c.Conn(ctx, bd.t.L(), 1) + sql := sqlutils.MakeSQLRunner(conn) + sql.Exec(bd.t, bd.sp.backup.scheduledBackupCmd()) +} + +// monitorBackups pauses the schedule once the target number of backups in the +// chain have been taken. +func (bd *backupDriver) monitorBackups(ctx context.Context) { + conn := bd.c.Conn(ctx, bd.t.L(), 1) + sql := sqlutils.MakeSQLRunner(conn) + + for { + time.Sleep(1 * time.Minute) + var activeScheduleCount int + sql.QueryRow(bd.t, `SELECT count(*) FROM [SHOW SCHEDULES] WHERE label = 'schedule_cluster' AND schedule_status = 'ACTIVE'`).Scan(&activeScheduleCount) + if activeScheduleCount < 2 { + bd.t.L().Printf(`First full backup still running`) + continue + } + var backupCount int + backupCountQuery := fmt.Sprintf(`SELECT count(DISTINCT end_time) FROM [SHOW BACKUP FROM LATEST IN %s]`, bd.sp.backup.backupCollection()) + sql.QueryRow(bd.t, backupCountQuery).Scan(&backupCount) + bd.t.L().Printf(`%d scheduled backups taken`, backupCount) + if backupCount >= bd.sp.backup.backupsIncluded { + sql.Exec(bd.t, `PAUSE SCHEDULES WITH x AS (SHOW SCHEDULES) SELECT id FROM x WHERE label = 'schedule_cluster'`) + break + } + } +} + +func registerBackupFixtures(r registry.Registry) { + for _, bf := range []backupFixtureSpecs{ + { + // Default AWS Backup Fixture. + hardware: makeHardwareSpecs(hardwareSpecs{workloadNode: true}), + backup: makeBackupFixtureSpecs(scheduledBackupSpecs{}), + timeout: 5 * time.Hour, + initFromBackupSpecs: backupSpecs{version: "v22.2.0"}, + skip: "only for fixture generation", + tags: registry.Tags("aws"), + }, + { + // Default fixture, run on GCE. Initialized via tpce --init. + hardware: makeHardwareSpecs(hardwareSpecs{workloadNode: true}), + backup: makeBackupFixtureSpecs(scheduledBackupSpecs{ + backupSpecs: backupSpecs{ + cloud: spec.GCE}}), + timeout: 5 * time.Hour, + skip: "only for fixture generation", + }, + { + // 15 GB Backup Fixture. Note that this fixture is created every night to + // ensure the fixture generation code works. + hardware: makeHardwareSpecs(hardwareSpecs{workloadNode: true, cpus: 4}), + backup: makeBackupFixtureSpecs( + scheduledBackupSpecs{ + crontab: "*/2 * * * *", + ignoreExistingBackups: true, + backupSpecs: backupSpecs{ + backupsIncluded: 4, + workload: tpceRestore{customers: 1000}}}), + initFromBackupSpecs: backupSpecs{version: "v22.2.1", backupProperties: "inc-count=48"}, + timeout: 2 * time.Hour, + tags: registry.Tags("aws"), + }, + { + // 8TB Backup Fixture. + hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000, workloadNode: true}), + backup: makeBackupFixtureSpecs(scheduledBackupSpecs{ + backupSpecs: backupSpecs{ + workload: tpceRestore{customers: 500000}}}), + timeout: 25 * time.Hour, + initFromBackupSpecs: backupSpecs{version: "v22.2.1"}, + // Add the weekly tags to allow an over 24 hour timeout. + tags: registry.Tags("weekly", "aws-weekly"), + skip: "only for fixture generation", + }, + { + // 32TB Backup Fixture.
+ hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000, workloadNode: true}), + backup: makeBackupFixtureSpecs(scheduledBackupSpecs{ + backupSpecs: backupSpecs{ + workload: tpceRestore{customers: 2000000}}}), + initFromBackupSpecs: backupSpecs{version: "v22.2.1"}, + timeout: 48 * time.Hour, + // Add the weekly tags to allow an over 24 hour timeout. + tags: registry.Tags("weekly", "aws-weekly"), + skip: "only for fixture generation", + }, + } { + bf := bf + bf.initTestName() + r.Add(registry.TestSpec{ + Name: bf.testName, + Owner: registry.OwnerDisasterRecovery, + Cluster: bf.hardware.makeClusterSpecs(r, bf.backup.cloud), + Timeout: bf.timeout, + EncryptionSupport: registry.EncryptionMetamorphic, + Tags: bf.tags, + Skip: bf.skip, + Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { + + bd := makeBackupDriver(t, c, bf) + bd.prepareCluster(ctx) + bd.initWorkload(ctx) + m := c.NewMonitor(ctx) + + workloadCtx, workloadCancel := context.WithCancel(ctx) + defer workloadCancel() + workloadDoneCh := make(chan struct{}) + m.Go(func(ctx context.Context) error { + defer close(workloadDoneCh) + err := bd.runWorkload(workloadCtx) + // The workload should only return an error if the roachtest driver cancels the + // workloadCtx after the backup schedule completes. + if err != nil && workloadCtx.Err() == nil { + // Implies the workload context was not cancelled and the workload cmd returned a + // different error. + return errors.Wrap(err, "workload returned an error before the workload context was cancelled") + } + bd.t.L().Printf("workload successfully finished") + return nil + }) + bd.scheduleBackups(ctx) + bd.monitorBackups(ctx) + }, + }) + } +} diff --git a/pkg/cmd/roachtest/tests/registry.go b/pkg/cmd/roachtest/tests/registry.go index 9c79caf28e0f..b92c5c41e369 100644 --- a/pkg/cmd/roachtest/tests/registry.go +++ b/pkg/cmd/roachtest/tests/registry.go @@ -26,6 +26,7 @@ func RegisterTests(r registry.Registry) { registerBackup(r) registerBackupMixedVersion(r) registerBackupNodeShutdown(r) + registerBackupFixtures(r) registerCDC(r) registerCDCMixedVersions(r) registerExportParquet(r) diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go index baebe35b8307..7fe4ef1e7c1b 100644 --- a/pkg/cmd/roachtest/tests/restore.go +++ b/pkg/cmd/roachtest/tests/restore.go @@ -48,7 +48,7 @@ import ( func registerRestoreNodeShutdown(r registry.Registry) { sp := restoreSpecs{ hardware: makeHardwareSpecs(hardwareSpecs{}), - backup: makeBackupSpecs( + backup: makeRestoringBackupSpecs( backupSpecs{workload: tpceRestore{customers: 1000}, version: "v22.2.1"}), timeout: 1 * time.Hour, @@ -109,7 +109,7 @@ func registerRestore(r registry.Registry) { withPauseSpecs := restoreSpecs{ hardware: makeHardwareSpecs(hardwareSpecs{}), - backup: makeBackupSpecs( + backup: makeRestoringBackupSpecs( backupSpecs{workload: tpceRestore{customers: 1000}, version: "v22.2.1"}), timeout: 3 * time.Hour, @@ -269,7 +269,7 @@ func registerRestore(r registry.Registry) { for _, sp := range []restoreSpecs{ { hardware: makeHardwareSpecs(hardwareSpecs{}), - backup: makeBackupSpecs(backupSpecs{}), + backup: makeRestoringBackupSpecs(backupSpecs{}), timeout: 1 * time.Hour, tags: registry.Tags("aws"), }, @@ -277,21 +277,21 @@ { // Note that the default specs in makeHardwareSpecs() spin up restore tests in aws, // by default.
hardware: makeHardwareSpecs(hardwareSpecs{}), - backup: makeBackupSpecs(backupSpecs{cloud: spec.GCE}), + backup: makeRestoringBackupSpecs(backupSpecs{cloud: spec.GCE}), timeout: 1 * time.Hour, }, { // Benchmarks using a low memory per core ratio - we don't expect ideal // performance but nodes should not OOM. hardware: makeHardwareSpecs(hardwareSpecs{mem: spec.Low}), - backup: makeBackupSpecs(backupSpecs{cloud: spec.GCE}), + backup: makeRestoringBackupSpecs(backupSpecs{cloud: spec.GCE}), timeout: 1 * time.Hour, }, { // Benchmarks if per node throughput remains constant if the number of // nodes doubles relative to default. hardware: makeHardwareSpecs(hardwareSpecs{nodes: 8}), - backup: makeBackupSpecs(backupSpecs{}), + backup: makeRestoringBackupSpecs(backupSpecs{}), timeout: 1 * time.Hour, tags: registry.Tags("aws"), }, @@ -301,7 +301,7 @@ func registerRestore(r registry.Registry) { hardware: makeHardwareSpecs(hardwareSpecs{ nodes: 9, zones: []string{"us-east-2b", "us-west-2b", "eu-west-1b"}}), // These zones are AWS-specific. - backup: makeBackupSpecs(backupSpecs{cloud: spec.AWS}), + backup: makeRestoringBackupSpecs(backupSpecs{cloud: spec.AWS}), timeout: 90 * time.Minute, tags: registry.Tags("aws"), }, @@ -309,7 +309,7 @@ func registerRestore(r registry.Registry) { // Benchmarks if per node throughput doubles if the vcpu count doubles // relative to default. hardware: makeHardwareSpecs(hardwareSpecs{cpus: 16}), - backup: makeBackupSpecs(backupSpecs{}), + backup: makeRestoringBackupSpecs(backupSpecs{}), timeout: 1 * time.Hour, tags: registry.Tags("aws"), }, @@ -317,14 +317,14 @@ func registerRestore(r registry.Registry) { // Ensures we can restore a 48 length incremental chain. // Also benchmarks per node throughput for a long chain. hardware: makeHardwareSpecs(hardwareSpecs{}), - backup: makeBackupSpecs(backupSpecs{backupsIncluded: 48}), + backup: makeRestoringBackupSpecs(backupSpecs{backupsIncluded: 48}), timeout: 1 * time.Hour, tags: registry.Tags("aws"), }, { // The nightly 8TB Restore test. hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000}), - backup: makeBackupSpecs(backupSpecs{ + backup: makeRestoringBackupSpecs(backupSpecs{ version: "v22.2.1", workload: tpceRestore{customers: 500000}}), timeout: 5 * time.Hour, @@ -333,7 +333,7 @@ func registerRestore(r registry.Registry) { { // The weekly 32TB Restore test. hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000}), - backup: makeBackupSpecs(backupSpecs{ + backup: makeRestoringBackupSpecs(backupSpecs{ version: "v22.2.1", workload: tpceRestore{customers: 2000000}}), timeout: 24 * time.Hour, @@ -378,7 +378,7 @@ func registerRestore(r registry.Registry) { { // A teeny weeny 15GB restore that could be used to bisect scale agnostic perf regressions. hardware: makeHardwareSpecs(hardwareSpecs{}), - backup: makeBackupSpecs( + backup: makeRestoringBackupSpecs( backupSpecs{workload: tpceRestore{customers: 1000}, version: "v22.2.1"}), timeout: 3 * time.Hour, @@ -457,9 +457,12 @@ type hardwareSpecs struct { // cpus is the per node cpu count. cpus int - // nodes is the number of nodes in the restore. + // nodes is the number of crdb nodes in the restore. nodes int + // workloadNode is true if an additional node should also get spun up to run the workload. + workloadNode bool + // volumeSize indicates the size of per node block storage (pd-ssd for gcs, // ebs for aws). If zero, local ssd's are used.
volumeSize int @@ -481,11 +484,15 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string if hw.mem != spec.Auto { clusterOpts = append(clusterOpts, spec.Mem(hw.mem)) } + addWorkloadNode := 0 + if hw.workloadNode { + addWorkloadNode++ + } if len(hw.zones) > 0 { clusterOpts = append(clusterOpts, spec.Zones(strings.Join(hw.zones, ","))) clusterOpts = append(clusterOpts, spec.Geo()) } - s := r.MakeClusterSpec(hw.nodes, clusterOpts...) + s := r.MakeClusterSpec(hw.nodes+addWorkloadNode, clusterOpts...) if backupCloud == spec.AWS && s.Cloud == spec.AWS && s.VolumeSize != 0 { // Work around an issue that RAID0s local NVMe and GP3 storage together: @@ -516,6 +523,21 @@ func (hw hardwareSpecs) String(verbose bool) string { return builder.String() } +func (hw hardwareSpecs) getWorkloadNode() int { + if hw.workloadNode { + return hw.nodes + 1 + } + return 0 +} + +func (hw hardwareSpecs) getCRDBNodes() option.NodeListOption { + nodes := make(option.NodeListOption, hw.nodes) + for i := range nodes { + nodes[i] = i + 1 + } + return nodes +} + // makeHardwareSpecs instantiates hardware specs for a restore roachtest. // Unless the caller provides any explicit specs, the default specs are used. func makeHardwareSpecs(override hardwareSpecs) hardwareSpecs { @@ -533,10 +555,11 @@ func makeHardwareSpecs(override hardwareSpecs) hardwareSpecs { specs.volumeSize = override.volumeSize } specs.zones = override.zones + specs.workloadNode = override.workloadNode return specs } -var defaultBackupSpecs = backupSpecs{ +var defaultRestoringBackupSpecs = backupSpecs{ // TODO(msbutler): write a script that automatically finds the latest versioned fixture. version: "v22.2.0", cloud: spec.AWS, @@ -571,16 +594,19 @@ type backupSpecs struct { // String returns a stringified version of the backup specs. Note that the // backup version, backup directory, and AOST are never included. +// +// TODO(msbutler): the semantics around specifying backupsIncluded and backupProperties are really +// confusing. Simplify this. func (bs backupSpecs) String(verbose bool) string { var builder strings.Builder builder.WriteString("/" + bs.workload.String()) - if verbose || bs.backupProperties != defaultBackupSpecs.backupProperties { + if verbose || bs.backupProperties != defaultRestoringBackupSpecs.backupProperties { builder.WriteString("/" + bs.backupProperties) } builder.WriteString("/" + bs.cloud) - if verbose || bs.backupsIncluded != defaultBackupSpecs.backupsIncluded { + if verbose || bs.backupsIncluded != defaultRestoringBackupSpecs.backupsIncluded { builder.WriteString("/" + fmt.Sprintf("backupsIncluded=%d", bs.backupsIncluded)) } return builder.String() @@ -608,11 +634,7 @@ func (bs backupSpecs) getAostCmd() string { bs.backupsIncluded) } -// makeBackupSpecs initializes the default backup specs. The caller can override -// any of the default backup specs by passing any non-nil params. -func makeBackupSpecs(override backupSpecs) backupSpecs { - specs := defaultBackupSpecs - +func makeBackupSpecs(override backupSpecs, specs backupSpecs) backupSpecs { if override.cloud != "" { specs.cloud = override.cloud } @@ -635,19 +657,59 @@ func makeBackupSpecs(override backupSpecs) backupSpecs { if override.workload != nil { specs.workload = override.workload } - return specs } +// makeRestoringBackupSpecs initializes the default restoring backup specs. The caller can override +// any of the default backup specs by passing any non-nil params.
+func makeRestoringBackupSpecs(override backupSpecs) backupSpecs { + return makeBackupSpecs(override, defaultRestoringBackupSpecs) +} + type backupWorkload interface { fixtureDir() string String() string + + // DatabaseName specifies the name of the database the workload will operate on. + DatabaseName() string + + // initWorkload loads the cluster with the workload's schema and initial data. + initWorkload(ctx context.Context, t test.Test, c cluster.Cluster, sp hardwareSpecs) + + // foregroundRun begins a foreground workload that runs indefinitely until the passed context + // is cancelled. + foregroundRun(ctx context.Context, t test.Test, c cluster.Cluster, sp hardwareSpecs) error } type tpceRestore struct { customers int } +func (tpce tpceRestore) initWorkload( + ctx context.Context, t test.Test, c cluster.Cluster, sp hardwareSpecs, +) { + tpceSpec, err := initTPCESpec(ctx, t.L(), c, sp.getWorkloadNode(), sp.getCRDBNodes()) + require.NoError(t, err) + tpceSpec.init(ctx, t, c, tpceCmdOptions{ + customers: tpce.customers, + racks: sp.nodes}) +} + +func (tpce tpceRestore) foregroundRun( + ctx context.Context, t test.Test, c cluster.Cluster, sp hardwareSpecs, +) error { + tpceSpec, err := initTPCESpec(ctx, t.L(), c, sp.getWorkloadNode(), sp.getCRDBNodes()) + require.NoError(t, err) + + _, err = tpceSpec.run(ctx, t, c, tpceCmdOptions{ + // Set the duration to be a week to ensure the workload never exits early. + duration: time.Hour * 7 * 24, + customers: tpce.customers, + racks: sp.nodes, + threads: sp.cpus * sp.nodes}) + return err +} + func (tpce tpceRestore) fixtureDir() string { return fmt.Sprintf(`tpc-e/customers=%d`, tpce.customers) } @@ -672,6 +734,10 @@ func (tpce tpceRestore) String() string { return builder.String() } +func (tpce tpceRestore) DatabaseName() string { + return "tpce" +} + // restoreSpecs define input parameters to a restore roachtest set during // registration. They should not be modified within test_spec.run(), as they are shared // across driver runs. @@ -725,15 +791,17 @@ func makeRestoreDriver(t test.Test, c cluster.Cluster, sp restoreSpecs) restoreD } func (rd *restoreDriver) prepareCluster(ctx context.Context) { - if rd.c.Spec().Cloud != rd.sp.backup.cloud { // For now, only run the test on the cloud provider that also stores the backup. rd.t.Skipf("test configured to run on %s", rd.sp.backup.cloud) } - rd.c.Put(ctx, rd.t.Cockroach(), "./cockroach") rd.c.Start(ctx, rd.t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings()) + rd.getAOST(ctx) +} +// getAOST gets the AOST to use in the restore cmd. +func (rd *restoreDriver) getAOST(ctx context.Context) { var aost string conn := rd.c.Conn(ctx, rd.t.L(), 1) err := conn.QueryRowContext(ctx, rd.sp.backup.getAostCmd()).Scan(&aost)