diff --git a/build/teamcity/cockroach/nightlies/sqllogic_backup_restore.sh b/build/teamcity/cockroach/nightlies/sqllogic_backup_restore.sh new file mode 100755 index 000000000000..ec52084bf9a2 --- /dev/null +++ b/build/teamcity/cockroach/nightlies/sqllogic_backup_restore.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -euo pipefail + +dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" + +source "$dir/teamcity-support.sh" # For $root +source "$dir/teamcity-bazel-support.sh" # For run_bazel + +tc_start_block "Run SQL Logic Test Backup Restore" +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e BUILD_VCS_NUMBER -e GITHUB_API_TOKEN -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL" \ + run_bazel build/teamcity/cockroach/nightlies/sqllogic_backup_restore_impl.sh +tc_end_block "Run SQL Logic Test Backup Restore" diff --git a/build/teamcity/cockroach/nightlies/sqllogic_backup_restore_impl.sh b/build/teamcity/cockroach/nightlies/sqllogic_backup_restore_impl.sh new file mode 100755 index 000000000000..deda06d8b275 --- /dev/null +++ b/build/teamcity/cockroach/nightlies/sqllogic_backup_restore_impl.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -xeuo pipefail + +dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" +source "$dir/teamcity-bazel-support.sh" # For process_test_json +source "$dir/teamcity-support.sh" + +bazel build //pkg/cmd/bazci //pkg/cmd/github-post //pkg/cmd/testfilter --config=ci +BAZEL_BIN=$(bazel info bazel-bin --config=ci) +google_credentials="$GOOGLE_EPHEMERAL_CREDENTIALS" + +log_into_gcloud +export GOOGLE_APPLICATION_CREDENTIALS="$PWD/.google-credentials.json" + +ARTIFACTS_DIR=/artifacts +GO_TEST_JSON_OUTPUT_FILE=$ARTIFACTS_DIR/test.json.txt +exit_status=0 + +configs=( +local +multiregion-9node-3region-3azs +5node +multiregion-9node-3region-3azs-no-los +multiregion-9node-3region-3azs-tenant +multiregion-9node-3region-3azs-vec-off +multiregion-15node-5region-3azs +3node-tenant +3node-tenant-multiregion +) + +for config in "${configs[@]}"; do +$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci test -- --config=ci \ + //pkg/ccl/logictestccl/tests/$config/... \ + --test_arg=-show-sql \ + --test_env=COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY=0.5 \ + --test_env=GO_TEST_WRAP_TESTV=1 \ + --test_env=GO_TEST_WRAP=1 \ + --test_env=GO_TEST_JSON_OUTPUT_FILE=$GO_TEST_JSON_OUTPUT_FILE.$config \ + --test_timeout=28800 \ + --test_env=GOOGLE_APPLICATION_CREDENTIALS="$GOOGLE_APPLICATION_CREDENTIALS" \ + || exit_status=$? + +process_test_json \ + $BAZEL_BIN/pkg/cmd/testfilter/testfilter_/testfilter \ + $BAZEL_BIN/pkg/cmd/github-post/github-post_/github-post \ + $ARTIFACTS_DIR \ + $GO_TEST_JSON_OUTPUT_FILE.$config \ + $exit_status +done diff --git a/pkg/ccl/logictestccl/testdata/logic_test/as_of b/pkg/ccl/logictestccl/testdata/logic_test/as_of index 722406252bef..91d71ad87d96 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/as_of +++ b/pkg/ccl/logictestccl/testdata/logic_test/as_of @@ -1,4 +1,5 @@ # LogicTest: local +# BackupRestoreProbability: 0.0 statement ok CREATE TABLE t ( diff --git a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal index dde2f23e6a6b..bbfd927df2fb 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal +++ b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal @@ -35,6 +35,7 @@ CREATE table t2 (a STRING PRIMARY KEY) PARTITION BY LIST (a) ( # Since there are no zone configurations on any of these partitions, tables, # or databases, these partitions inherit directly from the default config. +skipif backup-restore query IITTITTTII SELECT * FROM crdb_internal.partitions ORDER BY table_id, index_id, name ---- @@ -64,6 +65,7 @@ CREATE TABLE t4 (a INT PRIMARY KEY, b INT); CREATE INDEX myt4index ON t4 (b); AL user testuser +skipif backup-restore query IT SELECT zone_id, target FROM crdb_internal.zones ORDER BY 1 ---- diff --git a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_implicit b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_implicit index 7d844fad3351..a393cc40710b 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_implicit +++ b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_implicit @@ -347,6 +347,7 @@ ALTER INDEX t_implicit_idx@t_a_idx PARTITION BY LIST (a) ( PARTITION "one" VALUES IN (1) ) +skipif backup-restore query TTTTTT colnames SELECT indexrelid, indrelid, indkey, indclass, indoption, indcollation diff --git a/pkg/ccl/logictestccl/testdata/logic_test/schema_change_in_txn b/pkg/ccl/logictestccl/testdata/logic_test/schema_change_in_txn index ed64ee65fced..e89155928a90 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/schema_change_in_txn +++ b/pkg/ccl/logictestccl/testdata/logic_test/schema_change_in_txn @@ -1,3 +1,8 @@ +# Backing up and restoring a descriptor will increment the version of the +# descriptor before restoring it so we cannot achieve the expected behaviour in +# this test. +# BackupRestoreProbability: 0.0 + # Regression test for a situation involving creating a table in a transaction # and altering the index when referenced by name. subtest index_resolution_does_not_lead_to_new_version diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index fb404fc2747f..1e78677b2458 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -550,6 +550,9 @@ var ( // writes not holding appropriate latches for range key stats update. useMVCCRangeTombstonesForPointDeletes = util.ConstantWithMetamorphicTestBool( "logictest-use-mvcc-range-tombstones-for-point-deletes", false) + + // BackupRestoreProbability is the environment variable for `3node-backup` config. + backupRestoreProbability = envutil.EnvOrDefaultFloat64("COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY", 0.0) ) const queryRewritePlaceholderPrefix = "__async_query_rewrite_placeholder" @@ -619,6 +622,10 @@ type pendingQuery struct { func (ls *logicStatement) readSQL( t *logicTest, s *logictestbase.LineScanner, allowSeparator bool, ) (separator bool, _ error) { + if err := t.maybeBackupRestore(t.rng, t.cfg); err != nil { + return false, err + } + var buf bytes.Buffer hasVars := false for s.Scan() { @@ -1666,6 +1673,7 @@ func (t *logicTest) shutdownCluster() { // resetCluster cleans up the current cluster, and creates a fresh one. func (t *logicTest) resetCluster() { + t.traceStop() t.shutdownCluster() if t.serverArgs == nil { // We expect the server args to be persisted to the test during test @@ -1981,9 +1989,6 @@ type subtestDetails struct { } func (t *logicTest) processTestFile(path string, config logictestbase.TestClusterConfig) error { - rng, seed := randutil.NewPseudoRand() - t.outf("rng seed: %d\n", seed) - subtests, err := fetchSubtests(path) if err != nil { return err @@ -2001,7 +2006,7 @@ func (t *logicTest) processTestFile(path string, config logictestbase.TestCluste // If subtest has no name, then it is not a subtest, so just run the lines // in the overall test. Note that this can only happen in the first subtest. if len(subtest.name) == 0 { - if err := t.processSubtest(subtest, path, config, rng); err != nil { + if err := t.processSubtest(subtest, path, config); err != nil { return err } } else { @@ -2011,7 +2016,7 @@ func (t *logicTest) processTestFile(path string, config logictestbase.TestCluste defer func() { t.subtestT = nil }() - if err := t.processSubtest(subtest, path, config, rng); err != nil { + if err := t.processSubtest(subtest, path, config); err != nil { t.Error(err) } }) @@ -2042,9 +2047,10 @@ func (t *logicTest) hasOpenTxns(ctx context.Context) bool { existingTxnPriority := "NORMAL" err := user.QueryRow("SHOW TRANSACTION PRIORITY").Scan(&existingTxnPriority) if err != nil { - // Ignore an error if we are unable to see transaction priority. + // If we are unable to see transaction priority assume we're in the middle + // of an explicit txn. log.Warningf(ctx, "failed to check txn priority with %v", err) - continue + return true } if _, err := user.Exec("SET TRANSACTION PRIORITY NORMAL;"); !testutils.IsError(err, "there is no transaction in progress") { // Reset the txn priority to what it was before we checked for open txns. @@ -2065,11 +2071,6 @@ func (t *logicTest) hasOpenTxns(ctx context.Context) bool { func (t *logicTest) maybeBackupRestore( rng *rand.Rand, config logictestbase.TestClusterConfig, ) error { - if config.BackupRestoreProbability != 0 && !config.IsCCLConfig { - return errors.Newf("logic test config %s specifies a backup restore probability but is not CCL", - config.Name) - } - // We decide if we want to take a backup here based on a probability // specified in the logic test config. if rng.Float64() > config.BackupRestoreProbability { @@ -2091,6 +2092,8 @@ func (t *logicTest) maybeBackupRestore( t.setUser(oldUser, 0 /* nodeIdxOverride */) }() + log.Info(context.Background(), "Running cluster backup and restore") + // To restore the same state in for the logic test, we need to restore the // data and the session state. The session state includes things like session // variables that are set for every session that is open. @@ -2139,13 +2142,13 @@ func (t *logicTest) maybeBackupRestore( userToSessionVars[user] = userSessionVars } - backupLocation := fmt.Sprintf("nodelocal://1/logic-test-backup-%s", + backupLocation := fmt.Sprintf("gs://cockroachdb-backup-testing/logic-test-backup-restore-nightly/%s?AUTH=implicit", strconv.FormatInt(timeutil.Now().UnixNano(), 10)) // Perform the backup and restore as root. t.setUser(username.RootUser, 0 /* nodeIdxOverride */) - if _, err := t.db.Exec(fmt.Sprintf("BACKUP TO '%s'", backupLocation)); err != nil { + if _, err := t.db.Exec(fmt.Sprintf("BACKUP INTO '%s'", backupLocation)); err != nil { return errors.Wrap(err, "backing up cluster") } @@ -2155,7 +2158,7 @@ func (t *logicTest) maybeBackupRestore( // Run the restore as root. t.setUser(username.RootUser, 0 /* nodeIdxOverride */) - if _, err := t.db.Exec(fmt.Sprintf("RESTORE FROM '%s'", backupLocation)); err != nil { + if _, err := t.db.Exec(fmt.Sprintf("RESTORE FROM LATEST IN '%s'", backupLocation)); err != nil { return errors.Wrap(err, "restoring cluster") } @@ -2252,7 +2255,7 @@ func (t *logicTest) purgeZoneConfig() { } func (t *logicTest) processSubtest( - subtest subtestDetails, path string, config logictestbase.TestClusterConfig, rng *rand.Rand, + subtest subtestDetails, path string, config logictestbase.TestClusterConfig, ) error { defer t.traceStop() @@ -2283,9 +2286,6 @@ func (t *logicTest) processSubtest( path, s.Line+subtest.lineLineIndexIntoFile, ) } - if err := t.maybeBackupRestore(rng, config); err != nil { - return err - } switch cmd { case "repeat": // A line "repeat X" makes the test repeat the following statement or query X times. @@ -2878,6 +2878,10 @@ func (t *logicTest) processSubtest( } s.SetSkip(fmt.Sprintf("unsupported configuration %s (%s)", configName, issue)) } + case "backup-restore": + if config.BackupRestoreProbability > 0.0 { + s.SetSkip("backup-restore interferes with this check") + } continue default: return errors.Errorf("unimplemented test statement: %s", s.Text()) @@ -3938,9 +3942,16 @@ func RunLogicTest( panic(errors.Wrapf(err, "could not set batch size for test")) } } + hasOverride, overriddenBackupRestoreProbability := logictestbase.ReadBackupRestoreProbabilityOverride(t, path) + config.BackupRestoreProbability = backupRestoreProbability + if hasOverride { + config.BackupRestoreProbability = overriddenBackupRestoreProbability + } + lt.setup( config, serverArgsCopy, readClusterOptions(t, path), readKnobOptions(t, path), readTenantClusterSettingOverrideArgs(t, path), ) + lt.runFile(path, config) progress.total += lt.progress diff --git a/pkg/sql/logictest/logictestbase/logictestbase.go b/pkg/sql/logictest/logictestbase/logictestbase.go index 6c03ec8dfa9e..31b71f1e1e98 100644 --- a/pkg/sql/logictest/logictestbase/logictestbase.go +++ b/pkg/sql/logictest/logictestbase/logictestbase.go @@ -73,7 +73,7 @@ type TestClusterConfig struct { // localities is set if nodes should be set to a particular locality. // Nodes are 1-indexed. Localities map[int]roachpb.Locality - // backupRestoreProbability will periodically backup the cluster and restore + // BackupRestoreProbability will periodically backup the cluster and restore // it's state to a new cluster at random points during a logic test. BackupRestoreProbability float64 // disableDeclarativeSchemaChanger will disable the declarative schema changer @@ -576,6 +576,52 @@ func (l stdlogger) Logf(format string, args ...interface{}) { fmt.Printf(format, args...) } +// ReadBackupRestoreProbabilityOverride reads any LogicTest directive at the +// beginning of a test file. A line that starts with "# +// BackupRestoreProbability:" specifies the probability with which we should run +// a cluster backup + restore between lines of the test file. +// +// Example: +// +// # BackupRestoreProbability: 0.8 +// +// If the file doesn't contain a directive, the value of the environment +// variable COCKROACH_LOGIC_TEST_BACKUP_RESTORE_PROBABILITY is used. +func ReadBackupRestoreProbabilityOverride( + t logger, path string, +) (hasOverride bool, probability float64) { + file, err := os.Open(path) + if err != nil { + t.Fatalf("failed open file %s", path) + } + defer file.Close() + + s := NewLineScanner(file) + for s.Scan() { + fields := strings.Fields(s.Text()) + if len(fields) == 0 { + continue + } + cmd := fields[0] + if !strings.HasPrefix(cmd, "#") { + // Stop at the first line that's not a comment (or empty). + break + } + if len(fields) > 1 && cmd == "#" && fields[1] == "BackupRestoreProbability:" { + if len(fields) == 2 { + t.Fatalf("%s: empty LogicTest directive", path) + } + probability, err := strconv.ParseFloat(fields[2], 64) + if err != nil { + t.Fatalf("failed to parse backup+restore probability: %+v", err) + } + return true, probability + } + } + + return false, 0 +} + // ReadTestFileConfigs reads any LogicTest directive at the beginning of a // test file. A line that starts with "# LogicTest:" specifies a list of // configuration names. The test file is run against each of those