Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachtest: introduce new mixed-version testing API #92431

Merged
merged 3 commits into from
Jan 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ ALL_TESTS = [
"//pkg/cmd/reduce/reduce:reduce_test",
"//pkg/cmd/release:release_test",
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/roachtestutil/mixedversion:mixedversion_test",
"//pkg/cmd/roachtest/tests:tests_test",
"//pkg/cmd/roachtest:roachtest_test",
"//pkg/cmd/teamcity-trigger:teamcity-trigger_test",
Expand Down Expand Up @@ -1006,6 +1007,9 @@ GO_TARGETS = [
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/option:option",
"//pkg/cmd/roachtest/registry:registry",
"//pkg/cmd/roachtest/roachtestutil/clusterupgrade:clusterupgrade",
"//pkg/cmd/roachtest/roachtestutil/mixedversion:mixedversion",
"//pkg/cmd/roachtest/roachtestutil/mixedversion:mixedversion_test",
"//pkg/cmd/roachtest/roachtestutil:roachtestutil",
"//pkg/cmd/roachtest/spec:spec",
"//pkg/cmd/roachtest/test:test",
Expand Down Expand Up @@ -2467,6 +2471,8 @@ GET_X_DATA_TARGETS = [
"//pkg/cmd/roachtest/option:get_x_data",
"//pkg/cmd/roachtest/registry:get_x_data",
"//pkg/cmd/roachtest/roachtestutil:get_x_data",
"//pkg/cmd/roachtest/roachtestutil/clusterupgrade:get_x_data",
"//pkg/cmd/roachtest/roachtestutil/mixedversion:get_x_data",
"//pkg/cmd/roachtest/spec:get_x_data",
"//pkg/cmd/roachtest/test:get_x_data",
"//pkg/cmd/roachtest/tests:get_x_data",
Expand Down
26 changes: 26 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/clusterupgrade/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library")

# Go library for the clusterupgrade package: helpers for uploading
# cockroach binaries, restarting nodes and waiting for cluster upgrades.
go_library(
name = "clusterupgrade",
srcs = [
"clusterupgrade.go",
"predecessor_version.go",
],
# Data file made available to the package through Go's embed support.
embedsrcs = ["predecessor_version.json"],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/clusterupgrade",
visibility = ["//visibility:public"],
deps = [
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/option",
"//pkg/cmd/roachtest/test",
"//pkg/roachpb",
"//pkg/roachprod/install",
"//pkg/roachprod/logger",
"//pkg/util/retry",
"//pkg/util/version",
"@com_github_cockroachdb_errors//:errors",
],
)

get_x_data(name = "get_x_data")
270 changes: 270 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package clusterupgrade

import (
"context"
gosql "database/sql"
"fmt"
"math/rand"
"path/filepath"
"strconv"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/cockroach/pkg/util/version"
)

const (
// MainVersion is the sentinel version string (the empty string) that
// callers pass when no released version is specified. It stands for
// the cockroach binary that was handed to roachtest itself: when
// UploadVersion receives MainVersion it uploads that binary, and
// VersionMsg renders it as "<current>" in logs.
MainVersion = ""
)

// BinaryVersion reports the version of the cockroach binary running
// on the node that the given database connection points to, as
// returned by crdb_internal.node_executable_version().
//
// NB: "version" here means major.minor[-internal]; the patch level is
// not part of the result. For example, a 19.2.4 binary reports 19.2.
//
// An error is returned if the query fails, if the node reports an
// empty string, or if the reported string cannot be parsed.
func BinaryVersion(db *gosql.DB) (roachpb.Version, error) {
	const query = `SELECT crdb_internal.node_executable_version();`

	var raw string
	row := db.QueryRow(query)
	if scanErr := row.Scan(&raw); scanErr != nil {
		return roachpb.Version{}, scanErr
	}
	if raw == "" {
		return roachpb.Version{}, fmt.Errorf("empty version")
	}

	return roachpb.ParseVersion(raw)
}

// ClusterVersion reports the cluster version that is active on the
// node behind the given database connection, as shown by
// `SHOW CLUSTER SETTING version`.
//
// The result can be stale by the time the caller looks at it: the
// cluster may auto-upgrade in the background, and the new version is
// propagated asynchronously through gossip.
//
// NB: cluster versions are always major.minor[-internal]; they never
// carry a patch level.
func ClusterVersion(ctx context.Context, db *gosql.DB) (roachpb.Version, error) {
	const query = `SHOW CLUSTER SETTING version`

	var raw string
	row := db.QueryRowContext(ctx, query)
	if scanErr := row.Scan(&raw); scanErr != nil {
		return roachpb.Version{}, scanErr
	}

	return roachpb.ParseVersion(raw)
}

// UploadVersion makes the cockroach binary for newVersion available
// on the given nodes and returns the path at which that binary can be
// found on them, suitable to be used with
// `roachprod start --binary=<path>`.
//
// Three cases are handled, in this order:
//
//   - newVersion == MainVersion: the binary passed to roachtest
//     (t.Cockroach()) is uploaded to `./cockroach`.
//   - a binary override is registered for newVersion in
//     t.VersionsBinaryOverride(): that binary is uploaded to
//     `./cockroach-<newVersion>`.
//   - otherwise: the released binary is staged into `v<newVersion>/`,
//     unless `v<newVersion>/cockroach` already exists on the nodes.
//
// The returned path is always the location that was actually written
// (or verified to exist) by this call. In particular, when a binary
// override is in use, the override's path is returned rather than the
// path at which a staged release would live.
func UploadVersion(
	ctx context.Context,
	t test.Test,
	l *logger.Logger,
	c cluster.Cluster,
	nodes option.NodeListOption,
	newVersion string,
) (string, error) {
	if newVersion == MainVersion {
		const binaryPath = "./cockroach"
		if err := c.PutE(ctx, l, t.Cockroach(), binaryPath, nodes); err != nil {
			return "", err
		}
		return binaryPath, nil
	}

	if override, ok := t.VersionsBinaryOverride()[newVersion]; ok {
		// If an override has been specified for newVersion, use that binary.
		l.Printf("using binary override for version %s: %s", newVersion, override)
		binaryPath := "./cockroach-" + newVersion
		if err := c.PutE(ctx, l, override, binaryPath, nodes); err != nil {
			return "", err
		}
		return binaryPath, nil
	}

	v := "v" + newVersion
	dir := v
	binaryPath := filepath.Join(dir, "cockroach")
	// Only stage the release if the cockroach binary is not already
	// present on the nodes; `test -e` fails when the file is missing.
	if err := c.RunE(ctx, nodes, "test", "-e", binaryPath); err != nil {
		if err := c.RunE(ctx, nodes, "mkdir", "-p", dir); err != nil {
			return "", err
		}
		if err := c.Stage(ctx, l, "release", v, dir, nodes); err != nil {
			return "", err
		}
	}
	return binaryPath, nil
}

// InstallFixtures copies the previously created fixtures (in
// pkg/cmd/roachtest/fixtures) for the given version to the nodes
// passed. After this step, the corresponding binary can be started on
// the cluster and it will use that store directory.
//
// v is a version string without the leading "v" (e.g. "22.2.1"). The
// fixtures are keyed by cluster version (major.minor), so any patch
// component of v is dropped when selecting the checkpoint file.
//
// Every failure is reported through the returned error: an unparsable
// version, a failure to create the store directory, a failed upload,
// or a failed extraction. Extraction deliberately fails if the store
// directory already contains an LSM (a CURRENT file).
func InstallFixtures(
	ctx context.Context, l *logger.Logger, c cluster.Cluster, nodes option.NodeListOption, v string,
) error {
	// Validate the version before touching the nodes so that bad input
	// is rejected without side effects.
	vv, err := version.Parse("v" + v)
	if err != nil {
		return fmt.Errorf("parsing fixture version %q: %w", v, err)
	}
	// The fixtures use cluster version (major.minor) but the input might be
	// a patch release.
	name := CheckpointName(
		roachpb.Version{Major: int32(vv.Major()), Minor: int32(vv.Minor())}.String(),
	)

	if err := c.RunE(ctx, nodes, "mkdir -p {store-dir}"); err != nil {
		return fmt.Errorf("creating store directory: %w", err)
	}
	for _, n := range nodes {
		if err := c.PutE(ctx, l,
			"pkg/cmd/roachtest/fixtures/"+strconv.Itoa(n)+"/"+name+".tgz",
			"{store-dir}/fixture.tgz", c.Node(n),
		); err != nil {
			return err
		}
	}
	// Extract fixture. Fail if there's already an LSM in the store dir.
	if err := c.RunE(
		ctx, nodes, "cd {store-dir} && [ ! -f {store-dir}/CURRENT ] && tar -xf fixture.tgz",
	); err != nil {
		return fmt.Errorf("extracting fixture %s: %w", name, err)
	}
	return nil
}

// StartWithBinary starts cockroach on the given nodes using the
// binary located at binaryPath. The binary is assumed to already be
// present at that path on every node; this function does not upload
// anything.
func StartWithBinary(
	ctx context.Context,
	l *logger.Logger,
	c cluster.Cluster,
	nodes option.NodeListOption,
	binaryPath string,
	startOpts option.StartOpts,
) {
	useBinary := install.BinaryOption(binaryPath)
	c.Start(ctx, l, startOpts, install.MakeClusterSettings(useBinary), nodes)
}

// BinaryPathFromVersion returns the location on roachprod nodes of
// the cockroach binary for version v. For the empty version this is
// `./cockroach`; for any other version it is the path to which a
// released binary is staged, i.e. `v<version>/cockroach`.
func BinaryPathFromVersion(v string) string {
	switch v {
	case "":
		return "./cockroach"
	default:
		stagingDir := "v" + v
		return filepath.Join(stagingDir, "cockroach")
	}
}

// RestartNodesWithNewBinary uploads a given cockroach version to the
// nodes passed, and restarts the cockroach process on each of them,
// one node at a time and in random order.
//
// For every node, the running process is stopped gracefully, the
// binary for newVersion is uploaded (see UploadVersion), and the node
// is started again with startOpts. The caller's nodes slice is not
// modified. The first error encountered is returned and aborts the
// remaining restarts; the context's error is returned if the context
// is done during the pause that follows each restart.
func RestartNodesWithNewBinary(
	ctx context.Context,
	t test.Test,
	l *logger.Logger,
	c cluster.Cluster,
	nodes option.NodeListOption,
	startOpts option.StartOpts,
	newVersion string,
) error {
	// NB: We could technically stage the binary on all nodes before
	// restarting each one, but on Unix it's invalid to write to an
	// executable file while it is currently running. So we do the
	// simple thing and upload it serially instead.

	// Restart nodes in a random order; otherwise node 1 would be running all
	// the migrations and it probably also has all the leases. Shuffle a
	// private copy so that the caller's slice keeps its order.
	order := make(option.NodeListOption, len(nodes))
	copy(order, nodes)
	rand.Shuffle(len(order), func(i, j int) {
		order[i], order[j] = order[j], order[i]
	})

	for _, node := range order {
		l.Printf("restarting node %d into version %s", node, VersionMsg(newVersion))
		// Stop the cockroach process gracefully in order to drain it properly.
		// This makes the upgrade closer to how users do it in production, but
		// it's also needed to eliminate flakiness. In particular, this will
		// make sure that DistSQL draining information is dissipated through
		// gossip so that other nodes running an older version don't consider
		// this upgraded node for DistSQL plans (see #87154 for more details).
		// TODO(yuzefovich): ideally, we would also check that the drain was
		// successful since if it wasn't, then we might see flakes too.
		if err := c.StopCockroachGracefullyOnNode(ctx, l, node); err != nil {
			return err
		}

		binary, err := UploadVersion(ctx, t, l, c, c.Node(node), newVersion)
		if err != nil {
			return err
		}
		StartWithBinary(ctx, l, c, c.Node(node), binary, startOpts)

		// We have seen cases where a transient error could occur when this
		// newly upgraded node serves as a gateway for a distributed query due
		// to remote nodes not being able to dial back to the gateway for some
		// reason (investigation of it is tracked in #87634). For now, we're
		// papering over these flakes by this pause. For more context, see
		// #87104. The pause is cut short if the context is canceled.
		// TODO(yuzefovich): remove this pause once #87634 is fixed.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(4 * time.Second):
		}
	}

	return nil
}

// WaitForClusterUpgrade waits for the cluster version to reach the
// first node's binary version. This function should only be called if
// every node in the cluster has been restarted to run the same binary
// version. We rely on the cluster's internal self-upgrading
// mechanism to update the underlying cluster version.
//
// dbFunc maps a node to a database connection to that node. The
// target version is read from nodes[0]; each node in nodes is then
// polled, in order, for up to five minutes until it reports that
// cluster version.
//
// An error is returned if nodes is empty, if the target version
// cannot be determined, or if any node fails to report the target
// version within its polling window.
func WaitForClusterUpgrade(
	ctx context.Context, l *logger.Logger, nodes option.NodeListOption, dbFunc func(int) *gosql.DB,
) error {
	// Without at least one node there is no binary version to wait for.
	if len(nodes) == 0 {
		return fmt.Errorf("waiting for cluster upgrade: no nodes provided")
	}

	newVersion, err := BinaryVersion(dbFunc(nodes[0]))
	if err != nil {
		return err
	}

	l.Printf("waiting for cluster to auto-upgrade to %s", newVersion)
	for _, node := range nodes {
		err := retry.ForDuration(5*time.Minute, func() error {
			currentVersion, err := ClusterVersion(ctx, dbFunc(node))
			if err != nil {
				return err
			}
			if currentVersion != newVersion {
				return fmt.Errorf("%d: expected cluster version %s, got %s", node, newVersion, currentVersion)
			}
			l.Printf("%s: acked by n%d", currentVersion, node)
			return nil
		})
		if err != nil {
			return err
		}
	}

	l.Printf("all nodes (%v) are upgraded to %s", nodes, newVersion)
	return nil
}

// CheckpointName builds the name (without extension) of the
// checkpoint file expected under
// `pkg/cmd/roachtest/fixtures/{nodeID}` for the given binary version:
// the version string prefixed with "checkpoint-v".
func CheckpointName(binaryVersion string) string {
	const prefix = "checkpoint-v"
	return prefix + binaryVersion
}

// VersionMsg formats a version for display in logs. Regular versions
// are returned unchanged; MainVersion is rendered as "<current>",
// representing the latest cockroach version, typically built off the
// branch being tested.
func VersionMsg(v string) string {
	switch v {
	case MainVersion:
		return "<current>"
	default:
		return v
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tests
package clusterupgrade

import (
// Import embed for the version map
Expand Down
41 changes: 41 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Go library for the mixedversion package (the mixed-version testing
# API), built on top of the clusterupgrade helpers listed in deps.
go_library(
name = "mixedversion",
srcs = [
"mixedversion.go",
"planner.go",
"runner.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/mixedversion",
visibility = ["//visibility:public"],
deps = [
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/option",
"//pkg/cmd/roachtest/roachtestutil/clusterupgrade",
"//pkg/cmd/roachtest/test",
"//pkg/roachpb",
"//pkg/roachprod/logger",
"//pkg/util/randutil",
"//pkg/util/timeutil",
"//pkg/util/version",
"@org_golang_x_sync//errgroup",
],
)

# Unit tests for the mixedversion package (currently planner_test.go).
go_test(
name = "mixedversion_test",
srcs = ["planner_test.go"],
# Passes a 295-second timeout to the Go test binary.
args = ["-test.timeout=295s"],
# Embeds the library target so the test sources are built together
# with the library sources.
embed = [":mixedversion"],
deps = [
"//pkg/cmd/roachtest/option",
"//pkg/cmd/roachtest/roachtestutil/clusterupgrade",
"//pkg/roachprod/logger",
"//pkg/util/version",
"@com_github_stretchr_testify//require",
],
)

get_x_data(name = "get_x_data")
Loading