Skip to content

Commit

Permalink
go/upgrade: ensure upgrade handler exists
Browse files Browse the repository at this point in the history
  • Loading branch information
ptrus committed Mar 8, 2021
1 parent c5aec61 commit ae9521a
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 61 deletions.
1 change: 1 addition & 0 deletions .changelog/3768.bugfix.1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
go/upgrade: ensure upgrade handler exists
5 changes: 5 additions & 0 deletions .changelog/3768.bugfix.2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Allow switching binary while an upgrade is pending or in progress

Removes the internal `RunningVersion`/`SubmittingVersion` pending upgrade
fields. The binary can now be switched mid-upgrade as long as it remains
compatible with the in-progress upgrade.
5 changes: 5 additions & 0 deletions .changelog/3768.bugfix.3.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Progress the startup upgrade stage only after a successful startup step

Previously, the startup stage was recorded as done before it was actually run,
so if the startup upgrade failed, the stage would be skipped after the node
restarted.
17 changes: 0 additions & 17 deletions go/upgrade/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,6 @@ var (
// the consensus layer has reached the scheduled shutdown epoch and should be interrupted.
ErrStopForUpgrade = errors.New(ModuleName, 1, "upgrade: reached upgrade epoch")

// ErrUpgradePending is the error returned when there is a pending upgrade and the node detects that it is
// not the one performing it.
ErrUpgradePending = errors.New(ModuleName, 2, "upgrade: this binary is scheduled to be replaced")

// ErrNewTooSoon is the error returned when the node started isn't the pre-upgrade version and the upgrade
// epoch hasn't been reached yet.
ErrNewTooSoon = errors.New(ModuleName, 3, "upgrade: running different binary before reaching the upgrade epoch")

// ErrInvalidResumingVersion is the error returned when the running node's version is different from the one that
// started performing the upgrade.
ErrInvalidResumingVersion = errors.New(ModuleName, 4, "upgrade: node restarted mid-upgrade with different version")

// ErrAlreadyPending is the error returned from SubmitDescriptor when the specific upgrade is already pending.
ErrAlreadyPending = errors.New(ModuleName, 5, "upgrade: submitted upgrade is already pending, can not resubmit descriptor")

Expand Down Expand Up @@ -184,11 +172,6 @@ type PendingUpgrade struct {
// Descriptor is the upgrade descriptor describing the upgrade.
Descriptor *Descriptor `json:"descriptor"`

// SubmittingVersion is the version of the node used to submit the descriptor.
SubmittingVersion string `json:"submitting_version"`
// RunningVersion is the version of the node trying to execute the descriptor.
RunningVersion string `json:"running_version"`

// UpgradeHeight is the height at which the upgrade epoch was reached
// (or InvalidUpgradeHeight if it hasn't been reached yet).
UpgradeHeight int64 `json:"upgrade_height"`
Expand Down
4 changes: 4 additions & 0 deletions go/upgrade/migrations/dummy.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,7 @@ func (th *dummyMigrationHandler) ConsensusUpgrade(ctx *Context, privateCtx inter

return nil
}

func init() {
Register(DummyUpgradeName, &dummyMigrationHandler{})
}
30 changes: 18 additions & 12 deletions go/upgrade/migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
package migrations

import (
"fmt"
"sync"

"github.com/oasisprotocol/oasis-core/go/common/logging"
upgradeApi "github.com/oasisprotocol/oasis-core/go/upgrade/api"
)
Expand All @@ -11,9 +14,12 @@ const (
ModuleName = "upgrade-migrations"
)

var registeredHandlers = map[string]Handler{
DummyUpgradeName: &dummyMigrationHandler{},
}
var (
// registeredHandlers holds the registered migration handlers, keyed by upgrade name.
registeredHandlers sync.Map

// ErrMissingMigrationHandler is the error returned when a migration handler is not registered.
ErrMissingMigrationHandler = fmt.Errorf("missing migration handler")
)

// Handler is the interface used by migration handlers.
type Handler interface {
Expand Down Expand Up @@ -41,7 +47,10 @@ type Context struct {

// Register registers a new migration handler, by upgrade name.
//
// Each upgrade name may be registered only once; Register panics if a handler
// is already registered under name. The existence check and the insertion are
// performed by a single LoadOrStore call, so two concurrent registrations of
// the same name cannot both succeed.
func Register(name string, handler Handler) {
	if _, isRegistered := registeredHandlers.LoadOrStore(name, handler); isRegistered {
		panic(fmt.Errorf("migration handler already registered: %s", name))
	}
}

// NewContext returns a new upgrade migration context.
Expand All @@ -54,14 +63,11 @@ func NewContext(upgrade *upgradeApi.PendingUpgrade, dataDir string) *Context {
}

// GetHandler returns the handler associated with the upgrade described in the context.
// If the handler does not exist, this is considered a severe programmer error and will result in a panic.
func GetHandler(ctx *Context) Handler {
handler, ok := registeredHandlers[ctx.Upgrade.Descriptor.Name]
if !ok {
// If we got here, that means the upgrade descriptor checked out, including the upgrader hash.
// Nothing left to do but bite the dust.
panic("unknown upgrade name, no way forward")
func GetHandler(name string) (Handler, error) {
h, exists := registeredHandlers.Load(name)
if !exists {
return nil, ErrMissingMigrationHandler
}

return handler
return h.(Handler), nil
}
52 changes: 20 additions & 32 deletions go/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
beacon "github.com/oasisprotocol/oasis-core/go/beacon/api"
"github.com/oasisprotocol/oasis-core/go/common/logging"
"github.com/oasisprotocol/oasis-core/go/common/persistent"
"github.com/oasisprotocol/oasis-core/go/common/version"
"github.com/oasisprotocol/oasis-core/go/upgrade/api"
"github.com/oasisprotocol/oasis-core/go/upgrade/migrations"
)
Expand All @@ -23,14 +22,8 @@ var (
_ api.Backend = (*upgradeManager)(nil)

metadataStoreKey = []byte("descriptors")

thisVersion = makeVersionString()
)

func makeVersionString() string {
return version.SoftwareVersion
}

type upgradeManager struct {
store *persistent.ServiceStore
pending []*api.PendingUpgrade
Expand All @@ -54,7 +47,6 @@ func (u *upgradeManager) SubmitDescriptor(ctx context.Context, descriptor *api.D
pending := &api.PendingUpgrade{
Descriptor: descriptor,
}
pending.SubmittingVersion = thisVersion
u.pending = append(u.pending, pending)

u.logger.Info("received upgrade descriptor, scheduling shutdown",
Expand Down Expand Up @@ -87,7 +79,7 @@ func (u *upgradeManager) CancelUpgrade(ctx context.Context, descriptor *api.Desc
pending = append(pending, pu)
continue
}
if pu.RunningVersion != "" || pu.UpgradeHeight != api.InvalidUpgradeHeight || pu.HasAnyStages() {
if pu.UpgradeHeight != api.InvalidUpgradeHeight || pu.HasAnyStages() {
return api.ErrUpgradeInProgress
}
}
Expand Down Expand Up @@ -118,16 +110,12 @@ func (u *upgradeManager) checkStatus() error {
continue
}

// By this point, the descriptor is valid and still pending.
// Check if upgrade should proceed.
if pu.UpgradeHeight == api.InvalidUpgradeHeight {
// Only allow the old binary to run before the upgrade epoch.
if pu.SubmittingVersion != thisVersion {
return api.ErrNewTooSoon
}
return nil
continue
}

// Otherwise, the upgrade should proceed right now. Check that we have the right binary.
// The upgrade should proceed right now. Check that we have the right binary.
if err = pu.Descriptor.EnsureCompatible(); err != nil {
u.logger.Error("incompatible binary version for upgrade",
"upgrade_name", pu.Descriptor.Name,
Expand All @@ -137,14 +125,14 @@ func (u *upgradeManager) checkStatus() error {
return err
}

// In case the previous startup was e.g. interrupted during the second part of the
// upgrade, we need to make sure that we're the same version as the previous run.
if pu.RunningVersion != "" && pu.RunningVersion != thisVersion {
return api.ErrInvalidResumingVersion
// Ensure the upgrade handler exists.
if _, err = migrations.GetHandler(pu.Descriptor.Name); err != nil {
u.logger.Error("error getting migration handler for upgrade",
"name", pu.Descriptor.Name,
"err", err,
)
return err
}

// Everything checks out, fill in the blanks.
pu.RunningVersion = thisVersion
}

if err = u.flushDescriptorLocked(); err != nil {
Expand Down Expand Up @@ -194,25 +182,24 @@ func (u *upgradeManager) StartupUpgrade() error {
if pu.HasStage(api.UpgradeStageStartup) {
u.logger.Warn("startup upgrade already performed, skipping",
"name", pu.Descriptor.Name,
"submitted_by", pu.SubmittingVersion,
"version", pu.RunningVersion,
)
continue
}

// Execute the startup stage.
pu.PushStage(api.UpgradeStageStartup)
u.logger.Warn("performing startup upgrade",
"name", pu.Descriptor.Name,
"submitted_by", pu.SubmittingVersion,
"version", pu.RunningVersion,
logging.LogEvent, api.LogEventStartupUpgrade,
)
migrationCtx := migrations.NewContext(pu, u.dataDir)
handler := migrations.GetHandler(migrationCtx)
handler, err := migrations.GetHandler(pu.Descriptor.Name)
if err != nil {
return err
}
if err := handler.StartupUpgrade(migrationCtx); err != nil {
return err
}
pu.PushStage(api.UpgradeStageStartup)
}

return u.flushDescriptorLocked()
Expand Down Expand Up @@ -249,13 +236,14 @@ func (u *upgradeManager) ConsensusUpgrade(privateCtx interface{}, currentEpoch b
if !pu.HasStage(api.UpgradeStageConsensus) {
u.logger.Warn("performing consensus upgrade",
"name", pu.Descriptor.Name,
"submitted_by", pu.SubmittingVersion,
"version", pu.RunningVersion,
logging.LogEvent, api.LogEventConsensusUpgrade,
)

migrationCtx := migrations.NewContext(pu, u.dataDir)
handler := migrations.GetHandler(migrationCtx)
handler, err := migrations.GetHandler(pu.Descriptor.Name)
if err != nil {
return err
}
if err := handler.ConsensusUpgrade(migrationCtx, privateCtx); err != nil {
return err
}
Expand Down

0 comments on commit ae9521a

Please sign in to comment.