Skip to content

Commit

Permalink
ibu cnf: add 'rollback after a failed upgrade' test
Browse files Browse the repository at this point in the history
  • Loading branch information
mpmaruthu committed Jun 24, 2024
1 parent d0ade6d commit f9141f6
Show file tree
Hide file tree
Showing 2 changed files with 270 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ const (
LabelPrepAbortFlow = "ibu-prep-abort"
// LabelUpgradeAbortFlow represents upgrade-abort label that can be used for test cases selection.
LabelUpgradeAbortFlow = "ibu-upgrade-abort"
// LabelRollbackFlow represents rollback label that can be used for test cases selection.
LabelRollbackFlow = "ibu-auto-rollback"

// IbuCguNamespace is the namespace where IBU CGUs created on target hub.
IbuCguNamespace = "default"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
package upgrade_test

import (
"strings"
"time"

"k8s.io/utils/ptr"

"github.com/golang/glog"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/openshift-kni/eco-goinfra/pkg/cgu"
"github.com/openshift-kni/eco-goinfra/pkg/lca"
"github.com/openshift-kni/eco-goinfra/pkg/nodes"
"github.com/openshift-kni/eco-goinfra/pkg/reportxml"
"github.com/openshift-kni/eco-gotests/tests/internal/cluster"
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/internal/cnfclusterinfo"
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/internal/cnfhelper"
. "github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/internal/cnfinittools"
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/upgrade-talm/internal/tsparams"
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/internal/nodestate"
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/internal/safeapirequest"
)

// Package-level state shared between the setup and the steps of the rollback test.
var (
	// err is a shared error slot that several test steps assign to before asserting on it.
	err error
	// seedImageVersion is the seed image version read from the IBU resource spec during setup;
	// it is later compared against the booted stateroot name.
	seedImageVersion string
	// ibu is the image-based upgrade resource pulled from the target SNO cluster during setup.
	ibu *lca.ImageBasedUpgradeBuilder
)

// Rollback suite: drives an image-based upgrade of the target SNO cluster through the
// pre-prep, prep and upgrade CGUs, injects a fault once the node is running from the new
// stateroot, waits for the node to reboot again and finally checks that the cluster version
// matches the one recorded before the test. AfterEach removes the CGUs and runs the
// finalize CGU.
var _ = Describe(
	"Validating rollback stage after a failed upgrade",
	Label(tsparams.LabelRollbackFlow), func() {
		// waitForNodeRebootAndIBU waits for one full reboot cycle of every target node:
		// API port 6443 becomes unreachable, then reachable again, all nodes report Ready
		// and the IBU resource can be reached. The same sequence is needed twice, once
		// for the upgrade reboot and once for the rollback reboot.
		waitForNodeRebootAndIBU := func() {
			By("Get list of node to be upgraded")

			ibuNodes, err := nodes.List(TargetSNOAPIClient)
			Expect(err).NotTo(HaveOccurred(), "error listing node")

			By("Wait for node to become unreachable")

			for _, node := range ibuNodes {
				unreachable, err := nodestate.WaitForNodeToBeUnreachable(node.Object.Name, "6443", time.Minute*10)

				Expect(err).NotTo(HaveOccurred(), "error waiting for %s node to shutdown", node.Object.Name)
				Expect(unreachable).To(BeTrue(), "error: node %s is still reachable", node.Object.Name)
			}

			By("Wait for node to become reachable")

			for _, node := range ibuNodes {
				reachable, err := nodestate.WaitForNodeToBeReachable(node.Object.Name, "6443", time.Minute*30)

				Expect(err).NotTo(HaveOccurred(), "error waiting for %s node to become reachable", node.Object.Name)
				Expect(reachable).To(BeTrue(), "error: node %s is still unreachable", node.Object.Name)
			}

			By("Wait until node is reporting as Ready")

			err = safeapirequest.Do(func() error {
				_, err := nodes.WaitForAllNodesAreReady(TargetSNOAPIClient, time.Minute*15)

				return err
			})
			Expect(err).NotTo(HaveOccurred(), "error waiting for node to become ready")

			By("Wait for IBU resource to be available")

			err = nodestate.WaitForIBUToBeAvailable(TargetSNOAPIClient, ibu, time.Minute*15)
			Expect(err).NotTo(HaveOccurred(), "error waiting for ibu resource to become available")
		}

		BeforeEach(func() {
			By("Saving target sno cluster info before the test", func() {
				err := cnfclusterinfo.PreUpgradeClusterInfo.SaveClusterInfo()
				Expect(err).NotTo(HaveOccurred(), "Failed to collect and save target sno cluster info before the test")
			})

			By("Fetching target sno cluster name", func() {
				// The cluster info was already collected by the previous step, so the name
				// is read from it instead of collecting everything a second time.
				tsparams.TargetSnoClusterName = cnfclusterinfo.PreUpgradeClusterInfo.Name
				Expect(tsparams.TargetSnoClusterName).NotTo(BeEmpty(), "Failed to extract target sno cluster name")
			})

			By("Retrieve seed image version and updating LCA init-monitor watchdog timer ", func() {
				ibu, err = lca.PullImageBasedUpgrade(TargetSNOAPIClient)
				Expect(err).NotTo(HaveOccurred(), "error pulling ibu resource from cluster")

				seedImageVersion = ibu.Definition.Spec.SeedImageRef.Version

				By("Setting LCA init-monitor watchdog timer to 5 minutes")
				ibu.Definition.Spec.AutoRollbackOnFailure.InitMonitorTimeoutSeconds = 300
				ibu, err = ibu.Update()
				Expect(err).NotTo(HaveOccurred(), "error updating ibu resource with custom lca init-monitor timeout value")
			})
		})

		AfterEach(func() {
			// Deleting CGUs created for validating the test case.
			By("Deleting pre-prep cgu created on target hub cluster", func() {
				err := cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.PrePrepCguName,
					tsparams.IbuCguNamespace)
				Expect(err).NotTo(HaveOccurred(), "Failed to delete pre-prep cgu on target hub cluster")
			})

			By("Deleting prep cgu created on target hub cluster", func() {
				err := cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.PrepCguName,
					tsparams.IbuCguNamespace)
				Expect(err).NotTo(HaveOccurred(), "Failed to delete prep cgu on target hub cluster")
			})

			By("Deleting upgrade cgu created on target hub cluster", func() {
				err := cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.UpgradeCguName,
					tsparams.IbuCguNamespace)
				Expect(err).NotTo(HaveOccurred(), "Failed to delete upgrade cgu on target hub cluster")
			})

			By("Creating, enabling ibu finalize CGU and waiting for CGU status to report completed", func() {
				finalizeCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient,
					tsparams.FinalizeCguName, tsparams.IbuCguNamespace, 1).
					WithCluster(tsparams.TargetSnoClusterName).
					WithManagedPolicy(tsparams.FinalizePolicyName).
					WithCanary(tsparams.TargetSnoClusterName)
				finalizeCguBuilder.Definition.Spec.Enable = ptr.To(true)

				createdFinalizeCgu, err := finalizeCguBuilder.Create()
				Expect(err).NotTo(HaveOccurred(), "Failed to create finalize CGU.")

				_, err = createdFinalizeCgu.WaitUntilComplete(5 * time.Minute)
				Expect(err).NotTo(HaveOccurred(), "Finalize CGU did not complete in time.")
			})

			By("Deleting finalize cgu created on target hub cluster", func() {
				err := cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.FinalizeCguName,
					tsparams.IbuCguNamespace)
				Expect(err).NotTo(HaveOccurred(), "Failed to delete finalize cgu on target hub cluster")
			})
		})

		It("Rollback after a failed upgrade", reportxml.ID("69054"), func() {
			By("Creating, enabling ibu pre-prep CGU and waiting for CGU status to report completed", func() {
				prePrepCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient,
					tsparams.PrePrepCguName, tsparams.IbuCguNamespace, 1).
					WithCluster(tsparams.TargetSnoClusterName).
					WithManagedPolicy(tsparams.PrePrepPolicyName).
					WithCanary(tsparams.TargetSnoClusterName)
				prePrepCguBuilder.Definition.Spec.Enable = ptr.To(true)

				createdPrePrepCgu, err := prePrepCguBuilder.Create()
				Expect(err).NotTo(HaveOccurred(), "Failed to create pre-prep CGU.")

				_, err = createdPrePrepCgu.WaitUntilComplete(10 * time.Minute)
				Expect(err).NotTo(HaveOccurred(), "Pre-prep CGU did not complete in time.")
			})

			By("Creating, enabling ibu prep CGU and waiting for CGU status to report completed", func() {
				prepCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient,
					tsparams.PrepCguName, tsparams.IbuCguNamespace, 1).
					WithCluster(tsparams.TargetSnoClusterName).
					WithManagedPolicy(tsparams.PrepPolicyName).
					WithCanary(tsparams.TargetSnoClusterName)
				prepCguBuilder.Definition.Spec.Enable = ptr.To(true)

				createdPrepCgu, err := prepCguBuilder.Create()
				Expect(err).NotTo(HaveOccurred(), "Failed to create prep CGU.")

				_, err = createdPrepCgu.WaitUntilComplete(25 * time.Minute)
				Expect(err).NotTo(HaveOccurred(), "Prep CGU did not complete in time.")
			})

			By("Creating, enabling ibu upgrade CGU, and waiting for node rebooted into stateroot B", func() {
				By("Creating and enabling ibu upgrade CGU")

				upgradeCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient,
					tsparams.UpgradeCguName, tsparams.IbuCguNamespace, 1).
					WithCluster(tsparams.TargetSnoClusterName).
					WithManagedPolicy(tsparams.UpgradePolicyName).
					WithCanary(tsparams.TargetSnoClusterName)
				upgradeCguBuilder.Definition.Spec.Enable = ptr.To(true)

				// The upgrade CGU is not waited on: progress is tracked through the node
				// reboot cycle instead.
				_, err := upgradeCguBuilder.Create()
				Expect(err).NotTo(HaveOccurred(), "Failed to create upgrade CGU.")

				waitForNodeRebootAndIBU()
			})

			By("Verifying booted stateroot name on target sno cluster node", func() {
				getDeploymentIndexCmd := "rpm-ostree status --json | jq '.deployments[0].osname'"
				staterootNames, err := cluster.ExecCmdWithStdout(TargetSNOAPIClient, getDeploymentIndexCmd)
				Expect(err).NotTo(HaveOccurred(), "could not execute command %q", getDeploymentIndexCmd)

				seedVersionFound := false

				for _, stdout := range staterootNames {
					// Underscores are normalised to dashes before matching against the seed version.
					bootedStaterootName := strings.ReplaceAll(stdout, "_", "-")
					if bootedStaterootName == "" || !strings.Contains(bootedStaterootName, seedImageVersion) {
						continue
					}

					glog.V(100).Infof("Found %s in %s", seedImageVersion, bootedStaterootName)

					seedVersionFound = true
				}

				Expect(seedVersionFound).To(BeTrue(),
					"Target cluster node did not boot into stateroot B (seed image version %q)", seedImageVersion)
			})

			By("Simulate a fault to make upgrade fail", func() {
				faultInjectCmd := "echo a > /etc/mco/proxy.env"
				_, err := cluster.ExecCmdWithStdout(TargetSNOAPIClient, faultInjectCmd)
				Expect(err).NotTo(HaveOccurred(), "could not execute command %q", faultInjectCmd)
			})

			By("Verifying auto rollback triggered upon upgrade failure", func() {
				By("Waiting for node rebooted into stateroot A and cluster become available", func() {
					waitForNodeRebootAndIBU()
				})
			})

			By("Saving target sno cluster info after the test", func() {
				err := cnfclusterinfo.PostUpgradeClusterInfo.SaveClusterInfo()
				Expect(err).NotTo(HaveOccurred(), "Failed to collect and save target sno cluster info after the test")
			})

			By("Validating target sno cluster version after auto rollback", func() {
				Expect(cnfclusterinfo.PreUpgradeClusterInfo.Version).
					To(Equal(cnfclusterinfo.PostUpgradeClusterInfo.Version),
						"Target sno cluster does not report the pre-upgrade cluster version after auto rollback")
			})
		})
	})

0 comments on commit f9141f6

Please sign in to comment.