-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ibu cnf: add 'rollback after a failed upgrade' test
- Loading branch information
Showing
2 changed files
with
247 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
245 changes: 245 additions & 0 deletions
245
tests/lca/imagebasedupgrade/cnf/upgrade-talm/tests/rollback-after-failed-upgrade-test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,245 @@ | ||
package upgrade_test | ||
|
||
import ( | ||
"strings" | ||
"time" | ||
|
||
"k8s.io/utils/ptr" | ||
|
||
. "github.com/onsi/ginkgo/v2" | ||
. "github.com/onsi/gomega" | ||
"github.com/openshift-kni/eco-goinfra/pkg/cgu" | ||
"github.com/openshift-kni/eco-goinfra/pkg/lca" | ||
"github.com/openshift-kni/eco-goinfra/pkg/nodes" | ||
"github.com/openshift-kni/eco-goinfra/pkg/reportxml" | ||
"github.com/openshift-kni/eco-gotests/tests/internal/cluster" | ||
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/internal/cnfclusterinfo" | ||
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/internal/cnfhelper" | ||
. "github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/internal/cnfinittools" | ||
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/cnf/upgrade-talm/internal/tsparams" | ||
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/internal/nodestate" | ||
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/internal/safeapirequest" | ||
"github.com/openshift-kni/eco-gotests/tests/lca/imagebasedupgrade/internal/seedimage" | ||
) | ||
|
||
var ( | ||
ibu *lca.ImageBasedUpgradeBuilder | ||
seedImageVersion *seedimage.SeedImageContent | ||
err error | ||
lcaInitMonitorTimeout time.Duration | ||
) | ||
|
||
var _ = Describe( | ||
"Validating rollback stage after a failed upgrade", | ||
Label(tsparams.LabelRollbackFlow), func() { | ||
|
||
BeforeEach(func() { | ||
By("Fetching target sno cluster name", func() { | ||
err = cnfclusterinfo.PreUpgradeClusterInfo.SaveClusterInfo() | ||
Expect(err).NotTo(HaveOccurred(), "Failed to extract target sno cluster name") | ||
|
||
tsparams.TargetSnoClusterName = cnfclusterinfo.PreUpgradeClusterInfo.Name | ||
}) | ||
|
||
By("Retrieve seed image version", func() { | ||
ibu, err = lca.PullImageBasedUpgrade(TargetSNOAPIClient) | ||
Expect(err).NotTo(HaveOccurred(), "error pulling ibu resource from cluster") | ||
|
||
seedImageVersion, err = seedimage.GetContent(TargetSNOAPIClient, ibu.Definition.Spec.SeedImageRef.Version) | ||
Expect(err).NotTo(HaveOccurred(), "error getting seed image version info") | ||
}) | ||
|
||
By("Setting LCA init-monitor watchdog timer to 5 minutes to trigger rollback upon upgrade failure", func() { | ||
ibu, err = lca.PullImageBasedUpgrade(TargetSNOAPIClient) | ||
Expect(err).NotTo(HaveOccurred(), "error pulling ibu resource from cluster") | ||
|
||
ibu.AutoRollbackOnFailureInitMonitorTimeoutSeconds(300) | ||
}) | ||
}) | ||
|
||
AfterEach(func() { | ||
// Deleting CGUs created for validating the test case. | ||
By("Deleting pre-prep cgu created on target hub cluster", func() { | ||
err = cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.PrePrepCguName, | ||
tsparams.IbuCguNamespace) | ||
Expect(err).NotTo(HaveOccurred(), "Failed to delete pre-prep cgu on target hub cluster") | ||
}) | ||
|
||
By("Deleting prep cgu created on target hub cluster", func() { | ||
err = cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.PrepCguName, | ||
tsparams.IbuCguNamespace) | ||
Expect(err).NotTo(HaveOccurred(), "Failed to delete prep cgu on target hub cluster") | ||
}) | ||
|
||
By("Deleting upgrade cgu created on target hub cluster", func() { | ||
err = cnfhelper.DeleteIbuTestCguOnTargetHub(TargetHubAPIClient, tsparams.UpgradeCguName, | ||
tsparams.IbuCguNamespace) | ||
Expect(err).NotTo(HaveOccurred(), "Failed to delete upgrade cgu on target hub cluster") | ||
}) | ||
}) | ||
|
||
It("Rollback after a failed upgrade", reportxml.ID("69054"), func() { | ||
By("Creating, enabling ibu pre-prep CGU and waiting for CGU status to report completed", func() { | ||
prePrepCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient, | ||
tsparams.PrePrepCguName, tsparams.IbuCguNamespace, 1). | ||
WithCluster(tsparams.TargetSnoClusterName). | ||
WithManagedPolicy(tsparams.PrePrepPolicyName). | ||
WithCanary(tsparams.TargetSnoClusterName) | ||
prePrepCguBuilder.Definition.Spec.Enable = ptr.To(true) | ||
|
||
prePrepCguBuilder, err = prePrepCguBuilder.Create() | ||
Expect(err).NotTo(HaveOccurred(), "Failed to create pre-prep CGU.") | ||
|
||
_, err = prePrepCguBuilder.WaitUntilComplete(10 * time.Minute) | ||
Expect(err).NotTo(HaveOccurred(), "Pre-prep CGU did not complete in time.") | ||
}) | ||
|
||
By("Creating, enabling ibu prep CGU and waiting for CGU status to report completed", func() { | ||
prepCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient, | ||
tsparams.PrepCguName, tsparams.IbuCguNamespace, 1). | ||
WithCluster(tsparams.TargetSnoClusterName). | ||
WithManagedPolicy(tsparams.PrepPolicyName). | ||
WithCanary(tsparams.TargetSnoClusterName) | ||
prepCguBuilder.Definition.Spec.Enable = ptr.To(true) | ||
|
||
prepCguBuilder, err = prepCguBuilder.Create() | ||
Expect(err).NotTo(HaveOccurred(), "Failed to create prep CGU.") | ||
|
||
_, err = prepCguBuilder.WaitUntilComplete(25 * time.Minute) | ||
Expect(err).NotTo(HaveOccurred(), "Prep CGU did not complete in time.") | ||
}) | ||
|
||
By("Creating, and enabling ibu upgrade CGU", func() { | ||
upgradeCguBuilder := cgu.NewCguBuilder(TargetHubAPIClient, | ||
tsparams.UpgradeCguName, tsparams.IbuCguNamespace, 1). | ||
WithCluster(tsparams.TargetSnoClusterName). | ||
WithManagedPolicy(tsparams.UpgradePolicyName). | ||
WithCanary(tsparams.TargetSnoClusterName) | ||
upgradeCguBuilder.Definition.Spec.Enable = ptr.To(true) | ||
|
||
_, err = upgradeCguBuilder.Create() | ||
Expect(err).NotTo(HaveOccurred(), "Failed to create upgrade CGU.") | ||
}) | ||
|
||
By("Verifying auto rollback triggered upon upgrade failure", func() { | ||
|
||
By("Waiting for node rebooted into stateroot B and cluster become available", func() { | ||
|
||
By("Get list of node to be upgraded") | ||
|
||
ibuNode, err := nodes.List(TargetSNOAPIClient) | ||
Expect(err).NotTo(HaveOccurred(), "error listing node") | ||
|
||
By("Wait for node to become unreachable") | ||
|
||
for _, node := range ibuNode { | ||
unreachable, err := nodestate.WaitForNodeToBeUnreachable(node.Object.Name, "6443", time.Minute*15) | ||
|
||
Expect(err).To(BeNil(), "error waiting for %s node to shutdown", node.Object.Name) | ||
Expect(unreachable).To(BeTrue(), "error: node %s is still reachable", node.Object.Name) | ||
} | ||
|
||
By("Wait for node to become reachable") | ||
|
||
for _, node := range ibuNode { | ||
reachable, err := nodestate.WaitForNodeToBeReachable(node.Object.Name, "6443", time.Minute*20) | ||
|
||
Expect(err).To(BeNil(), "error waiting for %s node to become reachable", node.Object.Name) | ||
Expect(reachable).To(BeTrue(), "error: node %s is still unreachable", node.Object.Name) | ||
} | ||
|
||
By("Wait until node is reporting as Ready") | ||
|
||
err = safeapirequest.Do(func() error { | ||
_, err := nodes.WaitForAllNodesAreReady(TargetSNOAPIClient, time.Minute*10) | ||
|
||
return err | ||
}) | ||
Expect(err).To(BeNil(), "error waiting for node to become ready") | ||
|
||
By("Wait for IBU resource to be available") | ||
|
||
err = nodestate.WaitForIBUToBeAvailable(TargetSNOAPIClient, ibu, time.Minute*10) | ||
Expect(err).NotTo(HaveOccurred(), "error waiting for ibu resource to become available") | ||
}) | ||
|
||
By("Verifying current booted stateroot name on target sno cluster node", func() { | ||
getDeploymentIndexCmd := "rpm-ostree status --json | jq '.deployments[0].osname'" | ||
getDesiredStaterootName, err := cluster.ExecCmdWithStdout(TargetSNOAPIClient, getDeploymentIndexCmd) | ||
Expect(err).NotTo(HaveOccurred(), "could not execute command: %s", err) | ||
|
||
for _, stdout := range getDesiredStaterootName { | ||
for _, trimStaterootName := range strings.Split(stdout, "rhcos_") { | ||
bootedStaterootNameRes := strings.ReplaceAll(trimStaterootName, "_", "-") | ||
Expect(bootedStaterootNameRes).To(Equal(seedImageVersion), | ||
"Target cluster node booted into stateroot B") | ||
} | ||
} | ||
}) | ||
|
||
By("Simulate a fault to make upgrade fail, waiting LCA init-monitor timeout, and check upgrade cgu status", func() { | ||
faultInjectCmd := "echo a > /etc/mco/proxy.env" | ||
faultInjectCmdRes, err := cluster.ExecCmdWithStdout(TargetSNOAPIClient, faultInjectCmd) | ||
Expect(err).NotTo(HaveOccurred(), "could not execute command: %s", faultInjectCmdRes) | ||
|
||
By("Waiting for LCA init-monitor timeout to trigger auto rollback") | ||
lcaInitMonitorTimeout = 5 * time.Minute | ||
|
||
By("Verifying upgrade cgu status on target hub cluster") | ||
upgradeCguStatusCheck, err := cgu.Pull(TargetHubAPIClient, | ||
tsparams.UpgradeCguName, | ||
tsparams.IbuCguNamespace) | ||
Expect(err).NotTo(HaveOccurred(), "Failed to pull upgrade cgu status") | ||
|
||
_, err = upgradeCguStatusCheck.WaitUntilComplete(lcaInitMonitorTimeout) | ||
Expect(err).To(HaveOccurred(), "Upgrade CGU expected to report 'InProgress' state.") | ||
}) | ||
|
||
By("Waiting for node rebooted into stateroot A and cluster become available", func() { | ||
|
||
By("Get list of node to be upgraded") | ||
|
||
ibuNode, err := nodes.List(TargetSNOAPIClient) | ||
Expect(err).NotTo(HaveOccurred(), "error listing node") | ||
|
||
By("Wait for node to become unreachable") | ||
|
||
for _, node := range ibuNode { | ||
unreachable, err := nodestate.WaitForNodeToBeUnreachable(node.Object.Name, "6443", time.Minute*15) | ||
|
||
Expect(err).To(BeNil(), "error waiting for %s node to shutdown", node.Object.Name) | ||
Expect(unreachable).To(BeTrue(), "error: node %s is still reachable", node.Object.Name) | ||
} | ||
|
||
By("Wait for node to become reachable") | ||
|
||
for _, node := range ibuNode { | ||
reachable, err := nodestate.WaitForNodeToBeReachable(node.Object.Name, "6443", time.Minute*20) | ||
|
||
Expect(err).To(BeNil(), "error waiting for %s node to become reachable", node.Object.Name) | ||
Expect(reachable).To(BeTrue(), "error: node %s is still unreachable", node.Object.Name) | ||
} | ||
|
||
By("Wait until node is reporting as Ready") | ||
|
||
err = safeapirequest.Do(func() error { | ||
_, err := nodes.WaitForAllNodesAreReady(TargetSNOAPIClient, time.Minute*10) | ||
|
||
return err | ||
}) | ||
Expect(err).To(BeNil(), "error waiting for node to become ready") | ||
|
||
By("Wait for IBU resource to be available") | ||
|
||
err = nodestate.WaitForIBUToBeAvailable(TargetSNOAPIClient, ibu, time.Minute*10) | ||
Expect(err).NotTo(HaveOccurred(), "error waiting for ibu resource to become available") | ||
}) | ||
}) | ||
|
||
By("Validating target sno cluster version after auto rollback", func() { | ||
Expect(cnfclusterinfo.PreUpgradeClusterInfo.Version). | ||
To(Equal(cnfclusterinfo.PostUpgradeClusterInfo.Version), | ||
"Target sno cluster reports old cluster version") | ||
}) | ||
}) | ||
}) |