Skip to content

Commit

Permalink
system-test: Enable ptp checks in existing tests and fix stab output
Browse files Browse the repository at this point in the history
Also includes small improvements, such as a configurable reboot
recovery time, and fixes the stability output files so that they
are no longer shared between the two stability tests.

Signed-off-by: Rodrigo Lopez <[email protected]>
  • Loading branch information
Rodrigo Lopez committed Jul 1, 2024
1 parent d61419b commit caf7e10
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 39 deletions.
17 changes: 10 additions & 7 deletions tests/system-tests/ran-du/internal/randuconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@ type RanDuConfig struct {
CreateShellCmd string `yaml:"create_shell_cmd" envconfig:"ECO_RANDU_TESTWORKLOAD_CREATE_SHELLCMD"`
DeleteShellCmd string `yaml:"delete_shell_cmd" envconfig:"ECO_RANDU_TESTWORKLOAD_DELETE_SHELLCMD"`
} `yaml:"randu_test_workload"`
LaunchWorkloadIterations int `yaml:"launch_workload_iterations" envconfig:"ECO_RANDU_LAUNCH_WORKLOAD_ITERATIONS"`
SoftRebootIterations int `yaml:"soft_reboot_iterations" envconfig:"ECO_RANDU_SOFT_REBOOT_ITERATIONS"`
HardRebootIterations int `yaml:"hard_reboot_iterations" envconfig:"ECO_RANDU_HARD_REBOOT_ITERATIONS"`
StabilityDurationMins int64 `yaml:"stability_duration_mins" envconfig:"ECO_RANDU_STABILITY_DUR_MINS"`
StabilityIntervalMins int64 `yaml:"stability_interval_mins" envconfig:"ECO_RANDU_STABILITY_INT_MINS"`
StabilityOutputPath string `yaml:"stability_output_path" envconfig:"ECO_RANDU_STABILITY_OUTPUT_PATH"`
PtpEnabled bool `yaml:"ptp_enabled" envconfig:"ECO_RANDU_PTP_ENABLED"`
LaunchWorkloadIterations int `yaml:"launch_workload_iterations" envconfig:"ECO_RANDU_LAUNCH_WORKLOAD_ITERATIONS"`
SoftRebootIterations int `yaml:"soft_reboot_iterations" envconfig:"ECO_RANDU_SOFT_REBOOT_ITERATIONS"`
HardRebootIterations int `yaml:"hard_reboot_iterations" envconfig:"ECO_RANDU_HARD_REBOOT_ITERATIONS"`
StabilityWorkloadDurMins int `yaml:"stability_workload_duration_mins" envconfig:"ECO_RANDU_STAB_W_DUR_MINS"`
StabilityWorkloadIntMins int `yaml:"stability_workload_interval_mins" envconfig:"ECO_RANDU_STAB_W_INT_MINS"`
StabilityNoWorkloadDurMins int `yaml:"stability_no_workload_duration_mins" envconfig:"ECO_RANDU_STAB_NW_DUR_MINS"`
StabilityNoWorkloadIntMins int `yaml:"stability_no_workload_interval_mins" envconfig:"ECO_RANDU_STAB_NW_INT_MINS"`
StabilityOutputPath string `yaml:"stability_output_path" envconfig:"ECO_RANDU_STABILITY_OUTPUT_PATH"`
PtpEnabled bool `yaml:"ptp_enabled" envconfig:"ECO_RANDU_PTP_ENABLED"`
RebootRecoveryTime int `yaml:"reboot_recovery_time" envconfig:"ECO_RANDU_RECOVERY_TIME"`
}

// NewRanDuConfig returns instance of RanDuConfig config type.
Expand Down
7 changes: 5 additions & 2 deletions tests/system-tests/ran-du/internal/randuconfig/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ randu_test_workload:
launch_workload_iterations: 5
soft_reboot_iterations: 5
hard_reboot_iterations: 5
reboot_recovery_time: 2

stability_duration_mins: 30
stability_interval_mins: 5
stability_no_workload_duration_mins: 30
stability_no_workload_interval_mins: 5
stability_workload_duration_mins: 30
stability_workload_interval_mins: 5
stability_output_path: "/tmp/reports"

ptp_enabled: true
22 changes: 17 additions & 5 deletions tests/system-tests/ran-du/tests/hard-reboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ import (
"github.com/openshift-kni/eco-goinfra/pkg/pod"
"github.com/openshift-kni/eco-goinfra/pkg/reportxml"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/await"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/ptp"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/reboot"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/shell"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/sriov"
. "github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuinittools"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuparams"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randutestworkload"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -29,8 +29,8 @@ var _ = Describe(
BeforeAll(func() {
By("Preparing workload")
if namespace.NewBuilder(APIClient, RanDuTestConfig.TestWorkload.Namespace).Exists() {
err := randutestworkload.CleanNameSpace(randuparams.DefaultTimeout, RanDuTestConfig.TestWorkload.Namespace)
Expect(err).ToNot(HaveOccurred(), "Failed to clean workload test namespace objects")
_, err := shell.ExecuteCmd(RanDuTestConfig.TestWorkload.DeleteShellCmd)
Expect(err).ToNot(HaveOccurred(), "Failed to delete workload")
}

if RanDuTestConfig.TestWorkload.CreateMethod == randuparams.TestWorkloadShellLaunchMethod {
Expand Down Expand Up @@ -67,8 +67,9 @@ var _ = Describe(
err = reboot.HardRebootNode(node.Definition.Name, randuparams.TestNamespaceName)
Expect(err).ToNot(HaveOccurred(), "Error rebooting the nodes.")

By("Wait for two more minutes for the cluster resources to reconciliate their state")
time.Sleep(2 * time.Minute)
By(fmt.Sprintf("Wait for %d minutes for the cluster resources to reconciliate their state",
RanDuTestConfig.RebootRecoveryTime))
time.Sleep(time.Duration(RanDuTestConfig.RebootRecoveryTime) * time.Minute)

By("Remove any pods in UnexpectedAdmissionError state")
listOptions := metav1.ListOptions{
Expand Down Expand Up @@ -134,6 +135,17 @@ var _ = Describe(
"error: vfio devices inside pod( %s ) do not match pod %s attachments:", cmd.String(), pod.Definition.Name)
}
}

if RanDuTestConfig.PtpEnabled {
timeInterval := 3 * time.Minute
time.Sleep(timeInterval)

By("Check PTP status for the last 3 minutes")
ptpOnSync, err := ptp.ValidatePTPStatus(APIClient, timeInterval)
Expect(err).ToNot(HaveOccurred(), "PTP Error: %s", err)
Expect(ptpOnSync).To(Equal(true))
}

}
}
})
Expand Down
6 changes: 4 additions & 2 deletions tests/system-tests/ran-du/tests/kernel-crash-kdump.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ran_du_system_test

import (
"fmt"
"strings"
"time"

Expand Down Expand Up @@ -34,8 +35,9 @@ var _ = Describe(
err = reboot.KernelCrashKdump(node.Definition.Name)
Expect(err).ToNot(HaveOccurred(), "Error triggering a kernel crash on the node.")

By("Wait for two more minutes for the cluster resources to reconciliate their state")
time.Sleep(2 * time.Minute)
By(fmt.Sprintf("Wait for %d minutes for the cluster resources to reconciliate their state",
RanDuTestConfig.RebootRecoveryTime))
time.Sleep(time.Duration(RanDuTestConfig.RebootRecoveryTime) * time.Minute)

By("Assert vmcore dump was generated")
cmdToExec := []string{"chroot", "/rootfs", "ls", "/var/crash"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ import (
"github.com/openshift-kni/eco-goinfra/pkg/reportxml"
"github.com/openshift-kni/eco-gotests/tests/internal/cluster"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/await"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/ptp"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/shell"
. "github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuinittools"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuparams"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randutestworkload"
)

var _ = Describe(
Expand All @@ -29,10 +29,10 @@ var _ = Describe(
for iter := 0; iter < RanDuTestConfig.LaunchWorkloadIterations; iter++ {
fmt.Printf("Launch workload iteration no. %d\n", iter)

By("Clean up workload namespace")
if namespace.NewBuilder(APIClient, RanDuTestConfig.TestWorkload.Namespace).Exists() {
err := randutestworkload.CleanNameSpace(randuparams.DefaultTimeout, RanDuTestConfig.TestWorkload.Namespace)
Expect(err).ToNot(HaveOccurred(), "Failed to clean workload test namespace objects")
By("Deleting workload using shell method")
_, err := shell.ExecuteCmd(RanDuTestConfig.TestWorkload.DeleteShellCmd)
Expect(err).ToNot(HaveOccurred(), "Failed to delete workload")
}

if RanDuTestConfig.TestWorkload.CreateMethod == randuparams.TestWorkloadShellLaunchMethod {
Expand All @@ -54,6 +54,17 @@ var _ = Describe(
By("Waiting for all pods to become ready")
_, err = await.WaitUntilAllPodsReady(APIClient, RanDuTestConfig.TestWorkload.Namespace, randuparams.DefaultTimeout)
Expect(err).ToNot(HaveOccurred(), "pod not ready: %s", err)

if RanDuTestConfig.PtpEnabled {
timeInterval := 3 * time.Minute
time.Sleep(timeInterval)

By("Check PTP status for the last 3 minutes after workload deployment")
ptpOnSync, err := ptp.ValidatePTPStatus(APIClient, timeInterval)
Expect(err).ToNot(HaveOccurred(), "PTP Error: %s", err)
Expect(ptpOnSync).To(Equal(true))
}

}

By("Observe node load average while workload is running")
Expand Down
13 changes: 13 additions & 0 deletions tests/system-tests/ran-du/tests/launch-workload.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package ran_du_system_test

import (
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/openshift-kni/eco-goinfra/pkg/namespace"
"github.com/openshift-kni/eco-goinfra/pkg/reportxml"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/await"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/ptp"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/shell"
. "github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuinittools"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuparams"
Expand Down Expand Up @@ -46,6 +49,16 @@ var _ = Describe(
_, err := await.WaitUntilAllPodsReady(APIClient, RanDuTestConfig.TestWorkload.Namespace, randuparams.DefaultTimeout)
Expect(err).ToNot(HaveOccurred(), "pod not ready: %s", err)

if RanDuTestConfig.PtpEnabled {
timeInterval := 3 * time.Minute
time.Sleep(timeInterval)

By("Check PTP status for the last 3 minutes after workload deployment")
ptpOnSync, err := ptp.ValidatePTPStatus(APIClient, timeInterval)
Expect(err).ToNot(HaveOccurred(), "PTP Error: %s", err)
Expect(ptpOnSync).To(Equal(true))
}

})
AfterAll(func() {
By("Cleaning up test workload resources")
Expand Down
21 changes: 16 additions & 5 deletions tests/system-tests/ran-du/tests/soft-reboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ import (
"github.com/openshift-kni/eco-goinfra/pkg/pod"
"github.com/openshift-kni/eco-goinfra/pkg/reportxml"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/await"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/ptp"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/reboot"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/shell"
"github.com/openshift-kni/eco-gotests/tests/system-tests/internal/sriov"
. "github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuinittools"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randuparams"
"github.com/openshift-kni/eco-gotests/tests/system-tests/ran-du/internal/randutestworkload"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -31,8 +31,8 @@ var _ = Describe(
By("Preparing workload")

if namespace.NewBuilder(APIClient, RanDuTestConfig.TestWorkload.Namespace).Exists() {
err := randutestworkload.CleanNameSpace(randuparams.DefaultTimeout, RanDuTestConfig.TestWorkload.Namespace)
Expect(err).ToNot(HaveOccurred(), "Failed to clean workload test namespace objects")
_, err := shell.ExecuteCmd(RanDuTestConfig.TestWorkload.DeleteShellCmd)
Expect(err).ToNot(HaveOccurred(), "Failed to delete workload")
}

if RanDuTestConfig.TestWorkload.CreateMethod == "shell" {
Expand Down Expand Up @@ -82,8 +82,9 @@ var _ = Describe(
err = deploy.WaitUntilCondition("Available", 8*time.Minute)
Expect(err).ToNot(HaveOccurred(), "openshift apiserver deployment has not recovered in time after reboot")

By("Wait for two more minutes for the cluster resources to reconciliate their state")
time.Sleep(2 * time.Minute)
By(fmt.Sprintf("Wait for %d minutes for the cluster resources to reconciliate their state",
RanDuTestConfig.RebootRecoveryTime))
time.Sleep(time.Duration(RanDuTestConfig.RebootRecoveryTime) * time.Minute)

By("Remove any pods in UnexpectedAdmissionError state")
listOptions := metav1.ListOptions{
Expand Down Expand Up @@ -149,6 +150,16 @@ var _ = Describe(
"error: vfio devices inside pod( %s ) do not match pod %s attachments:", cmd.String(), pod.Definition.Name)
}
}

if RanDuTestConfig.PtpEnabled {
timeInterval := 3 * time.Minute
time.Sleep(timeInterval)

By("Check PTP status for the last 3 minutes")
ptpOnSync, err := ptp.ValidatePTPStatus(APIClient, timeInterval)
Expect(err).ToNot(HaveOccurred(), "PTP Error: %s", err)
Expect(ptpOnSync).To(Equal(true))
}
}
}
})
Expand Down
16 changes: 9 additions & 7 deletions tests/system-tests/ran-du/tests/stability-no-workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ var _ = Describe(
It("StabilityNoWorkload", reportxml.ID("74522"), Label("StabilityNoWorkload"), func() {

outputDir := RanDuTestConfig.StabilityOutputPath
policiesOutputFile := fmt.Sprintf("%s/stability_policies.log", outputDir)
ptpOutputFile := fmt.Sprintf("%s/stability_ptp.log", outputDir)
policiesOutputFile := fmt.Sprintf("%s/stability_no_workload_policies.log", outputDir)
ptpOutputFile := fmt.Sprintf("%s/stability_no_workload_ptp.log", outputDir)
namespaces := []string{"openshift-etcd", "openshift-apiserver"}

totalDuration := time.Duration(RanDuTestConfig.StabilityDurationMins) * time.Minute
interval := time.Duration(RanDuTestConfig.StabilityIntervalMins) * time.Minute
totalDuration := time.Duration(RanDuTestConfig.StabilityNoWorkloadDurMins) * time.Minute
interval := time.Duration(RanDuTestConfig.StabilityNoWorkloadIntMins) * time.Minute
startTime := time.Now()

By("Start collecting metrics during the stability test duration defined")
By(fmt.Sprintf("Collecting metrics during %d minutes", RanDuTestConfig.StabilityNoWorkloadDurMins))
for time.Since(startTime) < totalDuration {

if RanDuTestConfig.PtpEnabled {
Expand All @@ -64,7 +64,7 @@ var _ = Describe(

for _, namespace := range namespaces {
err = stability.SavePodsRestartsInNamespace(APIClient,
namespace, fmt.Sprintf("%s/stability_%s.log", outputDir, namespace))
namespace, fmt.Sprintf("%s/stability_no_workload_%s.log", outputDir, namespace))
if err != nil {
fmt.Printf("Error, could not save Pod restarts")
}
Expand All @@ -88,7 +88,9 @@ var _ = Describe(
// Verify podRestarts
By("Check Pod restarts")
for _, namespace := range namespaces {
_, err := stability.VerifyStabilityStatusChange(fmt.Sprintf("%s/stability_%s.log", outputDir, namespace))
_, err := stability.VerifyStabilityStatusChange(fmt.Sprintf("%s/stability_no_workload_%s.log",
outputDir,
namespace))
if err != nil {
stabilityErrors = append(stabilityErrors, err.Error())
}
Expand Down
14 changes: 7 additions & 7 deletions tests/system-tests/ran-du/tests/stability-workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,15 @@ var _ = Describe(
It("StabilityWorkload", reportxml.ID("42744"), Label("StabilityWorkload"), func() {

outputDir := RanDuTestConfig.StabilityOutputPath
policiesOutputFile := fmt.Sprintf("%s/stability_policies.log", outputDir)
ptpOutputFile := fmt.Sprintf("%s/stability_ptp.log", outputDir)
policiesOutputFile := fmt.Sprintf("%s/stability_workload_policies.log", outputDir)
ptpOutputFile := fmt.Sprintf("%s/stability_workload_ptp.log", outputDir)
namespaces := []string{"openshift-etcd", "openshift-apiserver"}

totalDuration := time.Duration(RanDuTestConfig.StabilityDurationMins) * time.Minute
interval := time.Duration(RanDuTestConfig.StabilityIntervalMins) * time.Minute
totalDuration := time.Duration(RanDuTestConfig.StabilityWorkloadDurMins) * time.Minute
interval := time.Duration(RanDuTestConfig.StabilityWorkloadIntMins) * time.Minute
startTime := time.Now()

By(fmt.Sprintf("Collecting metrics during %d minutes", totalDuration))
By(fmt.Sprintf("Collecting metrics during %d minutes", RanDuTestConfig.StabilityWorkloadDurMins))
for time.Since(startTime) < totalDuration {

if RanDuTestConfig.PtpEnabled {
Expand All @@ -84,7 +84,7 @@ var _ = Describe(
}
for _, namespace := range namespaces {
err = stability.SavePodsRestartsInNamespace(APIClient,
namespace, fmt.Sprintf("%s/stability_%s.log", outputDir, namespace))
namespace, fmt.Sprintf("%s/stability_workload_%s.log", outputDir, namespace))
if err != nil {
fmt.Printf("Error, could not save pod restarts")
}
Expand All @@ -107,7 +107,7 @@ var _ = Describe(
// Verify podRestarts
By("Check Pod restarts")
for _, namespace := range namespaces {
_, err := stability.VerifyStabilityStatusChange(fmt.Sprintf("%s/stability_%s.log", outputDir, namespace))
_, err := stability.VerifyStabilityStatusChange(fmt.Sprintf("%s/stability_workload_%s.log", outputDir, namespace))
if err != nil {
stabilityErrors = append(stabilityErrors, err.Error())
}
Expand Down

0 comments on commit caf7e10

Please sign in to comment.