Skip to content

Commit

Permalink
Wait for metric increase
Browse files (browse the repository at this point in the history)
  • Loading branch information
dippynark committed Jan 7, 2024
1 parent 1a95670 commit 9ad6dca
Showing 1 changed file with 28 additions and 30 deletions.
58 changes: 28 additions & 30 deletions e2e/spot_migrator_test.go
Original file line number | Diff line number | Diff line change
[Expand Up]
@@ -139,31 +139,13 @@ func TestSpotMigrator(t *testing.T) {
require.Nil(t, err)
t.Logf("Deployment %s/%s is unavailable!", deployment.Namespace, deployment.Name)

// Verify that all control plane Nodes are schedulable
controlPlaneNodeSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "node-role.kubernetes.io/control-plane",
Operator: "Exists",
},
},
})
require.Nil(t, err)
nodeList = &corev1.NodeList{}
err = kubeClient.List(ctx, nodeList, client.MatchingLabelsSelector{Selector: controlPlaneNodeSelector})
require.Nil(t, err)
require.Greater(t, len(nodeList.Items), 0)
for _, node := range nodeList.Items {
require.False(t, node.Spec.Unschedulable)
}

// Delete Node; typically this would be done by the node controller but we simulate it here:
// https://github.com/hsbc/cost-manager/blob/bf176ada100e19a765d276aee1a0a2d6038275e0/pkg/controller/spot_migrator.go#L242-L250
err = kubeClient.Delete(ctx, node)
require.Nil(t, err)

// Wait for Prometheus metric to indicate successful migration
t.Logf("Waiting for successful Prometheus metric...")
t.Logf("Waiting for Prometheus metric to indicate successful migration...")
pod, err := kubernetes.WaitForAnyReadyPod(ctx, kubeClient, client.InNamespace("monitoring"), client.MatchingLabels{"app.kubernetes.io/name": "prometheus"})
require.Nil(t, err)
// Port forward to Prometheus in the background
[Expand All]
@@ -180,25 +162,41 @@ func TestSpotMigrator(t *testing.T) {
})
require.Nil(t, err)
prometheusAPI := prometheusv1.NewAPI(prometheusClient)
// Wait for the number of successful operations to increase
results, _, err := prometheusAPI.Query(ctx, `sum(cost_manager_spot_migrator_operation_success_total{job="cost-manager",namespace="cost-manager"})`, time.Now())
require.Nil(t, err)
require.Equal(t, 1, len(results.(model.Vector)))
currentMetricValue := results.(model.Vector)[0].Value
for {
results, _, err := prometheusAPI.Query(ctx, "cost_manager_spot_migrator_operation_success_total", time.Now())
results, _, err := prometheusAPI.Query(ctx, `sum(cost_manager_spot_migrator_operation_success_total{job="cost-manager",namespace="cost-manager"})`, time.Now())
require.Nil(t, err)
// Any result with a value greater than 0 indicates migration success
migrationSuccess := false
for _, result := range results.(model.Vector) {
if result.Value > 0 {
migrationSuccess = true
break
}
}
if migrationSuccess {
require.Equal(t, 1, len(results.(model.Vector)))
if results.(model.Vector)[0].Value > currentMetricValue {
break
}
time.Sleep(time.Second)
}
t.Logf("Found successful Prometheus metric!")
t.Logf("Migration successful!")

// Delete Namespace
err = kubeClient.Delete(ctx, namespace)
require.Nil(t, err)

// Verify that all control plane Nodes are schedulable
controlPlaneNodeSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "node-role.kubernetes.io/control-plane",
Operator: "Exists",
},
},
})
require.Nil(t, err)
nodeList = &corev1.NodeList{}
err = kubeClient.List(ctx, nodeList, client.MatchingLabelsSelector{Selector: controlPlaneNodeSelector})
require.Nil(t, err)
require.Greater(t, len(nodeList.Items), 0)
for _, node := range nodeList.Items {
require.False(t, node.Spec.Unschedulable)
}
}

0 comments on commit 9ad6dca

Please sign in to comment.