Skip to content

Commit

Permalink
#2285: rollup CR statistic metrics in v2 (#218)
Browse files Browse the repository at this point in the history
* #2285: rollup CR statistic metrics in v2

Signed-off-by: sbadla1 <[email protected]>

* #2285: updated metric flags

Signed-off-by: sbadla1 <[email protected]>

* #2285: updated metric flags

Signed-off-by: sbadla1 <[email protected]>
  • Loading branch information
sahilbadla authored Apr 21, 2021
1 parent d5935e3 commit 1f0f075
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 26 deletions.
77 changes: 53 additions & 24 deletions controllers/common/metrics.go
Original file line number Diff line number Diff line change
@@ -1,38 +1,55 @@
package common

import (
"github.com/keikoproj/upgrade-manager/controllers/common/log"
"github.com/prometheus/client_golang/prometheus"
"reflect"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"strings"
"time"

"github.com/keikoproj/upgrade-manager/controllers/common/log"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

//All cluster level node upgrade statistics
var (
metricNamespace = "upgrade_manager_v2"

var nodeRotationTotal = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "node",
Name: "rotation_total_seconds",
Help: "Node rotation total",
Buckets: []float64{
10.0,
30.0,
60.0,
90.0,
120.0,
180.0,
300.0,
600.0,
900.0,
},
})
//All cluster level node upgrade statistics
nodeRotationTotal = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: metricNamespace,
Name: "node_rotation_total_seconds",
Help: "Node rotation total",
Buckets: []float64{
10.0,
30.0,
60.0,
90.0,
120.0,
180.0,
300.0,
600.0,
900.0,
},
})

stepSummaries = make(map[string]map[string]prometheus.Summary)

var stepSummaries = make(map[string]map[string]prometheus.Summary)
CRStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "resource_status",
Help: "Rollup CR statistics, partitioned by name.",
},
[]string{
// name of the CR
"resource_name",
},
)
)

// InitMetrics registers the package's collectors (the node rotation histogram
// and the CR status gauge vector) with the controller-runtime metrics
// registry. MustRegister is used, so a registration error is not returned to
// the caller.
func InitMetrics() {
	metrics.Registry.MustRegister(
		nodeRotationTotal,
		CRStatus,
	)
}

// Add rolling update step duration when the step is completed
Expand All @@ -52,8 +69,8 @@ func AddStepDuration(groupName string, stepName string, duration time.Duration)
if s, ok := steps[stepName]; !ok {
summary = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: "node",
Name: stepName + "_seconds",
Namespace: metricNamespace,
Name: "node_" + stepName + "_seconds",
Help: "Summary for node " + stepName,
ConstLabels: prometheus.Labels{"group": groupName},
})
Expand All @@ -72,3 +89,15 @@ func AddStepDuration(groupName string, stepName string, duration time.Duration)
summary.Observe(duration.Seconds())
}
}

// SetRollupInitOrRunningStatus sets the CRStatus gauge labelled with ruName
// to 0, the value used for a rolling upgrade that is initializing or running.
func SetRollupInitOrRunningStatus(ruName string) {
	gauge := CRStatus.WithLabelValues(ruName)
	gauge.Set(0)
}

// SetRollupCompletedStatus sets the CRStatus gauge labelled with ruName to 1,
// the value used for a rolling upgrade that has completed.
func SetRollupCompletedStatus(ruName string) {
	gauge := CRStatus.WithLabelValues(ruName)
	gauge.Set(1)
}

// SetRollupFailedStatus sets the CRStatus gauge labelled with ruName to -1,
// the value used for a rolling upgrade that has failed.
func SetRollupFailedStatus(ruName string) {
	gauge := CRStatus.WithLabelValues(ruName)
	gauge.Set(-1)
}
22 changes: 21 additions & 1 deletion controllers/common/metrics_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package common

import (
"github.com/onsi/gomega"
"testing"

"github.com/onsi/gomega"
)

func TestAddRollingUpgradeStepDuration(t *testing.T) {
Expand All @@ -27,3 +28,22 @@ func TestAddRollingUpgradeStepDuration(t *testing.T) {
AddStepDuration("test-asg", "total", 1)
g.Expect(stepSummaries["test-asg"]["kickoff"]).NotTo(gomega.BeNil())
}

// TestCRStatusCompleted calls each CR status setter with its own label value
// and then checks that CRStatus can return a gauge for that label without
// error. It does not assert the numeric value stored in the gauge.
func TestCRStatusCompleted(t *testing.T) {
	g := gomega.NewGomegaWithT(t)

	cases := []struct {
		label  string
		setter func(string)
	}{
		{"cr_test_1", SetRollupInitOrRunningStatus},
		{"cr_test_2", SetRollupCompletedStatus},
		{"cr_test_3", SetRollupFailedStatus},
	}

	for _, c := range cases {
		c.setter(c.label)

		gauge, err := CRStatus.GetMetricWithLabelValues(c.label)
		g.Expect(err).To(gomega.BeNil())
		g.Expect(gauge).ToNot(gomega.BeNil())
	}
}
5 changes: 5 additions & 0 deletions controllers/rollingupgrade_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,21 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque
r.Info("admitted new rollingupgrade", "name", rollingUpgrade.NamespacedName(), "scalingGroup", scalingGroupName)
r.AdmissionMap.Store(rollingUpgrade.NamespacedName(), scalingGroupName)
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusInit)
common.SetRollupInitOrRunningStatus(rollingUpgrade.Name)

r.Cloud = NewDiscoveredState(r.Auth, r.Logger)
if err := r.Cloud.Discover(); err != nil {
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError)
// Record the failure in the resource_status Prometheus gauge (value -1)
common.SetRollupFailedStatus(rollingUpgrade.Name)
return ctrl.Result{}, err
}

// process node rotation
if err := r.RotateNodes(rollingUpgrade); err != nil {
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError)
// Record the failure in the resource_status Prometheus gauge (value -1)
common.SetRollupFailedStatus(rollingUpgrade.Name)
return ctrl.Result{}, err
}

Expand Down
3 changes: 3 additions & 0 deletions controllers/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (r *RollingUpgradeReconciler) RotateNodes(rollingUpgrade *v1alpha1.RollingU
drainInterval = rollingUpgrade.PostDrainDelaySeconds()
)
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusRunning)
common.SetRollupInitOrRunningStatus(rollingUpgrade.Name)

// set status start time
if rollingUpgrade.StartTime() == "" {
Expand Down Expand Up @@ -84,6 +85,8 @@ func (r *RollingUpgradeReconciler) RotateNodes(rollingUpgrade *v1alpha1.RollingU
// check if all instances are rotated.
if !r.IsScalingGroupDrifted(rollingUpgrade) {
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusComplete)
// Record completion in the resource_status Prometheus gauge (value 1)
common.SetRollupCompletedStatus(rollingUpgrade.Name)
return nil
}

Expand Down
47 changes: 46 additions & 1 deletion controllers/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func TestIsScalingGroupDrifted(t *testing.T) {
false,
},
{
"All instances have different launch config as the ASG, expect false from IsScalingGroupDrifted",
"All instances have different launch config as the ASG, expect true from IsScalingGroupDrifted",
createRollingUpgradeReconciler(t),
createRollingUpgrade(),
func() *MockAutoscalingGroup {
Expand All @@ -284,6 +284,51 @@ func TestIsScalingGroupDrifted(t *testing.T) {

}

// TestRotateNodes runs RotateNodes against a mocked autoscaling client for
// each table case and checks the status left on the RollingUpgrade.
//
// Each case must return a nil error from RotateNodes and leave
// CurrentStatus() equal to ExpectedStatusValue. Failure messages include the
// case description so a failing case can be identified.
func TestRotateNodes(t *testing.T) {
	var tests = []struct {
		TestDescription     string
		Reconciler          *RollingUpgradeReconciler
		RollingUpgrade      *v1alpha1.RollingUpgrade
		AsgClient           *MockAutoscalingGroup
		ExpectedValue       bool // not asserted by this test
		ExpectedStatusValue string
	}{
		{
			"All instances have different launch config as the ASG, expect true from IsScalingGroupDrifted",
			createRollingUpgradeReconciler(t),
			createRollingUpgrade(),
			func() *MockAutoscalingGroup {
				newAsgClient := createASGClient()
				newAsgClient.autoScalingGroups[0].LaunchConfigurationName = aws.String("different-launch-config")
				return newAsgClient
			}(),
			true,
			v1alpha1.StatusRunning,
		},
		{
			"All instances have the same launch config as the ASG, expect false from IsScalingGroupDrifted",
			createRollingUpgradeReconciler(t),
			createRollingUpgrade(),
			createASGClient(),
			false,
			v1alpha1.StatusComplete,
		},
	}
	for _, test := range tests {
		// Point the reconciler at this case's mocked scaling groups and client.
		test.Reconciler.Cloud.ScalingGroups = test.AsgClient.autoScalingGroups
		test.Reconciler.Auth.AmazonClientSet.AsgClient = test.AsgClient

		if err := test.Reconciler.RotateNodes(test.RollingUpgrade); err != nil {
			// Include the description; the original message omitted it.
			t.Errorf("Test Description: %s \n expected value: nil, actual value: %v", test.TestDescription, err)
		}
		if got := test.RollingUpgrade.CurrentStatus(); got != test.ExpectedStatusValue {
			t.Errorf("Test Description: %s \n expected value: %s, actual value: %s", test.TestDescription, test.ExpectedStatusValue, got)
		}
	}
}

func TestDesiredNodesReady(t *testing.T) {
var tests = []struct {
TestDescription string
Expand Down

0 comments on commit 1f0f075

Please sign in to comment.