Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#2285: rollup CR statistic metrics in v2 #218

Merged
merged 3 commits into from
Apr 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 53 additions & 24 deletions controllers/common/metrics.go
Original file line number Diff line number Diff line change
@@ -1,38 +1,55 @@
package common

import (
"github.com/keikoproj/upgrade-manager/controllers/common/log"
"github.com/prometheus/client_golang/prometheus"
"reflect"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"strings"
"time"

"github.com/keikoproj/upgrade-manager/controllers/common/log"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

//All cluster level node upgrade statistics
var (
metricNamespace = "upgrade_manager_v2"

var nodeRotationTotal = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "node",
Name: "rotation_total_seconds",
Help: "Node rotation total",
Buckets: []float64{
10.0,
30.0,
60.0,
90.0,
120.0,
180.0,
300.0,
600.0,
900.0,
},
})
// All cluster level node upgrade statistics.
//
// nodeRotationTotal is a histogram published as "node_rotation_total_seconds"
// under metricNamespace. The explicit bucket boundaries below range from 10 to
// 900 (presumably seconds, going by the metric name).
nodeRotationTotal = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: metricNamespace,
Name: "node_rotation_total_seconds",
Help: "Node rotation total",
Buckets: []float64{
10.0,
30.0,
60.0,
90.0,
120.0,
180.0,
300.0,
600.0,
900.0,
},
})

stepSummaries = make(map[string]map[string]prometheus.Summary)

var stepSummaries = make(map[string]map[string]prometheus.Summary)
// CRStatus is a gauge vector published as "resource_status" under
// metricNamespace, with one series per "resource_name" label value.
// The setters in this file write 0 (SetRollupInitOrRunningStatus),
// 1 (SetRollupCompletedStatus) and -1 (SetRollupFailedStatus).
CRStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "resource_status",
Help: "Rollup CR statistics, partitioned by name.",
},
[]string{
// name of the CR
"resource_name",
},
)
)

// InitMetrics registers the package-level collectors (the node rotation
// histogram and the CR status gauge vector) with the controller-runtime
// metrics registry. MustRegister panics if a collector cannot be registered.
func InitMetrics() {
	metrics.Registry.MustRegister(
		nodeRotationTotal,
		CRStatus,
	)
}

// Add rolling update step duration when the step is completed
Expand All @@ -52,8 +69,8 @@ func AddStepDuration(groupName string, stepName string, duration time.Duration)
if s, ok := steps[stepName]; !ok {
summary = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: "node",
Name: stepName + "_seconds",
Namespace: metricNamespace,
Name: "node_" + stepName + "_seconds",
Help: "Summary for node " + stepName,
ConstLabels: prometheus.Labels{"group": groupName},
})
Expand All @@ -72,3 +89,15 @@ func AddStepDuration(groupName string, stepName string, duration time.Duration)
summary.Observe(duration.Seconds())
}
}

// SetRollupInitOrRunningStatus sets the CRStatus gauge labelled with ruName
// to 0, the value this package uses for a CR that is initializing or running.
func SetRollupInitOrRunningStatus(ruName string) {
	gauge := CRStatus.WithLabelValues(ruName)
	gauge.Set(0)
}

// SetRollupCompletedStatus sets the CRStatus gauge labelled with ruName to 1,
// the value this package uses for a completed CR.
func SetRollupCompletedStatus(ruName string) {
	gauge := CRStatus.WithLabelValues(ruName)
	gauge.Set(1)
}

// SetRollupFailedStatus sets the CRStatus gauge labelled with ruName to -1,
// the value this package uses for a failed CR.
func SetRollupFailedStatus(ruName string) {
	gauge := CRStatus.WithLabelValues(ruName)
	gauge.Set(-1)
}
22 changes: 21 additions & 1 deletion controllers/common/metrics_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package common

import (
"github.com/onsi/gomega"
"testing"

"github.com/onsi/gomega"
)

func TestAddRollingUpgradeStepDuration(t *testing.T) {
Expand All @@ -27,3 +28,22 @@ func TestAddRollingUpgradeStepDuration(t *testing.T) {
AddStepDuration("test-asg", "total", 1)
g.Expect(stepSummaries["test-asg"]["kickoff"]).NotTo(gomega.BeNil())
}

// TestCRStatusCompleted calls each CR status setter with its own label value
// and checks that CRStatus then returns a non-nil gauge, without error, for
// that label.
func TestCRStatusCompleted(t *testing.T) {
	g := gomega.NewGomegaWithT(t)

	cases := []struct {
		resourceName string
		setStatus    func(string)
	}{
		{"cr_test_1", SetRollupInitOrRunningStatus},
		{"cr_test_2", SetRollupCompletedStatus},
		{"cr_test_3", SetRollupFailedStatus},
	}

	for _, c := range cases {
		c.setStatus(c.resourceName)

		gauge, err := CRStatus.GetMetricWithLabelValues(c.resourceName)
		g.Expect(err).To(gomega.BeNil())
		g.Expect(gauge).ToNot(gomega.BeNil())
	}
}
5 changes: 5 additions & 0 deletions controllers/rollingupgrade_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,21 @@ func (r *RollingUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Reque
r.Info("admitted new rollingupgrade", "name", rollingUpgrade.NamespacedName(), "scalingGroup", scalingGroupName)
r.AdmissionMap.Store(rollingUpgrade.NamespacedName(), scalingGroupName)
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusInit)
common.SetRollupInitOrRunningStatus(rollingUpgrade.Name)

r.Cloud = NewDiscoveredState(r.Auth, r.Logger)
if err := r.Cloud.Discover(); err != nil {
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError)
// Set prometheus metric cr_status_failed
common.SetRollupFailedStatus(rollingUpgrade.Name)
return ctrl.Result{}, err
}

// process node rotation
if err := r.RotateNodes(rollingUpgrade); err != nil {
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusError)
// Set prometheus metric cr_status_failed
common.SetRollupFailedStatus(rollingUpgrade.Name)
return ctrl.Result{}, err
}

Expand Down
3 changes: 3 additions & 0 deletions controllers/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (r *RollingUpgradeReconciler) RotateNodes(rollingUpgrade *v1alpha1.RollingU
drainInterval = rollingUpgrade.PostDrainDelaySeconds()
)
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusRunning)
common.SetRollupInitOrRunningStatus(rollingUpgrade.Name)

// set status start time
if rollingUpgrade.StartTime() == "" {
Expand Down Expand Up @@ -84,6 +85,8 @@ func (r *RollingUpgradeReconciler) RotateNodes(rollingUpgrade *v1alpha1.RollingU
// check if all instances are rotated.
if !r.IsScalingGroupDrifted(rollingUpgrade) {
rollingUpgrade.SetCurrentStatus(v1alpha1.StatusComplete)
// Set prometheus metric cr_status_completed
common.SetRollupCompletedStatus(rollingUpgrade.Name)
return nil
}

Expand Down
47 changes: 46 additions & 1 deletion controllers/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func TestIsScalingGroupDrifted(t *testing.T) {
false,
},
{
"All instances have different launch config as the ASG, expect false from IsScalingGroupDrifted",
"All instances have different launch config as the ASG, expect true from IsScalingGroupDrifted",
createRollingUpgradeReconciler(t),
createRollingUpgrade(),
func() *MockAutoscalingGroup {
Expand All @@ -284,6 +284,51 @@ func TestIsScalingGroupDrifted(t *testing.T) {

}

// TestRotateNodes runs RotateNodes against mocked auto scaling groups and
// checks that it returns no error and leaves the RollingUpgrade in the
// expected status for each case.
func TestRotateNodes(t *testing.T) {
	var tests = []struct {
		TestDescription string
		Reconciler      *RollingUpgradeReconciler
		RollingUpgrade  *v1alpha1.RollingUpgrade
		AsgClient       *MockAutoscalingGroup
		// ExpectedValue mirrors the drift expectation named in the
		// description; it is not asserted in the loop below.
		ExpectedValue       bool
		ExpectedStatusValue string
	}{
		{
			"All instances have different launch config as the ASG, expect true from IsScalingGroupDrifted",
			createRollingUpgradeReconciler(t),
			createRollingUpgrade(),
			func() *MockAutoscalingGroup {
				newAsgClient := createASGClient()
				newAsgClient.autoScalingGroups[0].LaunchConfigurationName = aws.String("different-launch-config")
				return newAsgClient
			}(),
			true,
			v1alpha1.StatusRunning,
		},
		{
			"All instances have the same launch config as the ASG, expect false from IsScalingGroupDrifted",
			createRollingUpgradeReconciler(t),
			createRollingUpgrade(),
			createASGClient(),
			false,
			v1alpha1.StatusComplete,
		},
	}
	for _, test := range tests {
		// Point both the discovered cloud state and the AWS client at the mock.
		test.Reconciler.Cloud.ScalingGroups = test.AsgClient.autoScalingGroups
		test.Reconciler.Auth.AmazonClientSet.AsgClient = test.AsgClient

		err := test.Reconciler.RotateNodes(test.RollingUpgrade)
		if err != nil {
			// Include the case description so a failure identifies which case broke.
			t.Errorf("Test Description: %s \n expected value: nil, actual value: %v", test.TestDescription, err)
		}
		if test.RollingUpgrade.CurrentStatus() != test.ExpectedStatusValue {
			t.Errorf("Test Description: %s \n expected value: %s, actual value: %s", test.TestDescription, test.ExpectedStatusValue, test.RollingUpgrade.CurrentStatus())
		}
	}

}

func TestDesiredNodesReady(t *testing.T) {
var tests = []struct {
TestDescription string
Expand Down