Skip to content

Commit

Permalink
Metrics features (#189)
Browse files Browse the repository at this point in the history
Signed-off-by: xshao <[email protected]>
  • Loading branch information
shaoxt authored Mar 2, 2021
1 parent 11d3ae6 commit 14e950e
Show file tree
Hide file tree
Showing 10 changed files with 393 additions and 7 deletions.
106 changes: 106 additions & 0 deletions api/v1alpha1/rollingupgrade_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"strconv"
"strings"
"time"

"github.com/keikoproj/upgrade-manager/controllers/common"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -52,6 +53,85 @@ type RollingUpgradeStatus struct {
Conditions []RollingUpgradeCondition `json:"conditions,omitempty"`
LastNodeTerminationTime metav1.Time `json:"lastTerminationTime,omitempty"`
LastNodeDrainTime metav1.Time `json:"lastDrainTime,omitempty"`

Statistics []*RollingUpgradeStatistics `json:"statistics,omitempty"`
InProcessingNodes map[string]*NodeInProcessing `json:"inProcessingNodes,omitempty"`
}

// RollingUpgradeStatistics holds the accumulated timing data for a single
// node-rotation step. addStepDuration keeps one entry per StepName and folds
// every further observation of that step into the same entry.
type RollingUpgradeStatistics struct {
// StepName identifies the step these numbers belong to.
StepName RollingUpgradeStep `json:"stepName,omitempty"`
// DurationSum is the sum of all durations recorded for the step.
DurationSum metav1.Duration `json:"durationSum,omitempty"`
// DurationCount is the number of durations added into DurationSum.
DurationCount int32 `json:"durationCount,omitempty"`
}

// NodeInProcessing tracks a node whose rotation is in progress. NodeStep
// creates an entry the first time it sees a node name and deletes it when the
// node reaches NodeRotationCompleted.
type NodeInProcessing struct {
// NodeName is the name of the node; NodeStep also uses it as the map key.
NodeName string `json:"nodeName,omitempty"`
// StepName is the step the node is currently in.
StepName RollingUpgradeStep `json:"stepName,omitempty"`
// UpgradeStartTime is set once, when the node is first tracked.
UpgradeStartTime metav1.Time `json:"upgradeStartTime,omitempty"`
// StepStartTime is set when the node is first tracked and reset each time
// it advances to a later step.
StepStartTime metav1.Time `json:"stepStartTime,omitempty"`
// StepEndTime is refreshed on every NodeStep call for an already tracked
// node and is the end point used to compute step and total durations.
StepEndTime metav1.Time `json:"stepEndTime,omitempty"`
}

// addStepDuration records one completed occurrence of stepName that took
// duration.
//
// The observation is folded into s.Statistics (one entry per step: the sum
// grows by duration and the count by one; a new entry is appended the first
// time a step is seen) and is then always forwarded to the process-wide
// metrics via common.AddRollingUpgradeStepDuration, labelled with asgName.
// Forwarding must happen for every observation, not only the first one of a
// step, otherwise the exported metrics under-count.
func (s *RollingUpgradeStatus) addStepDuration(asgName string, stepName RollingUpgradeStep, duration time.Duration) {
	found := false
	for _, stat := range s.Statistics {
		// Statistics holds pointers; skip nil entries instead of panicking.
		if stat == nil || stat.StepName != stepName {
			continue
		}
		stat.DurationSum = metav1.Duration{
			Duration: stat.DurationSum.Duration + duration,
		}
		stat.DurationCount++
		found = true
		break
	}

	if !found {
		s.Statistics = append(s.Statistics, &RollingUpgradeStatistics{
			StepName: stepName,
			DurationSum: metav1.Duration{
				Duration: duration,
			},
			DurationCount: 1,
		})
	}

	// Add to system level statistics.
	common.AddRollingUpgradeStepDuration(asgName, string(stepName), duration)
}

// NodeStep records that node nodeName, belonging to ASG asgName, has reached
// stepName.
//
// A node seen for the first time is added to InProcessingNodes with both its
// upgrade and step start times set to now. For a node that is already
// tracked, StepEndTime is refreshed and then:
//   - if stepName is NodeRotationCompleted, the duration of the current step
//     and the total duration since UpgradeStartTime are recorded and the node
//     is removed from InProcessingNodes;
//   - if stepName differs from the current step and ranks higher in
//     NodeRotationStepOrders, the duration of the current step is recorded
//     and the node moves on to stepName with a fresh StepStartTime;
//   - otherwise (same step again, or a step that does not rank higher)
//     nothing else changes.
func (s *RollingUpgradeStatus) NodeStep(asgName string, nodeName string, stepName RollingUpgradeStep) {
	if s.InProcessingNodes == nil {
		s.InProcessingNodes = make(map[string]*NodeInProcessing)
	}

	node, tracked := s.InProcessingNodes[nodeName]
	if !tracked {
		// First sighting of this node: start tracking it at stepName.
		s.InProcessingNodes[nodeName] = &NodeInProcessing{
			NodeName:         nodeName,
			StepName:         stepName,
			UpgradeStartTime: metav1.Now(),
			StepStartTime:    metav1.Now(),
		}
		return
	}

	node.StepEndTime = metav1.Now()
	stepElapsed := node.StepEndTime.Sub(node.StepStartTime.Time)

	switch {
	case stepName == NodeRotationCompleted:
		// Close out the current step, record the overall time and stop
		// tracking the node.
		overall := node.StepEndTime.Sub(node.UpgradeStartTime.Time)
		s.addStepDuration(asgName, node.StepName, stepElapsed)
		s.addStepDuration(asgName, NodeRotationTotal, overall)
		delete(s.InProcessingNodes, nodeName)

	case node.StepName != stepName:
		// Only move forward; a step that does not rank higher than the
		// current one is ignored.
		currentRank := NodeRotationStepOrders[node.StepName]
		nextRank := NodeRotationStepOrders[stepName]
		if nextRank > currentRank {
			s.addStepDuration(asgName, node.StepName, stepElapsed)
			node.StepStartTime = metav1.Now()
			node.StepName = stepName
		}
	}
}

func (s *RollingUpgradeStatus) SetCondition(cond RollingUpgradeCondition) {
Expand Down Expand Up @@ -115,6 +195,8 @@ type NodeReadinessGate struct {
MatchLabels map[string]string `json:"matchLabels,omitempty" protobuf:"bytes,1,rep,name=matchLabels"`
}

// RollingUpgradeStep names a step of a node rotation. It is the key under
// which step durations are accumulated in RollingUpgradeStatistics and the
// current step recorded in NodeInProcessing.
type RollingUpgradeStep string

const (
// Status
StatusInit = "init"
Expand All @@ -124,8 +206,32 @@ const (

// Conditions
UpgradeComplete UpgradeConditionType = "Complete"

NodeRotationTotal RollingUpgradeStep = "total"

NodeRotationKickoff RollingUpgradeStep = "kickoff"
NodeRotationDesiredNodeReady RollingUpgradeStep = "desired_node_ready"
NodeRotationPredrainScript RollingUpgradeStep = "predrain_script"
NodeRotationDrain RollingUpgradeStep = "drain"
NodeRotationPostdrainScript RollingUpgradeStep = "postdrain_script"
NodeRotationPostWait RollingUpgradeStep = "post_wait"
NodeRotationTerminate RollingUpgradeStep = "terminate"
NodeRotationPostTerminate RollingUpgradeStep = "post_terminate"
NodeRotationCompleted RollingUpgradeStep = "completed"
)

// NodeRotationStepOrders ranks the node-rotation steps. NodeStep compares the
// rank of the requested step with the rank of the node's current step and
// only advances the node when the requested rank is strictly greater.
// Steps that are not listed here (NodeRotationTotal, for instance) look up as
// rank 0.
var NodeRotationStepOrders = map[RollingUpgradeStep]int{
NodeRotationKickoff: 10,
NodeRotationDesiredNodeReady: 20,
NodeRotationPredrainScript: 30,
NodeRotationDrain: 40,
NodeRotationPostdrainScript: 50,
NodeRotationPostWait: 60,
NodeRotationTerminate: 70,
NodeRotationPostTerminate: 80,
// Completed ranks far above every other step.
NodeRotationCompleted: 1000,
}

var (
FiniteStates = []string{StatusComplete, StatusError}
AllowedStrategyType = []string{string(RandomUpdateStrategy), string(UniformAcrossAzUpdateStrategy)}
Expand Down
47 changes: 47 additions & 0 deletions api/v1alpha1/rollingupgrade_types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package v1alpha1

import (
"github.com/onsi/gomega"
"testing"
)

// TestNodeTurnsOntoStep drives a node through NodeStep and checks how the
// per-step statistics evolve: the first step only starts tracking, moving to
// a later step records the previous one, repeating a step records nothing,
// completion records the last step plus the total, and a second node reuses
// the existing statistics entries.
func TestNodeTurnsOntoStep(t *testing.T) {
	g := gomega.NewGomegaWithT(t)

	status := &RollingUpgradeStatus{}

	const asg = "test-asg"
	expectStatCount := func(want int) {
		g.Expect(len(status.Statistics)).To(gomega.Equal(want))
	}
	expectStatStep := func(idx int, want RollingUpgradeStep) {
		g.Expect(status.Statistics[idx].StepName).To(gomega.Equal(want))
	}

	// Kickoff only registers the node; nothing has finished yet.
	status.NodeStep(asg, "node-1", NodeRotationKickoff)
	g.Expect(status.InProcessingNodes).NotTo(gomega.BeNil())
	g.Expect(status.Statistics).To(gomega.BeNil())

	// Moving on to desired_node_ready closes out kickoff.
	status.NodeStep(asg, "node-1", NodeRotationDesiredNodeReady)
	g.Expect(status.Statistics).NotTo(gomega.BeNil())
	expectStatCount(1)
	expectStatStep(0, NodeRotationKickoff)

	// Retrying desired_node_ready (twice) must not add anything.
	for retry := 0; retry < 2; retry++ {
		status.NodeStep(asg, "node-1", NodeRotationDesiredNodeReady)
		expectStatCount(1)
		expectStatStep(0, NodeRotationKickoff)
	}

	// Completion records desired_node_ready and the total.
	status.NodeStep(asg, "node-1", NodeRotationCompleted)
	expectStatCount(3)
	expectStatStep(1, NodeRotationDesiredNodeReady)
	expectStatStep(2, NodeRotationTotal)

	// A second node folds into the existing entries.
	status.NodeStep(asg, "node-2", NodeRotationKickoff)
	expectStatCount(3)

	status.NodeStep(asg, "node-2", NodeRotationDesiredNodeReady)
	expectStatCount(3)
}
60 changes: 60 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions config/crd/bases/upgrademgr.keikoproj.io_rollingupgrades.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,25 @@ spec:
type: string
endTime:
type: string
inProcessingNodes:
additionalProperties:
description: Node In-processing
properties:
nodeName:
type: string
stepEndTime:
format: date-time
type: string
stepName:
type: string
stepStartTime:
format: date-time
type: string
upgradeStartTime:
format: date-time
type: string
type: object
type: object
lastDrainTime:
format: date-time
type: string
Expand All @@ -143,6 +162,20 @@ spec:
type: integer
startTime:
type: string
statistics:
items:
description: RollingUpgrade Statistics, includes summary(sum/count)
from each step
properties:
durationCount:
format: int32
type: integer
durationSum:
type: string
stepName:
type: string
type: object
type: array
totalNodes:
type: integer
totalProcessingTime:
Expand Down
74 changes: 74 additions & 0 deletions controllers/common/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package common

import (
"github.com/keikoproj/upgrade-manager/controllers/common/log"
"github.com/prometheus/client_golang/prometheus"
"reflect"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"strings"
"time"
)

// Cluster-level node upgrade metrics.

// nodeRotationTotal is a histogram of the total time, in seconds, taken to
// rotate a node. AddRollingUpgradeStepDuration observes into it for the
// "total" step, and InitMetrics registers it.
var nodeRotationTotal = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "node",
Name: "rotation_total_seconds",
Help: "Node rotation total",
// Bucket upper bounds in seconds, from 10s up to 15 minutes.
Buckets: []float64{
10.0,
30.0,
60.0,
90.0,
120.0,
180.0,
300.0,
600.0,
900.0,
},
})

// stepSummaries caches the per-step summaries created by
// AddRollingUpgradeStepDuration, keyed first by ASG name and then by step name.
// NOTE(review): the map is read and written without any locking here; confirm
// that callers never invoke AddRollingUpgradeStepDuration concurrently.
var stepSummaries = make(map[string]map[string]prometheus.Summary)

// InitMetrics registers the nodeRotationTotal histogram with the
// controller-runtime metrics registry. MustRegister panics if the
// registration fails.
func InitMetrics() {
metrics.Registry.MustRegister(nodeRotationTotal)
}

// AddRollingUpgradeStepDuration records the duration of one completed
// rolling-upgrade step in the process-wide metrics.
//
// The step named "total" (compared case-insensitively) is observed into the
// nodeRotationTotal histogram. Every other step is observed into a summary
// named "<stepName>_seconds" in the "node" namespace, carrying the constant
// label asg=asgName. Summaries are created and registered lazily on first use
// and cached in stepSummaries.
//
// If the registry reports that an identical summary is already registered,
// the registered collector is reused so that observations end up in the
// collector that is actually exported. On any other registration error the
// error is logged and the observation goes to the local, unregistered summary.
func AddRollingUpgradeStepDuration(asgName string, stepName string, duration time.Duration) {
	if strings.EqualFold(stepName, "total") { // Histogram
		nodeRotationTotal.Observe(duration.Seconds())
		return
	}

	// Summary
	steps, ok := stepSummaries[asgName]
	if !ok {
		steps = make(map[string]prometheus.Summary)
		stepSummaries[asgName] = steps
	}

	summary, ok := steps[stepName]
	if !ok {
		summary = prometheus.NewSummary(
			prometheus.SummaryOpts{
				Namespace:   "node",
				Name:        stepName + "_seconds",
				Help:        "Summary for node " + stepName,
				ConstLabels: prometheus.Labels{"asg": asgName},
			})
		if err := metrics.Registry.Register(summary); err != nil {
			if reflect.TypeOf(err).String() == "prometheus.AlreadyRegisteredError" {
				log.Warnf("summary was registered again, ASG: %s, step: %s", asgName, stepName)
				// Observing into the freshly created summary would be lost,
				// because it is not the one the registry exports. Switch to
				// the collector that is already registered.
				if dup, isDup := err.(prometheus.AlreadyRegisteredError); isDup {
					if existing, isSummary := dup.ExistingCollector.(prometheus.Summary); isSummary {
						summary = existing
					}
				}
			} else {
				log.Errorf("register summary error, ASG: %s, step: %s, %v", asgName, stepName, err)
			}
		}
		steps[stepName] = summary
	}

	summary.Observe(duration.Seconds())
}
Loading

0 comments on commit 14e950e

Please sign in to comment.