Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refine metrics implementation to support goroutines #196

Merged
merged 5 commits into from
Mar 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 82 additions & 32 deletions api/v1alpha1/rollingupgrade_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ type RollingUpgradeStatus struct {
LastNodeTerminationTime metav1.Time `json:"lastTerminationTime,omitempty"`
LastNodeDrainTime metav1.Time `json:"lastDrainTime,omitempty"`

Statistics []*RollingUpgradeStatistics `json:"statistics,omitempty"`
InProcessingNodes map[string]*NodeInProcessing `json:"inProcessingNodes,omitempty"`
Statistics []*RollingUpgradeStatistics `json:"statistics,omitempty"`
LastBatchNodes []string `json:"lastBatchNodes,omitempty"`
}

// RollingUpgrade Statistics, includes summary(sum/count) from each step
Expand All @@ -65,6 +65,14 @@ type RollingUpgradeStatistics struct {
DurationCount int32 `json:"durationCount,omitempty"`
}

// NodeStepDuration records the time one node spent in one rolling-upgrade step.
// Values are built by RollingUpgradeStatus.ToStepDuration and later folded into
// the status statistics by RollingUpgradeStatus.AddNodeStepDuration.
type NodeStepDuration struct {
// GroupName is the group name the step was recorded under.
GroupName string `json:"groupName,omitempty"`
// NodeName is the name of the node the step belongs to.
NodeName string `json:"nodeName,omitempty"`
// StepName identifies the step that was measured.
StepName RollingUpgradeStep `json:"stepName,omitempty"`
// Duration is the elapsed time recorded for the step.
Duration metav1.Duration `json:"duration,omitempty"`
}

// Node In-processing
type NodeInProcessing struct {
NodeName string `json:"nodeName,omitempty"`
Expand All @@ -74,66 +82,108 @@ type NodeInProcessing struct {
StepEndTime metav1.Time `json:"stepEndTime,omitempty"`
}

// UpdateLastBatchNodes stores the node names of the given batch on the status.
//
// Only the keys of batchNodes are kept, and LastBatchNodes is overwritten on
// every call. The names follow Go's map iteration order, so their order is not
// stable between calls.
func (s *RollingUpgradeStatus) UpdateLastBatchNodes(batchNodes map[string]*NodeInProcessing) {
	names := make([]string, len(batchNodes))
	next := 0
	for name := range batchNodes {
		names[next] = name
		next++
	}
	s.LastBatchNodes = names
}

// UpdateStatistics folds every collected node step duration into the status
// statistics.
//
// nodeSteps is keyed by node name; each duration recorded for a node is passed
// to AddNodeStepDuration in the order it appears in that node's slice.
func (s *RollingUpgradeStatus) UpdateStatistics(nodeSteps map[string][]NodeStepDuration) {
	for nodeName := range nodeSteps {
		durations := nodeSteps[nodeName]
		for i := range durations {
			s.AddNodeStepDuration(durations[i])
		}
	}
}

// Add one step duration
func (s *RollingUpgradeStatus) addStepDuration(asgName string, stepName RollingUpgradeStep, duration time.Duration) {
// ToStepDuration builds the NodeStepDuration record for one step of a node.
//
// As a side effect the duration is also reported to the system level
// statistics through common.AddStepDuration, keyed by groupName and stepName.
// The receiver itself is not modified.
func (s *RollingUpgradeStatus) ToStepDuration(groupName, nodeName string, stepName RollingUpgradeStep, duration time.Duration) NodeStepDuration {
	// Report to the system level statistics before building the record.
	common.AddStepDuration(groupName, string(stepName), duration)

	var nsd NodeStepDuration
	nsd.GroupName = groupName
	nsd.NodeName = nodeName
	nsd.StepName = stepName
	nsd.Duration = metav1.Duration{Duration: duration}
	return nsd
}

// AddNodeStepDuration adds one node step duration to the per-step statistics (sum and count)
func (s *RollingUpgradeStatus) AddNodeStepDuration(nsd NodeStepDuration) {
// if step exists, add count and sum, otherwise append
for _, s := range s.Statistics {
if s.StepName == stepName {
if s.StepName == nsd.StepName {
s.DurationSum = metav1.Duration{
Duration: s.DurationSum.Duration + duration,
Duration: s.DurationSum.Duration + nsd.Duration.Duration,
}
s.DurationCount += 1
return
}
}
s.Statistics = append(s.Statistics, &RollingUpgradeStatistics{
StepName: stepName,
StepName: nsd.StepName,
DurationSum: metav1.Duration{
Duration: duration,
Duration: nsd.Duration.Duration,
},
DurationCount: 1,
})

//Add to system level statistics
common.AddRollingUpgradeStepDuration(asgName, string(stepName), duration)
}

// NodeStep moves a node onto the given step and records the duration of the step it leaves
func (s *RollingUpgradeStatus) NodeStep(asgName string, nodeName string, stepName RollingUpgradeStep) {
if s.InProcessingNodes == nil {
s.InProcessingNodes = make(map[string]*NodeInProcessing)
}
func (s *RollingUpgradeStatus) NodeStep(InProcessingNodes map[string]*NodeInProcessing,
nodeSteps map[string][]NodeStepDuration, groupName, nodeName string, stepName RollingUpgradeStep) {

var inProcessingNode *NodeInProcessing
if n, ok := s.InProcessingNodes[nodeName]; !ok {
if n, ok := InProcessingNodes[nodeName]; !ok {
inProcessingNode = &NodeInProcessing{
NodeName: nodeName,
StepName: stepName,
UpgradeStartTime: metav1.Now(),
StepStartTime: metav1.Now(),
}
s.InProcessingNodes[nodeName] = inProcessingNode
InProcessingNodes[nodeName] = inProcessingNode
} else {
inProcessingNode = n
n.StepEndTime = metav1.Now()
var duration = n.StepEndTime.Sub(n.StepStartTime.Time)
if stepName == NodeRotationCompleted {
//Add overall and remove the node from in-processing map
var total = n.StepEndTime.Sub(n.UpgradeStartTime.Time)
s.addStepDuration(asgName, inProcessingNode.StepName, duration)
s.addStepDuration(asgName, NodeRotationTotal, total)
delete(s.InProcessingNodes, nodeName)
} else if inProcessingNode.StepName != stepName { //Still same step
var oldOrder = NodeRotationStepOrders[inProcessingNode.StepName]
var newOrder = NodeRotationStepOrders[stepName]
if newOrder > oldOrder { //Make sure the steps running in order
s.addStepDuration(asgName, inProcessingNode.StepName, duration)
n.StepStartTime = metav1.Now()
inProcessingNode.StepName = stepName
}
}

inProcessingNode.StepEndTime = metav1.Now()
var duration = inProcessingNode.StepEndTime.Sub(inProcessingNode.StepStartTime.Time)
if stepName == NodeRotationCompleted {
//Add overall and remove the node from in-processing map
var total = inProcessingNode.StepEndTime.Sub(inProcessingNode.UpgradeStartTime.Time)
duration1 := s.ToStepDuration(groupName, nodeName, inProcessingNode.StepName, duration)
duration2 := s.ToStepDuration(groupName, nodeName, NodeRotationTotal, total)
s.addNodeStepDuration(nodeSteps, nodeName, duration1)
s.addNodeStepDuration(nodeSteps, nodeName, duration2)
} else if inProcessingNode.StepName != stepName { //Still same step
var oldOrder = NodeRotationStepOrders[inProcessingNode.StepName]
var newOrder = NodeRotationStepOrders[stepName]
if newOrder > oldOrder { //Make sure the steps running in order
stepDuration := s.ToStepDuration(groupName, nodeName, inProcessingNode.StepName, duration)
inProcessingNode.StepStartTime = metav1.Now()
inProcessingNode.StepName = stepName
s.addNodeStepDuration(nodeSteps, nodeName, stepDuration)
}
}
}

// addNodeStepDuration appends nsd to the step durations collected for nodeName.
//
// steps is mutated in place, so it must be a non-nil map. The receiver is not
// read or modified.
func (s *RollingUpgradeStatus) addNodeStepDuration(steps map[string][]NodeStepDuration, nodeName string, nsd NodeStepDuration) {
	// A missing key yields a nil slice and append allocates it, so no
	// existence check is needed.
	steps[nodeName] = append(steps[nodeName], nsd)
}

func (s *RollingUpgradeStatus) SetCondition(cond RollingUpgradeCondition) {
// if condition exists, overwrite, otherwise append
for ix, c := range s.Conditions {
Expand Down
44 changes: 23 additions & 21 deletions api/v1alpha1/rollingupgrade_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,40 @@ func TestNodeTurnsOntoStep(t *testing.T) {
g := gomega.NewGomegaWithT(t)

r := &RollingUpgradeStatus{}
//A map to retain the steps for multiple nodes
nodeSteps := make(map[string][]NodeStepDuration)
inProcessingNodes := make(map[string]*NodeInProcessing)

r.NodeStep("test-asg", "node-1", NodeRotationKickoff)
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-1", NodeRotationKickoff)

g.Expect(r.InProcessingNodes).NotTo(gomega.BeNil())
g.Expect(r.Statistics).To(gomega.BeNil())
g.Expect(inProcessingNodes).NotTo(gomega.BeNil())
g.Expect(nodeSteps["node-1"]).To(gomega.BeNil())

r.NodeStep("test-asg", "node-1", NodeRotationDesiredNodeReady)
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-1", NodeRotationDesiredNodeReady)

g.Expect(r.Statistics).NotTo(gomega.BeNil())
g.Expect(len(r.Statistics)).To(gomega.Equal(1))
g.Expect(r.Statistics[0].StepName).To(gomega.Equal(NodeRotationKickoff))
g.Expect(len(nodeSteps["node-1"])).To(gomega.Equal(1))
g.Expect(nodeSteps["node-1"][0].StepName).To(gomega.Equal(NodeRotationKickoff))

//Retry desired_node_ready
r.NodeStep("test-asg", "node-1", NodeRotationDesiredNodeReady)
g.Expect(len(r.Statistics)).To(gomega.Equal(1))
g.Expect(r.Statistics[0].StepName).To(gomega.Equal(NodeRotationKickoff))
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-1", NodeRotationDesiredNodeReady)
g.Expect(len(nodeSteps["node-1"])).To(gomega.Equal(1))
g.Expect(nodeSteps["node-1"][0].StepName).To(gomega.Equal(NodeRotationKickoff))

//Retry desired_node_ready again
r.NodeStep("test-asg", "node-1", NodeRotationDesiredNodeReady)
g.Expect(len(r.Statistics)).To(gomega.Equal(1))
g.Expect(r.Statistics[0].StepName).To(gomega.Equal(NodeRotationKickoff))
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-1", NodeRotationDesiredNodeReady)
g.Expect(len(nodeSteps["node-1"])).To(gomega.Equal(1))
g.Expect(nodeSteps["node-1"][0].StepName).To(gomega.Equal(NodeRotationKickoff))

//Completed
r.NodeStep("test-asg", "node-1", NodeRotationCompleted)
g.Expect(len(r.Statistics)).To(gomega.Equal(3))
g.Expect(r.Statistics[1].StepName).To(gomega.Equal(NodeRotationDesiredNodeReady))
g.Expect(r.Statistics[2].StepName).To(gomega.Equal(NodeRotationTotal))
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-1", NodeRotationCompleted)
g.Expect(len(nodeSteps["node-1"])).To(gomega.Equal(3))
g.Expect(nodeSteps["node-1"][1].StepName).To(gomega.Equal(NodeRotationDesiredNodeReady))
g.Expect(nodeSteps["node-1"][2].StepName).To(gomega.Equal(NodeRotationTotal))

//Second node
r.NodeStep("test-asg", "node-2", NodeRotationKickoff)
g.Expect(len(r.Statistics)).To(gomega.Equal(3))
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-2", NodeRotationKickoff)
g.Expect(len(nodeSteps["node-1"])).To(gomega.Equal(3))

r.NodeStep("test-asg", "node-2", NodeRotationDesiredNodeReady)
g.Expect(len(r.Statistics)).To(gomega.Equal(3))
r.NodeStep(inProcessingNodes, nodeSteps, "test-asg", "node-2", NodeRotationDesiredNodeReady)
g.Expect(len(nodeSteps["node-1"])).To(gomega.Equal(3))
}
34 changes: 20 additions & 14 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 4 additions & 19 deletions config/crd/bases/upgrademgr.keikoproj.io_rollingupgrades.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,25 +133,10 @@ spec:
type: string
endTime:
type: string
inProcessingNodes:
additionalProperties:
description: Node In-processing
properties:
nodeName:
type: string
stepEndTime:
format: date-time
type: string
stepName:
type: string
stepStartTime:
format: date-time
type: string
upgradeStartTime:
format: date-time
type: string
type: object
type: object
lastBatchNodes:
items:
type: string
type: array
lastDrainTime:
format: date-time
type: string
Expand Down
12 changes: 6 additions & 6 deletions controllers/common/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ func InitMetrics() {
}

// Add rolling update step duration when the step is completed
func AddRollingUpgradeStepDuration(asgName string, stepName string, duration time.Duration) {
func AddStepDuration(groupName string, stepName string, duration time.Duration) {
if strings.EqualFold(stepName, "total") { //Histogram
nodeRotationTotal.Observe(duration.Seconds())
} else { //Summary
var steps map[string]prometheus.Summary
if m, ok := stepSummaries[asgName]; !ok {
if m, ok := stepSummaries[groupName]; !ok {
steps = make(map[string]prometheus.Summary)
stepSummaries[asgName] = steps
stepSummaries[groupName] = steps
} else {
steps = m
}
Expand All @@ -55,14 +55,14 @@ func AddRollingUpgradeStepDuration(asgName string, stepName string, duration tim
Namespace: "node",
Name: stepName + "_seconds",
Help: "Summary for node " + stepName,
ConstLabels: prometheus.Labels{"asg": asgName},
ConstLabels: prometheus.Labels{"group": groupName},
})
err := metrics.Registry.Register(summary)
if err != nil {
if reflect.TypeOf(err).String() == "prometheus.AlreadyRegisteredError" {
log.Warnf("summary was registered again, ASG: %s, step: %s", asgName, stepName)
log.Warnf("summary was registered again, group: %s, step: %s", groupName, stepName)
} else {
log.Errorf("register summary error, ASG: %s, step: %s, %v", asgName, stepName, err)
log.Errorf("register summary error, group: %s, step: %s, %v", groupName, stepName, err)
}
}
steps[stepName] = summary
Expand Down
8 changes: 4 additions & 4 deletions controllers/common/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ func TestAddRollingUpgradeStepDuration(t *testing.T) {
g := gomega.NewGomegaWithT(t)

g.Expect(stepSummaries["test-asg"]).To(gomega.BeNil())
AddRollingUpgradeStepDuration("test-asg", "kickoff", 1)
AddStepDuration("test-asg", "kickoff", 1)

g.Expect(stepSummaries["test-asg"]).NotTo(gomega.BeNil())
g.Expect(stepSummaries["test-asg"]["kickoff"]).NotTo(gomega.BeNil())

//Test duplicate
AddRollingUpgradeStepDuration("test-asg", "kickoff", 1)
AddStepDuration("test-asg", "kickoff", 1)
g.Expect(stepSummaries["test-asg"]["kickoff"]).NotTo(gomega.BeNil())

//Test re-registration after the cached summary has been removed
delete(stepSummaries["test-asg"], "kickoff")
AddRollingUpgradeStepDuration("test-asg", "kickoff", 1)
AddStepDuration("test-asg", "kickoff", 1)
g.Expect(stepSummaries["test-asg"]["kickoff"]).NotTo(gomega.BeNil())

//Test total
AddRollingUpgradeStepDuration("test-asg", "total", 1)
AddStepDuration("test-asg", "total", 1)
g.Expect(stepSummaries["test-asg"]["kickoff"]).NotTo(gomega.BeNil())
}
Loading