Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Fleet Active GameServerSet Percentage Metrics #4021

Merged
merged 6 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 123 additions & 6 deletions build/grafana/dashboard-gameservers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,89 @@ data:
"links": [],
"panels": [
{
"gridPos": {
"h": 6,
"w": 5,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"targets": [
{
"expr": "sum(agones_fleet_rollout_percent{name=~\"$fleet\", namespace=~\"$namespace\", type=\"current_replicas\"}) / sum(agones_fleet_rollout_percent{name=~\"$fleet\", namespace=~\"$namespace\", type=\"desired_replicas\"}) * 100",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"options": {
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": true
},
"orientation": "horizontal",
"textMode": "value",
"colorMode": "value",
"valueMappings": []
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {},
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "dark-red",
"value": null
},
{
"color": "dark-orange",
"value": 20
},
{
"color": "dark-green",
"value": 50
}
]
},
"unit": "percent",
"links": []
},
"overrides": []
},
"legend": {
"show": false
},
"timeShift": null,
"nullPointMode": "null",
"options": {
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"tooltip": {
"shared": false,
"value_type": "individual"
},
"type": "stat",
"title": "Fleet RollOut Percentage"
},
{
"aliasColors": {},
"breakPoint": "50%",
"cacheTimeout": null,
"combine": {
"label": "Others",
"threshold": 0
},
"fieldConfig": {
"defaults": {
"color": {
Expand All @@ -72,8 +151,8 @@ data:
},
"gridPos": {
"h": 6,
"w": 7,
"x": 0,
"w": 5,
"x": 5,
"y": 0
},
"id": 4,
Expand Down Expand Up @@ -224,8 +303,8 @@ data:
},
"gridPos": {
"h": 6,
"w": 17,
"x": 7,
"w": 14,
"x": 10,
"y": 0
},
"id": 2,
Expand Down Expand Up @@ -401,8 +480,46 @@ data:
"refId": "A"
}
],
"title": "GameServers count per type",
"type": "timeseries"
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GameServer count overview",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": {
Expand Down
55 changes: 55 additions & 0 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ import (
"agones.dev/agones/pkg/client/informers/externalversions"
listerv1 "agones.dev/agones/pkg/client/listers/agones/v1"
autoscalinglisterv1 "agones.dev/agones/pkg/client/listers/autoscaling/v1"
fleetsv1 "agones.dev/agones/pkg/fleets"
"agones.dev/agones/pkg/util/runtime"
lru "github.com/hashicorp/golang-lru"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.opencensus.io/stats"
"go.opencensus.io/tag"
corev1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -77,6 +79,7 @@ type Controller struct {
gameServerSynced cache.InformerSynced
fleetSynced cache.InformerSynced
fleetLister listerv1.FleetLister
gameServerSetLister listerv1.GameServerSetLister
fasSynced cache.InformerSynced
fasLister autoscalinglisterv1.FleetAutoscalerLister
lock sync.Mutex
Expand All @@ -103,6 +106,8 @@ func NewController(
fasInformer := fas.Informer()
node := kubeInformerFactory.Core().V1().Nodes()

gameServerSets := agonesInformerFactory.Agones().V1().GameServerSets()

// GameServerStateLastChange contains the time when the GameServer
// last changed its state.
// On delete and on state change, the GameServerName key is removed.
Expand All @@ -117,6 +122,7 @@ func NewController(
gameServerSynced: gsInformer.HasSynced,
fleetSynced: fInformer.HasSynced,
fleetLister: fleets.Lister(),
gameServerSetLister: gameServerSets.Lister(),
fasSynced: fasInformer.HasSynced,
fasLister: fas.Lister(),
gsCount: GameServerCount{},
Expand Down Expand Up @@ -240,6 +246,8 @@ func (c *Controller) recordFleetChanges(obj interface{}) {
c.recordFleetReplicas(f.Name, f.Namespace, f.Status.Replicas, f.Status.AllocatedReplicas,
f.Status.ReadyReplicas, f.Spec.Replicas, f.Status.ReservedReplicas)

c.recordFleetRolloutPercentage(f)

if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
if f.Status.Counters != nil {
c.recordCounters(f.Name, f.Namespace, f.Status.Counters)
Expand All @@ -250,6 +258,53 @@ func (c *Controller) recordFleetChanges(obj interface{}) {
}
}

// recordFleetRolloutPercentage records the two measurements behind the fleet
// rollout metric for the given Fleet, both tagged with the Fleet's name and
// namespace:
//
//   - type "current_replicas": Status.Replicas of the Fleet's active
//     GameServerSet, i.e. the owned GameServerSet whose template matches the
//     Fleet's current template.
//   - type "desired_replicas": the Fleet's Spec.Replicas.
//
// When no owned GameServerSet matches the Fleet template, the value returned
// by fleet.GameServerSet() is used instead (presumably a freshly built set
// with an empty status, which would report zero current replicas - confirm).
//
// Failures to list the GameServerSets or to build the tagged context are
// logged and nothing is recorded.
func (c *Controller) recordFleetRolloutPercentage(fleet *agonesv1.Fleet) {
	list, err := fleetsv1.ListGameServerSetsByFleetOwner(c.gameServerSetLister, fleet)
	if err != nil {
		c.logger.Errorf("Error listing GameServerSets for fleet %s in namespace %s: %v", fleet.Name, fleet.Namespace, err)
		return
	}

	// Only the active set matters here; the remaining sets are ignored.
	active, _ := c.filterGameServerSetByActive(fleet, list)
	if active == nil {
		// The identifier logged is the Fleet's key, so say so explicitly
		// rather than presenting it as a GameServerSet name.
		c.logger.Debugf("Could not find active GameServerSet for fleet %s/%s", fleet.Namespace, fleet.Name)
		active = fleet.GameServerSet()
	}

	currentReplicas := active.Status.Replicas
	desiredReplicas := fleet.Spec.Replicas

	ctx, err := tag.New(context.Background(), tag.Upsert(keyName, fleet.Name), tag.Upsert(keyNamespace, fleet.Namespace))
	if err != nil {
		// Without a valid tagged context the measurements cannot be
		// attributed to this fleet, so skip recording instead of passing a
		// nil context on.
		c.logger.Errorf("Error creating metric tags for fleet %s in namespace %s: %v", fleet.Name, fleet.Namespace, err)
		return
	}

	// Record current replicas count
	RecordWithTags(ctx, []tag.Mutator{tag.Upsert(keyType, "current_replicas")},
		fleetRolloutPercentStats.M(int64(currentReplicas)))

	// Record desired replicas count
	RecordWithTags(ctx, []tag.Mutator{tag.Upsert(keyType, "desired_replicas")},
		fleetRolloutPercentStats.M(int64(desiredReplicas)))
}

// filterGameServerSetByActive partitions the GameServerSets in list into the
// one whose Spec.Template is semantically equal to the Fleet's Spec.Template
// (the active set) and all the others. The active result is nil when no set
// matches; if several match, the last one in list is returned. The second
// result holds the non-matching sets in their original order and is nil when
// there are none.
func (c *Controller) filterGameServerSetByActive(fleet *agonesv1.Fleet, list []*agonesv1.GameServerSet) (*agonesv1.GameServerSet, []*agonesv1.GameServerSet) {
	var (
		current *agonesv1.GameServerSet
		others  []*agonesv1.GameServerSet
	)

	for i := range list {
		candidate := list[i]
		if !apiequality.Semantic.DeepEqual(candidate.Spec.Template, fleet.Spec.Template) {
			others = append(others, candidate)
			continue
		}
		current = candidate
	}

	return current, others
}

func (c *Controller) recordFleetDeletion(obj interface{}) {
_, ok := obj.(*agonesv1.Fleet)
if !ok {
Expand Down
11 changes: 10 additions & 1 deletion pkg/metrics/controller_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
)

const (
fleetRolloutPercent = "fleet_rollout_percent"
fleetReplicaCountName = "fleets_replicas_count"
fleetAutoscalerBufferLimitName = "fleet_autoscalers_buffer_limits"
fleetAutoscalterBufferSizeName = "fleet_autoscalers_buffer_size"
Expand All @@ -44,9 +45,10 @@ var (
fleetAutoscalerViews = []string{fleetAutoscalerBufferLimitName, fleetAutoscalterBufferSizeName, fleetAutoscalerCurrentReplicaCountName,
fleetAutoscalersDesiredReplicaCountName, fleetAutoscalersAbleToScaleName, fleetAutoscalersLimitedName}
// fleetViews are metric views associated with Fleets
fleetViews = append([]string{fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName, fleetCountersName, fleetListsName}, fleetAutoscalerViews...)
fleetViews = append([]string{fleetRolloutPercent, fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName, fleetCountersName, fleetListsName}, fleetAutoscalerViews...)

stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
fleetRolloutPercentStats = stats.Int64("fleets/rollout_percent", "The current fleet rollout percentage", "1")
fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
Expand All @@ -65,6 +67,13 @@ var (
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)

stateViews = []*view.View{
{
Name: fleetRolloutPercent,
Measure: fleetRolloutPercentStats,
Description: "Measures the current progress of fleet rollout",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyType, keyNamespace},
},
{
Name: fleetReplicaCountName,
Measure: fleetsReplicasCountStats,
Expand Down
Loading