Skip to content

Commit

Permalink
Counter and List Aggregate Fleet Metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
igooch committed Dec 1, 2023
1 parent 6fd270d commit 94a2f38
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 15 deletions.
22 changes: 22 additions & 0 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,10 @@ func (c *Controller) recordFleetChanges(obj interface{}) {

c.recordFleetReplicas(f.Name, f.Namespace, f.Status.Replicas, f.Status.AllocatedReplicas,
f.Status.ReadyReplicas, f.Spec.Replicas, f.Status.ReservedReplicas)

if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
c.recordCounters(f.Name, f.Namespace, f.Status.Counters)
}
}

func (c *Controller) recordFleetDeletion(obj interface{}) {
Expand Down Expand Up @@ -317,6 +321,24 @@ func (c *Controller) recordFleetReplicas(fleetName, fleetNamespace string, total
fleetsReplicasCountStats.M(int64(reserved)))
}

// recordCounters records the aggregated Counter values reported for a Fleet.
//
// For every entry in counters it records four measurements: the allocated
// count, the allocated capacity, the total count and the total capacity.
// Each recording carries these tags:
//   - keyFleetName: fleetName
//   - keyNamespace: fleetNamespace
//   - keyName:      the counter's key in the counters map
//   - keyType:      the literal "counter"
//
// A nil or empty counters map records nothing.
func (c *Controller) recordCounters(fleetName, fleetNamespace string, counters map[string]agonesv1.AggregatedCounterStatus) {
	// The fleet is tagged under keyFleetName rather than keyName: keyName is
	// upserted with the counter name for every recording below, so putting the
	// fleet name under keyName as well would silently overwrite it and lose
	// the fleet dimension.
	//
	// The error from tag.New is discarded, as in the original code.
	// NOTE(review): consider logging it so invalid tag values are visible.
	ctx, _ := tag.New(context.Background(), tag.Upsert(keyFleetName, fleetName), tag.Upsert(keyNamespace, fleetNamespace))

	for counter, counterStatus := range counters {
		// NOTE(review): keyType is kept from the original code; confirm that
		// the views consuming these measures actually list it in TagKeys,
		// otherwise it can be dropped.
		mutators := []tag.Mutator{tag.Upsert(keyType, "counter"), tag.Upsert(keyName, counter)}

		// All four measurements share the same tags, so record them together.
		recordWithTags(ctx, mutators,
			fleetCounterAllocatedCountStats.M(counterStatus.AllocatedCount),
			fleetCounterAllocatedCapacityStats.M(counterStatus.AllocatedCapacity),
			fleetCounterCountStats.M(counterStatus.Count),
			fleetCounterCapacityStats.M(counterStatus.Capacity))
	}
}

// recordGameServerStatusChanged records gameserver status changes. However, since it's based
// on cache events, some events might be collapsed and not appear; for example, transitional
// states such as creating or port allocation could be skipped.
Expand Down
102 changes: 87 additions & 15 deletions pkg/metrics/controller_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ const (
fleetAutoscalersDesiredReplicaCountName = "fleet_autoscalers_desired_replicas_count"
fleetAutoscalersAbleToScaleName = "fleet_autoscalers_able_to_scale"
fleetAutoscalersLimitedName = "fleet_autoscalers_limited"
fleetCounterAllocatedCountName = "fleet_counter_allocated_count"
fleetCounterAllocatedCapacityName = "fleet_counter_allocated_capacity"
fleetCounterCountName = "fleet_counter_count"
fleetCounterCapacityName = "fleet_counter_capacity"
fleetListAllocatedCountName = "fleet_list_allocated_count"
fleetListAllocatedCapacityName = "fleet_list_allocated_capacity"
fleetListCountName = "fleet_list_count"
fleetListCapacityName = "fleet_list_capacity"
gameServersCountName = "gameservers_count"
gameServersTotalName = "gameservers_total"
gameServersPlayerConnectedTotalName = "gameserver_player_connected_total"
Expand All @@ -44,21 +52,29 @@ var (
// fleetViews are metric views associated with Fleets
fleetViews = append([]string{fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName}, fleetAutoscalerViews...)

stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
fasCurrentReplicasStats = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1")
fasDesiredReplicasStats = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1")
fasAbleToScaleStats = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1")
fasLimitedStats = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1")
gameServerCountStats = stats.Int64("gameservers/count", "The count of gameservers", "1")
gameServerTotalStats = stats.Int64("gameservers/total", "The total of gameservers", "1")
gameServerPlayerConnectedTotal = stats.Int64("gameservers/player_connected", "The total number of players connected to gameservers", "1")
gameServerPlayerCapacityTotal = stats.Int64("gameservers/player_capacity", "The available player capacity for gameservers", "1")
nodesCountStats = stats.Int64("nodes/count", "The count of nodes in the cluster", "1")
gsPerNodesCountStats = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1")
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)
stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
fasCurrentReplicasStats = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1")
fasDesiredReplicasStats = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1")
fasAbleToScaleStats = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1")
fasLimitedStats = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1")
fleetCounterAllocatedCountStats = stats.Int64("fleets/counters/allocated_count", "Aggregated count of the Counter across allocated GameServers in the Fleet", "1")
fleetCounterAllocatedCapacityStats = stats.Int64("fleets/counters/allocated_capacity", "Aggregated maximum capacity of the Counter across allocated GameServers in the Fleet", "1")
fleetCounterCountStats = stats.Int64("fleets/counters/count", "Aggregated count of the Counter across the Fleet", "1")
fleetCounterCapacityStats = stats.Int64("fleets/counters/capacity", "Aggregated maximum capacity of the Counter across the Fleet", "1")
fleetListAllocatedCountStats = stats.Int64("fleets/lists/allocated_count", "Aggregated number of items in the List across allocated GameServers in the Fleet", "1")
fleetListAllocatedCapacityStats = stats.Int64("fleets/lists/allocated_capacity", "Aggregated maximum capacity of the List across allocated GameServers in the Fleet", "1")
fleetListCountStats = stats.Int64("fleets/lists/count", "Aggregated number of items in the List across the Fleet", "1")
fleetListCapacityStats = stats.Int64("fleets/lists/capacity", "Aggregated maximum capacity of the List across the Fleet", "1")
gameServerCountStats = stats.Int64("gameservers/count", "The count of gameservers", "1")
gameServerTotalStats = stats.Int64("gameservers/total", "The total of gameservers", "1")
gameServerPlayerConnectedTotal = stats.Int64("gameservers/player_connected", "The total number of players connected to gameservers", "1")
gameServerPlayerCapacityTotal = stats.Int64("gameservers/player_capacity", "The available player capacity for gameservers", "1")
nodesCountStats = stats.Int64("nodes/count", "The count of nodes in the cluster", "1")
gsPerNodesCountStats = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1")
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)

stateViews = []*view.View{
{
Expand Down Expand Up @@ -110,6 +126,62 @@ var (
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetCounterAllocatedCountName,
Measure: fleetCounterAllocatedCountStats,
Description: "Aggregated count of the Counter across allocated GameServers in the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetCounterAllocatedCapacityName,
Measure: fleetCounterAllocatedCapacityStats,
Description: "Aggregated maximum capacity of the Counter across allocated GameServers in the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetCounterCountName,
Measure: fleetCounterCountStats,
Description: "Aggregated count of the Counter across the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetCounterCapacityName,
Measure: fleetCounterCapacityStats,
Description: "Aggregated maximum capacity of the Counter across the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetListAllocatedCountName,
Measure: fleetListAllocatedCountStats,
Description: "Aggregated number of items in the List across allocated GameServers in the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetListAllocatedCapacityName,
Measure: fleetListAllocatedCapacityStats,
Description: "Aggregated maximum capacity of the List across allocated GameServers in the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetListCountName,
Measure: fleetListCountStats,
Description: "Aggregated number of items in the List across the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: fleetListCapacityName,
Measure: fleetListCapacityStats,
Description: "Aggregated maximum capacity of the List across the Fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
{
Name: gameServersCountName,
Measure: gameServerCountStats,
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ var (
keyVerb = MustTagKey("verb")
keyEndpoint = MustTagKey("endpoint")
keyEmpty = MustTagKey("empty")
keyCounter = MustTagKey("counter")
)

func recordWithTags(ctx context.Context, mutators []tag.Mutator, ms ...stats.Measurement) {
Expand Down

0 comments on commit 94a2f38

Please sign in to comment.