Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Monitoring improvements #559

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions build/grafana/dashboard-gameservers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -401,42 +401,42 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(1, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))",
"expr": "histogram_quantile(1, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "max",
"refId": "F"
},
{
"expr": "histogram_quantile(0.99, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))",
"expr": "histogram_quantile(0.99, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "99th",
"refId": "A"
},
{
"expr": "histogram_quantile(0.90, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))",
"expr": "histogram_quantile(0.90, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "90th",
"refId": "B"
},
{
"expr": "histogram_quantile(0.50, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))",
"expr": "histogram_quantile(0.50, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "50th",
"refId": "C"
},
{
"expr": "histogram_quantile(0, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))",
"expr": "histogram_quantile(0, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "min",
"refId": "E"
},
{
"expr": " agones_gameservers_node_count_sum /\n agones_gameservers_node_count_count",
"expr": "avg(delta(agones_gameservers_node_count_sum[1m]) / delta(agones_gameservers_node_count_count[1m]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "avg",
Expand Down
27 changes: 27 additions & 0 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ package metrics
import (
"context"
"strconv"
"strings"
"sync"
"time"

corev1 "k8s.io/api/core/v1"
v1 "k8s.io/client-go/listers/core/v1"

stablev1alpha1 "agones.dev/agones/pkg/apis/stable/v1alpha1"
Expand Down Expand Up @@ -371,6 +373,8 @@ func (c *Controller) collectNodeCounts() {
c.logger.WithError(err).Warn("failed listing gameservers")
return
}

nodes = removeSystemNodes(nodes)
recordWithTags(context.Background(), []tag.Mutator{tag.Insert(keyEmpty, "true")},
nodesCountStats.M(int64(len(nodes)-len(gsPerNodes))))
recordWithTags(context.Background(), []tag.Mutator{tag.Insert(keyEmpty, "false")},
Expand All @@ -381,3 +385,26 @@ func (c *Controller) collectNodeCounts() {
}

}

// removeSystemNodes returns the nodes for which isSystemNode reports false,
// keeping their original order. The result is built in a fresh slice, so the
// input slice is left untouched; it is nil when no node is kept.
func removeSystemNodes(nodes []*corev1.Node) []*corev1.Node {
	var kept []*corev1.Node

	for i := range nodes {
		if isSystemNode(nodes[i]) {
			// System nodes are excluded from the returned slice.
			continue
		}
		kept = append(kept, nodes[i])
	}

	return kept
}

// isSystemNode reports whether the node carries at least one taint whose key
// begins with "stable.agones.dev/"; such a node is considered a system node.
// A node with no taints, or with only unrelated taints, yields false.
func isSystemNode(n *corev1.Node) bool {
	taints := n.Spec.Taints

	for i := range taints {
		if strings.HasPrefix(taints[i].Key, "stable.agones.dev/") {
			return true
		}
	}

	return false
}
2 changes: 1 addition & 1 deletion pkg/metrics/controller_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ var (
Name: "gameservers_node_count",
Measure: gsPerNodesCountStats,
Description: "The count of gameservers per node in the cluster",
Aggregation: view.Distribution(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 40, 50, 60, 70, 80, 90, 100, 110, 120),
Aggregation: view.Distribution(0.00001, 1.00001, 2.00001, 3.00001, 4.00001, 5.00001, 6.00001, 7.00001, 8.00001, 9.00001, 10.00001, 11.00001, 12.00001, 13.00001, 14.00001, 15.00001, 16.00001, 32.00001, 40.00001, 50.00001, 60.00001, 70.00001, 80.00001, 90.00001, 100.00001, 110.00001, 120.00001),
},
}
)
Expand Down
5 changes: 3 additions & 2 deletions pkg/metrics/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ func TestControllerGameServersNodeState(t *testing.T) {
c.collect()
report()

assert.Nil(t, testutil.GatherAndCompare(registry, strings.NewReader(nodeCountExpected), "agones_nodes_count", "agones_gameservers_node_count"))

if err := testutil.GatherAndCompare(registry, strings.NewReader(nodeCountExpected), "agones_nodes_count", "agones_gameservers_node_count"); err != nil {
t.Fatal(err)
}
}
53 changes: 27 additions & 26 deletions pkg/metrics/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,32 +290,33 @@ agones_fleet_autoscalers_limited{fleet_name="deleted-fleet",name="deleted"} 0

var nodeCountExpected = `# HELP agones_gameservers_node_count The count of gameservers per node in the cluster
# TYPE agones_gameservers_node_count histogram
agones_gameservers_node_count_bucket{le="1"} 1
agones_gameservers_node_count_bucket{le="2"} 2
agones_gameservers_node_count_bucket{le="3"} 3
agones_gameservers_node_count_bucket{le="4"} 3
agones_gameservers_node_count_bucket{le="5"} 3
agones_gameservers_node_count_bucket{le="6"} 3
agones_gameservers_node_count_bucket{le="7"} 3
agones_gameservers_node_count_bucket{le="8"} 3
agones_gameservers_node_count_bucket{le="9"} 3
agones_gameservers_node_count_bucket{le="10"} 3
agones_gameservers_node_count_bucket{le="11"} 3
agones_gameservers_node_count_bucket{le="12"} 3
agones_gameservers_node_count_bucket{le="13"} 3
agones_gameservers_node_count_bucket{le="14"} 3
agones_gameservers_node_count_bucket{le="15"} 3
agones_gameservers_node_count_bucket{le="16"} 3
agones_gameservers_node_count_bucket{le="32"} 3
agones_gameservers_node_count_bucket{le="40"} 3
agones_gameservers_node_count_bucket{le="50"} 3
agones_gameservers_node_count_bucket{le="60"} 3
agones_gameservers_node_count_bucket{le="70"} 3
agones_gameservers_node_count_bucket{le="80"} 3
agones_gameservers_node_count_bucket{le="90"} 3
agones_gameservers_node_count_bucket{le="100"} 3
agones_gameservers_node_count_bucket{le="110"} 3
agones_gameservers_node_count_bucket{le="120"} 3
agones_gameservers_node_count_bucket{le="1e-05"} 1
agones_gameservers_node_count_bucket{le="1.00001"} 2
agones_gameservers_node_count_bucket{le="2.00001"} 3
agones_gameservers_node_count_bucket{le="3.00001"} 3
agones_gameservers_node_count_bucket{le="4.00001"} 3
agones_gameservers_node_count_bucket{le="5.00001"} 3
agones_gameservers_node_count_bucket{le="6.00001"} 3
agones_gameservers_node_count_bucket{le="7.00001"} 3
agones_gameservers_node_count_bucket{le="8.00001"} 3
agones_gameservers_node_count_bucket{le="9.00001"} 3
agones_gameservers_node_count_bucket{le="10.00001"} 3
agones_gameservers_node_count_bucket{le="11.00001"} 3
agones_gameservers_node_count_bucket{le="12.00001"} 3
agones_gameservers_node_count_bucket{le="13.00001"} 3
agones_gameservers_node_count_bucket{le="14.00001"} 3
agones_gameservers_node_count_bucket{le="15.00001"} 3
agones_gameservers_node_count_bucket{le="16.00001"} 3
agones_gameservers_node_count_bucket{le="32.00001"} 3
agones_gameservers_node_count_bucket{le="40.00001"} 3
agones_gameservers_node_count_bucket{le="50.00001"} 3
agones_gameservers_node_count_bucket{le="60.00001"} 3
agones_gameservers_node_count_bucket{le="70.00001"} 3
agones_gameservers_node_count_bucket{le="80.00001"} 3
agones_gameservers_node_count_bucket{le="90.00001"} 3
agones_gameservers_node_count_bucket{le="100.00001"} 3
agones_gameservers_node_count_bucket{le="110.00001"} 3
agones_gameservers_node_count_bucket{le="120.00001"} 3
agones_gameservers_node_count_bucket{le="+Inf"} 3
agones_gameservers_node_count_sum 3
agones_gameservers_node_count_count 3
Expand Down