Skip to content

Commit

Permalink
Export various stats about services in the metrics exported by this c…
Browse files Browse the repository at this point in the history
…ontroller.
  • Loading branch information
mmamczur committed Feb 23, 2023
1 parent ed35b2e commit 309bac4
Show file tree
Hide file tree
Showing 4 changed files with 1,035 additions and 0 deletions.
7 changes: 7 additions & 0 deletions cmd/glbc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/ingress-gce/pkg/l4lb"
"k8s.io/ingress-gce/pkg/psc"
"k8s.io/ingress-gce/pkg/serviceattachment"
"k8s.io/ingress-gce/pkg/servicemetrics"
"k8s.io/ingress-gce/pkg/svcneg"
"k8s.io/klog/v2"

Expand Down Expand Up @@ -293,6 +294,12 @@ func runControllers(ctx *ingctx.ControllerContext) {
klog.V(0).Infof("PSC Controller started")
}

if flags.F.EnableServiceMetrics {
metricsController := servicemetrics.NewController(ctx, flags.F.MetricsExportInterval, stopCh)
go metricsController.Run()
klog.V(0).Infof("Service Metrics Controller started")
}

var zoneGetter negtypes.ZoneGetter
zoneGetter = lbc.Translator
// In NonGCP mode, use the zone specified in gce.conf directly.
Expand Down
2 changes: 2 additions & 0 deletions pkg/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ var (
EnablePinhole bool
EnableL4ILBDualStack bool
EnableMultipleIGs bool
EnableServiceMetrics bool
MaxIGSize int
}{
GCERateLimitScale: 1.0,
Expand Down Expand Up @@ -244,6 +245,7 @@ L7 load balancing. CSV values accepted. Example: -node-port-ranges=80,8080,400-5
flag.BoolVar(&F.RunIngressController, "run-ingress-controller", true, `Optional, whether or not to run IngressController as part of glbc. If set to false, ingress resources will not be processed. Only the L4 Service controller will be run, if that flag is set to true.`)
flag.BoolVar(&F.RunL4Controller, "run-l4-controller", false, `Optional, whether or not to run L4 Service Controller as part of glbc. If set to true, services of Type:LoadBalancer with Internal annotation will be processed by this controller.`)
flag.BoolVar(&F.RunL4NetLBController, "run-l4-netlb-controller", false, `Optional, f enabled then the L4NetLbController will be run.`)
flag.BoolVar(&F.EnableServiceMetrics, "enable-service-metrics", false, `Optional, if enabled then the service metrics controller will be run.`)
flag.BoolVar(&F.EnableBackendConfigHealthCheck, "enable-backendconfig-healthcheck", false, "Enable configuration of HealthChecks from the BackendConfig")
flag.BoolVar(&F.EnablePSC, "enable-psc", false, "Enable PSC controller")
flag.BoolVar(&F.EnableIngressGAFields, "enable-ingress-ga-fields", false, "Enable using Ingress Class GA features")
Expand Down
361 changes: 361 additions & 0 deletions pkg/servicemetrics/servicemetrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,361 @@
package servicemetrics

import (
"fmt"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
listers "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/ingress-gce/pkg/annotations"
"k8s.io/ingress-gce/pkg/context"
"k8s.io/ingress-gce/pkg/utils"
"k8s.io/ingress-gce/pkg/utils/common"
"k8s.io/klog/v2"
"k8s.io/legacy-cloud-providers/gce"
)

const (
// Names of the labels used by the service metrics. Each constant is used as
// a label name by one or more of the gauge vectors declared in this file.
labelType = "type"
labelExternalTrafficPolicy = "external_traffic_policy"
labelInternalTrafficPolicy = "internal_traffic_policy"
labelSessionAffinityConfig = "session_affinity_config"
labelProtocol = "protocol"
labelIPFamilies = "ip_families"
labelIPFamilyPolicy = "ip_family_policy"
labelIsStaticIPv4 = "is_static_ip_v4"
labelIsStaticIPv6 = "is_static_ip_v6"
labelNetworkTier = "network_tier"
labelGlobalAccess = "global_access"
labelCustomSubnet = "custom_subnet"
labelNumberOfPorts = "number_of_ports"

// Possible values for the "type" label of Services whose Spec.Type is
// LoadBalancer (see getServiceType). Services of any other type are labelled
// with their Spec.Type value instead.
serviceTypeSubsettingILB = "SubsettingILB"
serviceTypeRBSXLB = "RBSXLB"
serviceTypeLegacyILB = "LegacyILB"
serviceTypeLegacyXLB = "LegacyXLB"

// sessionAffinityTimeoutDefault is the default timeout value for a service session affinity.
// It is compared against ClientIP.TimeoutSeconds, so it is expressed in seconds.
sessionAffinityTimeoutDefault = 10800

// Possible values for the session_affinity_config label. Timeouts are
// bucketed relative to sessionAffinityTimeoutDefault; "None" is used when
// session affinity is not ClientIP.
sessionAffinityBucketMoreThanDefault = "10800+"
sessionAffinityBucketDefault = "10800"
sessionAffinityBucketLessThanDefault = "0-10799"
sessionAffinityBucketNone = "None"
)

// Gauge vectors exported by this package. Each gauge value is the number of
// Services that share one combination of label values; the values are written
// by updatePrometheusMetrics.
var (
// serviceL4ProtocolStatsCount counts Services by type, traffic policies,
// session affinity bucket, protocol and number-of-ports bucket.
serviceL4ProtocolStatsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_l4_protocol_stats",
Help: "Number of services broken down by various stats",
},
[]string{
labelType,
labelExternalTrafficPolicy,
labelInternalTrafficPolicy,
labelSessionAffinityConfig,
labelProtocol,
labelNumberOfPorts,
},
)
// serviceIPStackStatsCount counts Services by type, traffic policies, IP
// families, IP family policy and whether a static IP is used.
serviceIPStackStatsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_ip_stack_stats",
Help: "Number of services broken down by various stats",
},
[]string{
labelType,
labelExternalTrafficPolicy,
labelInternalTrafficPolicy,
labelIPFamilies,
labelIPFamilyPolicy,
labelIsStaticIPv4,
labelIsStaticIPv6,
},
)
// serviceGCPFeaturesStatsCount counts Services by type, network tier,
// global access and custom subnet usage.
serviceGCPFeaturesStatsCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_gcp_features_stats",
Help: "Number of services broken down by various stats",
},
[]string{
labelType,
labelNetworkTier,
labelGlobalAccess,
labelCustomSubnet,
},
)
)

// init registers the three service stats gauge vectors with the default
// Prometheus registry. MustRegister panics if any registration fails.
func init() {
	klog.V(3).Infof("Registering Service stats usage metrics %v", serviceL4ProtocolStatsCount)
	prometheus.MustRegister(
		serviceL4ProtocolStatsCount,
		serviceIPStackStatsCount,
		serviceGCPFeaturesStatsCount,
	)
}

// Controller is the controller that exposes and populates metrics containing various stats about Services in the cluster.
// It periodically lists all Services from serviceInformer, groups them by
// their properties and writes the group sizes to the Prometheus gauges.
type Controller struct {
// ctx is the controller context the Controller was created with.
ctx *context.ControllerContext
// stopCh stops the export loop and unblocks Run.
stopCh chan struct{}
// svcQueue is not set by NewController and not used in this file.
svcQueue utils.TaskQueue
// metricsInterval is both the initial delay and the period between exports.
metricsInterval time.Duration
// serviceInformer provides the store from which Services are listed.
serviceInformer cache.SharedIndexInformer
}

// NewController returns a Controller that lists Services through the service
// informer held by ctx and exports metrics every exportInterval. The returned
// Controller does nothing until Run is called; stopCh is the channel Run
// watches to stop.
func NewController(ctx *context.ControllerContext, exportInterval time.Duration, stopCh chan struct{}) *Controller {
	return &Controller{
		ctx:             ctx,
		serviceInformer: ctx.ServiceInformer,
		metricsInterval: exportInterval,
		stopCh:          stopCh,
	}
}

// Run starts the controller and blocks until the stop channel is closed.
//
// The first export happens one metricsInterval after Run is called and is then
// repeated every metricsInterval. The initial delay is interruptible: if the
// stop channel fires before the first export, the background goroutine exits
// immediately instead of sleeping out the rest of the interval.
func (c *Controller) Run() {
	klog.Infof("Starting Service Metric Stats controller")
	go func() {
		// Delay the first export by one interval, but do not outlive a stop
		// request while waiting.
		initialDelay := time.NewTimer(c.metricsInterval)
		select {
		case <-c.stopCh:
			initialDelay.Stop()
			return
		case <-initialDelay.C:
		}
		wait.Until(c.export, c.metricsInterval, c.stopCh)
	}()
	<-c.stopCh
}

// serviceL4ProtocolMetricState defines metric state related to the L4 protocol
// related part of services.
// Values of this type are used as map keys when counting Services (see
// groupServices); each field supplies one label value of
// serviceL4ProtocolStatsCount.
type serviceL4ProtocolMetricState struct {
// Type is the value returned by getServiceType.
Type string
ExternalTrafficPolicy string
InternalTrafficPolicy string
// SessionAffinityConfig is one of the sessionAffinityBucket* values.
SessionAffinityConfig string
// NumberOfPorts is a bucket label such as "2-5", not an exact count.
NumberOfPorts string
// Protocol is the common protocol of all ports, "mixed", or "" when the
// Service has no ports.
Protocol string
}

// serviceIPStackMetricState defines metric state related to the IP stack of services.
// Values of this type are used as map keys when counting Services (see
// groupServices); each field supplies one label value of
// serviceIPStackStatsCount.
type serviceIPStackMetricState struct {
// Type is the value returned by getServiceType.
Type string
ExternalTrafficPolicy string
InternalTrafficPolicy string
// IPFamilies is the first IP family, or "<first>-<second>" when the
// Service lists two families.
IPFamilies string
IPFamilyPolicy string
// IsStaticIPv4 is true when Spec.LoadBalancerIP is set.
IsStaticIPv4 bool
// IsStaticIPv6 is always false in metricsForService for now.
IsStaticIPv6 bool
}

// serviceGCPFeaturesMetricState defines metric state related to the GCP
// specific features of services.
// Values of this type are used as map keys when counting Services (see
// groupServices); each field supplies one label value of
// serviceGCPFeaturesStatsCount.
type serviceGCPFeaturesMetricState struct {
// Type is the value returned by getServiceType.
Type string
// NetworkTier is the string form of the tier returned by utils.GetNetworkTier.
NetworkTier string
// GlobalAccess is the result of gce.GetLoadBalancerAnnotationAllowGlobalAccess.
GlobalAccess bool
// CustomSubnet is true when gce.GetLoadBalancerAnnotationSubnet returns a
// non-empty value.
CustomSubnet bool
}

// export lists every Service in the informer's store, groups the Services by
// their metric state and publishes the resulting counts to Prometheus. If the
// list fails, the error is logged and nothing is published.
func (c *Controller) export() {
	lister := listers.NewServiceLister(c.serviceInformer.GetIndexer())
	allServices, err := lister.List(labels.Everything())
	if err != nil {
		klog.Errorf("failed to list services err=%v", err)
		return
	}

	updatePrometheusMetrics(groupServices(allServices))
}

// groupServices computes the three metric states of every Service and returns,
// for each kind of state, how many Services share each distinct state value.
func groupServices(services []*v1.Service) (map[serviceL4ProtocolMetricState]int64, map[serviceIPStackMetricState]int64, map[serviceGCPFeaturesMetricState]int64) {
	var (
		l4Counts  = map[serviceL4ProtocolMetricState]int64{}
		ipCounts  = map[serviceIPStackMetricState]int64{}
		gcpCounts = map[serviceGCPFeaturesMetricState]int64{}
	)

	for i := range services {
		l4, ip, gcp := metricsForService(services[i])
		l4Counts[*l4] += 1
		ipCounts[*ip] += 1
		gcpCounts[*gcp] += 1
	}
	return l4Counts, ipCounts, gcpCounts
}

// updatePrometheusMetrics publishes the per-state service counts to the three
// gauge vectors. Each map key supplies the label values and the map value
// becomes the gauge value.
//
// Every vector is reset right before it is repopulated. Without the reset, a
// label combination that matched services in an earlier export but matches
// none now would never be overwritten and would keep reporting its old count.
// The trade-off is that a scrape racing with this function may briefly see a
// vector with some series missing.
func updatePrometheusMetrics(l4ProtocolState map[serviceL4ProtocolMetricState]int64, ipStackState map[serviceIPStackMetricState]int64, gcpFeaturesState map[serviceGCPFeaturesMetricState]int64) {
	serviceL4ProtocolStatsCount.Reset()
	for serviceStat, count := range l4ProtocolState {
		serviceL4ProtocolStatsCount.With(prometheus.Labels{
			labelType:                  serviceStat.Type,
			labelExternalTrafficPolicy: serviceStat.ExternalTrafficPolicy,
			labelInternalTrafficPolicy: serviceStat.InternalTrafficPolicy,
			labelSessionAffinityConfig: serviceStat.SessionAffinityConfig,
			labelProtocol:              serviceStat.Protocol,
			labelNumberOfPorts:         serviceStat.NumberOfPorts,
		}).Set(float64(count))
	}

	serviceIPStackStatsCount.Reset()
	for serviceStat, count := range ipStackState {
		serviceIPStackStatsCount.With(prometheus.Labels{
			labelType:                  serviceStat.Type,
			labelExternalTrafficPolicy: serviceStat.ExternalTrafficPolicy,
			labelInternalTrafficPolicy: serviceStat.InternalTrafficPolicy,
			labelIPFamilies:            serviceStat.IPFamilies,
			labelIPFamilyPolicy:        serviceStat.IPFamilyPolicy,
			labelIsStaticIPv4:          strconv.FormatBool(serviceStat.IsStaticIPv4),
			labelIsStaticIPv6:          strconv.FormatBool(serviceStat.IsStaticIPv6),
		}).Set(float64(count))
	}

	serviceGCPFeaturesStatsCount.Reset()
	for serviceStat, count := range gcpFeaturesState {
		serviceGCPFeaturesStatsCount.With(prometheus.Labels{
			labelType:         serviceStat.Type,
			labelNetworkTier:  serviceStat.NetworkTier,
			labelGlobalAccess: strconv.FormatBool(serviceStat.GlobalAccess),
			labelCustomSubnet: strconv.FormatBool(serviceStat.CustomSubnet),
		}).Set(float64(count))
	}
}

// metricsForService derives the three metric states (L4 protocol, IP stack and
// GCP features) that describe a single Service. The type and traffic policy
// values are computed once and shared between the states that carry them.
func metricsForService(service *v1.Service) (*serviceL4ProtocolMetricState, *serviceIPStackMetricState, *serviceGCPFeaturesMetricState) {
	svcType := getServiceType(service)
	intPolicy := getInternalTrafficPolicy(service)
	extPolicy := getExternalTrafficPolicy(service)
	spec := &service.Spec

	l4State := serviceL4ProtocolMetricState{
		Type:                  svcType,
		ExternalTrafficPolicy: extPolicy,
		InternalTrafficPolicy: intPolicy,
		SessionAffinityConfig: getSessionAffinityConfig(service),
		NumberOfPorts:         getPortsBucket(spec.Ports),
		Protocol:              getProtocol(spec.Ports),
	}

	ipState := serviceIPStackMetricState{
		Type:                  svcType,
		ExternalTrafficPolicy: extPolicy,
		InternalTrafficPolicy: intPolicy,
		IPFamilies:            getIPFamilies(spec.IPFamilies),
		IPFamilyPolicy:        getIPFamilyPolicy(spec.IPFamilyPolicy),
		// TODO update this logic once static IPv6 addresses are supported.
		IsStaticIPv4: spec.LoadBalancerIP != "",
		IsStaticIPv6: false,
	}

	// Only the tier itself is recorded; the second return value is ignored.
	tier, _ := utils.GetNetworkTier(service)
	subnet := gce.GetLoadBalancerAnnotationSubnet(service)
	gcpState := serviceGCPFeaturesMetricState{
		Type:         svcType,
		NetworkTier:  string(tier),
		GlobalAccess: gce.GetLoadBalancerAnnotationAllowGlobalAccess(service),
		CustomSubnet: subnet != "",
	}

	return &l4State, &ipState, &gcpState
}

// getExternalTrafficPolicy returns the Service's external traffic policy as a
// string, falling back to "Cluster" when the field is empty.
func getExternalTrafficPolicy(service *v1.Service) string {
	if policy := service.Spec.ExternalTrafficPolicy; policy != "" {
		return string(policy)
	}
	return string(v1.ServiceExternalTrafficPolicyTypeCluster)
}

// getInternalTrafficPolicy returns the Service's internal traffic policy as a
// string, falling back to "Cluster" when the field is nil.
func getInternalTrafficPolicy(service *v1.Service) string {
	if policy := service.Spec.InternalTrafficPolicy; policy != nil {
		return string(*policy)
	}
	return string(v1.ServiceInternalTrafficPolicyCluster)
}

// getPortsBucket maps the number of ports to a coarse bucket label: the exact
// count for 0 or 1 ports, "2-5", "6-100", or "100+" for anything above 100.
func getPortsBucket(ports []v1.ServicePort) string {
	switch count := len(ports); {
	case count <= 1:
		return fmt.Sprint(count)
	case count <= 5:
		return "2-5"
	case count <= 100:
		return "6-100"
	default:
		return "100+"
	}
}

// protocolOrDefault returns the port's protocol as a string, treating an unset
// protocol as TCP.
func protocolOrDefault(port v1.ServicePort) string {
	if proto := port.Protocol; proto != "" {
		return string(proto)
	}
	return string(v1.ProtocolTCP)
}

// getProtocol returns the protocol shared by all ports (with unset protocols
// counted as TCP), "mixed" if the ports use more than one protocol, or "" when
// there are no ports.
func getProtocol(ports []v1.ServicePort) string {
	if len(ports) == 0 {
		return ""
	}

	first := protocolOrDefault(ports[0])
	// The first port trivially matches itself, so compare only the rest.
	for i := 1; i < len(ports); i++ {
		if protocolOrDefault(ports[i]) != first {
			return "mixed"
		}
	}
	return first
}

// getIPFamilies renders the IP families of a Service as a label value: the
// two families joined by "-" when exactly two are listed, otherwise the first
// family.
//
// An empty slice yields "" rather than indexing families[0], which would
// panic with an index out of range and take the export loop down with it. The
// empty label mirrors what getProtocol returns for a Service without ports.
func getIPFamilies(families []v1.IPFamily) string {
	switch len(families) {
	case 0:
		return ""
	case 2:
		return fmt.Sprintf("%s-%s", string(families[0]), string(families[1]))
	default:
		return string(families[0])
	}
}

// getIPFamilyPolicy returns the IP family policy as a string, falling back to
// "SingleStack" when no policy is set.
func getIPFamilyPolicy(policyType *v1.IPFamilyPolicyType) string {
	if policyType != nil {
		return string(*policyType)
	}
	return string(v1.IPFamilyPolicySingleStack)
}

// getServiceType returns the value of the "type" label for a Service.
//
// Services that are not of type LoadBalancer are labelled with their Spec.Type.
// LoadBalancer Services are classified as internal (ILB) or external (XLB)
// via the annotations helpers, and further split by whether the matching V2
// finalizer is present on the object. "" is returned if neither helper claims
// the Service.
func getServiceType(service *v1.Service) string {
	if svcType := service.Spec.Type; svcType != v1.ServiceTypeLoadBalancer {
		return string(svcType)
	}

	// Only the boolean result of the helpers is needed here.
	if isILB, _ := annotations.WantsL4ILB(service); isILB {
		if !common.HasGivenFinalizer(service.ObjectMeta, common.ILBFinalizerV2) {
			return serviceTypeLegacyILB
		}
		return serviceTypeSubsettingILB
	}

	if isNetLB, _ := annotations.WantsL4NetLB(service); isNetLB {
		if !common.HasGivenFinalizer(service.ObjectMeta, common.NetLBFinalizerV2) {
			return serviceTypeLegacyXLB
		}
		return serviceTypeRBSXLB
	}

	return ""
}

// getSessionAffinityConfig returns the session affinity bucket label for a
// Service: "None" when affinity is not ClientIP, the default bucket when no
// explicit timeout is configured, and otherwise the bucket obtained by
// comparing the configured timeout with sessionAffinityTimeoutDefault.
func getSessionAffinityConfig(service *v1.Service) string {
	spec := &service.Spec
	if spec.SessionAffinity != v1.ServiceAffinityClientIP {
		return sessionAffinityBucketNone
	}

	cfg := spec.SessionAffinityConfig
	if cfg == nil || cfg.ClientIP == nil || cfg.ClientIP.TimeoutSeconds == nil {
		return sessionAffinityBucketDefault
	}

	switch seconds := *cfg.ClientIP.TimeoutSeconds; {
	case seconds < sessionAffinityTimeoutDefault:
		return sessionAffinityBucketLessThanDefault
	case seconds > sessionAffinityTimeoutDefault:
		return sessionAffinityBucketMoreThanDefault
	default:
		return sessionAffinityBucketDefault
	}
}
Loading

0 comments on commit 309bac4

Please sign in to comment.