Skip to content

Commit

Permalink
Add l4netlb metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
kl52752 committed Mar 4, 2022
1 parent a277b80 commit 08ef19b
Show file tree
Hide file tree
Showing 10 changed files with 460 additions and 17 deletions.
30 changes: 30 additions & 0 deletions pkg/l4lb/l4netlbcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ import (
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
listers "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/ingress-gce/pkg/annotations"
"k8s.io/ingress-gce/pkg/backends"
"k8s.io/ingress-gce/pkg/context"
"k8s.io/ingress-gce/pkg/controller/translator"
"k8s.io/ingress-gce/pkg/instances"
l4metrics "k8s.io/ingress-gce/pkg/l4lb/metrics"
"k8s.io/ingress-gce/pkg/loadbalancers"
"k8s.io/ingress-gce/pkg/utils"
"k8s.io/ingress-gce/pkg/utils/common"
Expand Down Expand Up @@ -107,6 +109,7 @@ func NewL4NetLBController(
}
},
})
//TODO change to component name "l4netlb-controller"
ctx.AddHealthCheck("service-controller health", l4netLBc.checkHealth)
return l4netLBc
}
Expand Down Expand Up @@ -290,12 +293,14 @@ func (lc *L4NetLBController) sync(key string) error {
klog.V(3).Infof("Ignoring sync of non-existent service %s", key)
return nil
}
namespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}.String()
if lc.needsDeletion(svc) {
klog.V(3).Infof("Deleting L4 External LoadBalancer resources for service %s", key)
result := lc.garbageCollectRBSNetLB(key, svc)
if result == nil {
return nil
}
lc.publishMetrics(result, namespacedName)
return result.Error
}

Expand All @@ -305,6 +310,7 @@ func (lc *L4NetLBController) sync(key string) error {
// result will be nil if the service was ignored(due to presence of service controller finalizer).
return nil
}
lc.publishMetrics(result, namespacedName)
return result.Error
}
klog.V(3).Infof("Ignoring sync of service %s, neither delete nor ensure needed.", key)
Expand Down Expand Up @@ -367,6 +373,7 @@ func (lc *L4NetLBController) syncInternal(service *v1.Service) *loadbalancers.L4
syncResult.Error = fmt.Errorf("failed to set resource annotations, err: %w", err)
return syncResult
}
syncResult.MetricsState.InSuccess = true
return syncResult
}

Expand Down Expand Up @@ -416,3 +423,26 @@ func (lc *L4NetLBController) garbageCollectRBSNetLB(key string, svc *v1.Service)
lc.ctx.Recorder(svc.Namespace).Eventf(svc, v1.EventTypeNormal, "DeletedLoadBalancer", "Deleted L4 External LoadBalancer")
return result
}

// publishMetrics sets controller metrics for NetLB services and pushes NetLB metrics based on sync type.
func (lc *L4NetLBController) publishMetrics(result *loadbalancers.L4NetLBSyncResult, namespacedName string) {
if result == nil {
return
}
switch result.SyncType {
case loadbalancers.SyncTypeCreate, loadbalancers.SyncTypeUpdate:
klog.V(4).Infof("External L4 Loadbalancer for Service %s ensured, updating its state %v in metrics cache", namespacedName, result.MetricsState)
lc.ctx.ControllerMetrics.SetL4NetLBService(namespacedName, result.MetricsState)
l4metrics.PublishNetLBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)

case loadbalancers.SyncTypeDelete:
// if service is successfully deleted, remove it from cache
if result.Error == nil {
klog.V(4).Infof("External L4 Loadbalancer for Service %s deleted, removing its state from metrics cache", namespacedName)
lc.ctx.ControllerMetrics.DeleteL4NetLBService(namespacedName)
}
l4metrics.PublishNetLBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)
default:
klog.Warningf("Unknown sync type %q, skipping metrics", result.SyncType)
}
}
41 changes: 40 additions & 1 deletion pkg/l4lb/l4netlbcontroller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"math/rand"
"net/http"
"reflect"
"sort"
"strings"
Expand Down Expand Up @@ -361,7 +362,25 @@ func TestForwardingRuleWithPortRange(t *testing.T) {
}

func TestProcessServiceCreate(t *testing.T) {
svc, lc := createAndSyncNetLBSvc(t)
lc := newL4NetLBServiceController()
svc := test.NewL4NetLBRBSService(8080)
addNetLBService(lc, svc)
prevMetrics := test.GetL4NetLBLatencyMetric(t)
if prevMetrics == nil {
t.Fatalf("Cannot get prometheus metrics for L4NetLB latency")
}
key, _ := common.KeyFunc(svc)
err := lc.sync(key)
if err != nil {
t.Errorf("Failed to sync newly added service %s, err %v", svc.Name, err)
}
svc, err = lc.ctx.KubeClient.CoreV1().Services(svc.Namespace).Get(context.TODO(), svc.Name, metav1.GetOptions{})
if err != nil {
t.Errorf("Failed to lookup service %s, err %v", svc.Name, err)
}
prevMetrics.ValidateDiff(test.GetL4NetLBLatencyMetric(t), &test.L4LBLatencyMetricInfo{CreateCount: 1, UpperBoundSeconds: 1}, t)

validateNetLBSvcStatus(svc, t)
if err := checkBackendService(lc, svc); err != nil {
t.Errorf("UnexpectedError %v", err)
}
Expand Down Expand Up @@ -509,6 +528,26 @@ func TestProcessServiceCreationFailed(t *testing.T) {
}
}
}

func TestMetricsWithSyncError(t *testing.T) {
lc := newL4NetLBServiceController()
(lc.ctx.Cloud.Compute().(*cloud.MockGCE)).MockForwardingRules.InsertHook = mock.InsertForwardingRulesInternalErrHook
prevMetrics := test.GetL4NetLBErrorMetric(t)
svc := test.NewL4NetLBRBSService(8080)
addNetLBService(lc, svc)

key, _ := common.KeyFunc(svc)
err := lc.sync(key)
if err == nil {
t.Errorf("Expected error in sync controller")
}
expectMetrics := &test.L4LBErrorMetricInfo{
ByGCEResource: map[string]uint64{annotations.ForwardingRuleResource: 1},
ByErrorType: map[string]uint64{http.StatusText(http.StatusInternalServerError): 1}}
received := test.GetL4NetLBErrorMetric(t)
prevMetrics.ValidateDiff(received, expectMetrics, t)
}

func TestProcessServiceDeletionFailed(t *testing.T) {
for _, param := range []struct {
addMockFunc func(*cloud.MockGCE)
Expand Down
50 changes: 46 additions & 4 deletions pkg/l4lb/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ import (
)

const (
statusSuccess = "success"
statusError = "error"
L4ilbLatencyMetricName = "l4_ilb_sync_duration_seconds"
L4ilbErrorMetricName = "l4_ilb_sync_error_count"
statusSuccess = "success"
statusError = "error"
L4ilbLatencyMetricName = "l4_ilb_sync_duration_seconds"
L4ilbErrorMetricName = "l4_ilb_sync_error_count"
L4netlbLatencyMetricName = "l4_netlb_sync_duration_seconds"
L4netlbErrorMetricName = "l4_netlb_sync_error_count"
)

var (
Expand Down Expand Up @@ -58,12 +60,30 @@ var (
},
l4LBSyncErrorMetricLabels,
)
l4NetLBSyncLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: L4netlbLatencyMetricName,
Help: "Latency of an L4 NetLB Sync",
// custom buckets - [30s, 60s, 120s, 240s(4min), 480s(8min), 960s(16m), +Inf]
Buckets: prometheus.ExponentialBuckets(30, 2, 6),
},
l4LBSyncLatencyMetricsLabels,
)
l4NetLBSyncErrorCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: L4netlbErrorMetricName,
Help: "Count of L4 NetLB Sync errors",
},
l4LBSyncErrorMetricLabels,
)
)

// init registers l4 ilb sync metrics.
func init() {
klog.V(3).Infof("Registering L4 ILB controller metrics %v, %v", l4ILBSyncLatency, l4ILBSyncErrorCount)
prometheus.MustRegister(l4ILBSyncLatency, l4ILBSyncErrorCount)
klog.V(3).Infof("Registering L4 NetLB controller metrics %v, %v", l4NetLBSyncLatency, l4NetLBSyncErrorCount)
prometheus.MustRegister(l4NetLBSyncLatency, l4NetLBSyncErrorCount)
}

// PublishL4ILBSyncMetrics exports metrics related to the L4 ILB sync.
Expand All @@ -87,3 +107,25 @@ func publishL4ILBSyncLatency(success bool, syncType string, startTime time.Time)
func publishL4ILBSyncErrorCount(syncType, gceResource, errorType string) {
l4ILBSyncErrorCount.WithLabelValues(syncType, gceResource, errorType).Inc()
}

// PublishL4NetLBSyncMetrics exports metrics related to the L4 NetLB sync.
func PublishNetLBSyncMetrics(success bool, syncType, gceResource, errType string, startTime time.Time) {
publishL4NetLBSyncLatency(success, syncType, startTime)
if !success {
publishL4NetLBSyncErrorCount(syncType, gceResource, errType)
}
}

// publishL4NetLBSyncLatency exports the given sync latency datapoint.
func publishL4NetLBSyncLatency(success bool, syncType string, startTime time.Time) {
status := statusSuccess
if !success {
status = statusError
}
l4NetLBSyncLatency.WithLabelValues(status, syncType).Observe(time.Since(startTime).Seconds())
}

// publishL4NetLBSyncLatency exports the given sync latency datapoint.
func publishL4NetLBSyncErrorCount(syncType, gceResource, errorType string) {
l4NetLBSyncErrorCount.WithLabelValues(syncType, gceResource, errorType).Inc()
}
15 changes: 10 additions & 5 deletions pkg/loadbalancers/l4netlb.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,9 @@ type L4NetLBSyncResult struct {
Error error
GCEResourceInError string
Status *corev1.LoadBalancerStatus
// TODO(kl52752) Change metric service state to NetLB
MetricsState metrics.L4ILBServiceState
SyncType string
StartTime time.Time
MetricsState metrics.L4NetLBServiceState
SyncType string
StartTime time.Time
}

// NewL4NetLB creates a new Handler for the given L4NetLB service.
Expand Down Expand Up @@ -141,7 +140,7 @@ func (l4netlb *L4NetLB) EnsureFrontend(nodeNames []string, svc *corev1.Service)
return result
}
result.Annotations[annotations.BackendServiceKey] = name
fr, _, err := l4netlb.ensureExternalForwardingRule(bs.SelfLink)
fr, ipAddrType, err := l4netlb.ensureExternalForwardingRule(bs.SelfLink)
if err != nil {
result.GCEResourceInError = annotations.ForwardingRuleResource
result.Error = fmt.Errorf("Failed to ensure forwarding rule - %w", err)
Expand All @@ -153,6 +152,12 @@ func (l4netlb *L4NetLB) EnsureFrontend(nodeNames []string, svc *corev1.Service)
result.Annotations[annotations.UDPForwardingRuleKey] = fr.Name
}
result.Status = &corev1.LoadBalancerStatus{Ingress: []corev1.LoadBalancerIngress{{IP: fr.IPAddress}}}
if fr.NetworkTier == cloud.NetworkTierPremium.ToGCEValue() {
result.MetricsState.IsPremiumTier = true
}
if ipAddrType == IPAddrManaged {
result.MetricsState.IsManagedIP = true
}
return result
}

Expand Down
64 changes: 64 additions & 0 deletions pkg/loadbalancers/l4netlb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,29 @@ import (

"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/mock"
ga "google.golang.org/api/compute/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
servicehelper "k8s.io/cloud-provider/service/helpers"
"k8s.io/ingress-gce/pkg/annotations"
"k8s.io/ingress-gce/pkg/composite"
"k8s.io/ingress-gce/pkg/metrics"
"k8s.io/ingress-gce/pkg/test"
"k8s.io/ingress-gce/pkg/utils"
namer_util "k8s.io/ingress-gce/pkg/utils/namer"
"k8s.io/legacy-cloud-providers/gce"
)

const (
managedIP = true
unmanagedIP = false
premiumTier = true
standardTier = false
usersIP = "35.10.211.60"
userAddrName = "UserStaticAddress"
)

func TestEnsureL4NetLoadBalancer(t *testing.T) {
t.Parallel()
nodeNames := []string{"test-node-1"}
Expand All @@ -59,6 +71,9 @@ func TestEnsureL4NetLoadBalancer(t *testing.T) {
t.Errorf("Annotations error: %v", err)
}
assertNetLbResources(t, svc, l4netlb, nodeNames)
if err := checkMetrics(result.MetricsState, managedIP, premiumTier); err != nil {
t.Errorf("Metrics error: %v", err)
}
}

func checkAnnotations(result *L4NetLBSyncResult, l4netlb *L4NetLB) error {
Expand Down Expand Up @@ -263,3 +278,52 @@ func assertNetLbResources(t *testing.T, apiService *v1.Service, l4NetLb *L4NetLB
t.Errorf("Expected error when looking up ephemeral address, got %v", addr)
}
}

func TestMetricsForUserStaticService(t *testing.T) {
nodeNames := []string{"test-node-1"}
vals := gce.DefaultTestClusterValues()
fakeGCE := getFakeGCECloud(vals)
createUserStaticIP(fakeGCE, vals.Region)

svc := test.NewL4NetLBRBSService(8080)
svc.Spec.LoadBalancerIP = usersIP
svc.ObjectMeta.Annotations[annotations.NetworkTierAnnotationKey] = string(cloud.NetworkTierStandard)
namer := namer_util.NewL4Namer(kubeSystemUID, namer_util.NewNamer(vals.ClusterName, "cluster-fw"))

l4netlb := NewL4NetLB(svc, fakeGCE, meta.Regional, namer, record.NewFakeRecorder(100), &sync.Mutex{})

if _, err := test.CreateAndInsertNodes(l4netlb.cloud, nodeNames, vals.ZoneName); err != nil {
t.Errorf("Unexpected error when adding nodes %v", err)
}
result := l4netlb.EnsureFrontend(nodeNames, svc)
if result.Error != nil {
t.Errorf("Failed to ensure loadBalancer, err %v", result.Error)
}
if err := checkMetrics(result.MetricsState, unmanagedIP, standardTier); err != nil {
t.Errorf("Metrics error: %v", err)
}
}

func createUserStaticIP(fakeGCE *gce.Cloud, region string) {
fakeGCE.Compute().(*cloud.MockGCE).MockAddresses.InsertHook = mock.InsertAddressHook
fakeGCE.Compute().(*cloud.MockGCE).MockAlphaAddresses.X = mock.AddressAttributes{}
fakeGCE.Compute().(*cloud.MockGCE).MockAddresses.X = mock.AddressAttributes{}
newAddr := &ga.Address{
Name: "userAddrName",
Description: fmt.Sprintf(`{"kubernetes.io/service-name":"%s"}`, "userAddrName"),
Address: usersIP,
AddressType: string(cloud.SchemeExternal),
NetworkTier: cloud.NetworkTierStandard.ToGCEValue(),
}
fakeGCE.ReserveRegionAddress(newAddr, region)
}

func checkMetrics(m metrics.L4NetLBServiceState, isManaged, isPremium bool) error {
if m.IsPremiumTier != isPremium {
return fmt.Errorf("L4 NetLB metric premium tier should be %v", isPremium)
}
if m.IsManagedIP != isManaged {
return fmt.Errorf("L4 NetLB metric is managed ip should be %v", isManaged)
}
return nil
}
12 changes: 12 additions & 0 deletions pkg/metrics/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ const (
// updating GCE Load Balancer.
l4ILBInError = feature("L4ILBInError")

l4NetLBService = feature("L4NetLBService")
// L4NetLBPremiumNetworkTier feature specifies that NetLB VIP is configured in Premium Network Tier.
l4NetLBPremiumNetworkTier = feature("L4NetLBPremiumNetworkTier")
// L4NetLBPremiumNetworkTier feature specifies that static IP Address is managed by controller.
l4NetLBManagedStaticIP = feature("L4NetLBManagedStaticIP")
// L4NetLBPremiumNetworkTier feature specifies number of all static IP Address managed by controller and by user.
l4NetLBStaticIP = feature("L4NetLBStaticIP")
// l4NetLBInSuccess feature specifies that NetLB VIP is configured.
l4NetLBInSuccess = feature("L4NetLBInSuccess")
// l4NetLBInInError feature specifies that an error had occurred while creating/updating GCE Load Balancer.
l4NetLBInError = feature("L4NetLBInError")

// PSC Features
sa = feature("ServiceAttachments")
saInSuccess = feature("ServiceAttachmentInSuccess")
Expand Down
Loading

0 comments on commit 08ef19b

Please sign in to comment.