Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add L4 DualStack Sync Latency metrics #1945

Merged
merged 1 commit into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pkg/l4lb/l4controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,11 +505,12 @@ func (l4c *L4Controller) publishMetrics(result *loadbalancers.L4ILBSyncResult, n
case loadbalancers.SyncTypeCreate, loadbalancers.SyncTypeUpdate:
klog.V(2).Infof("Internal L4 Loadbalancer for Service %s ensured, updating its state %v in metrics cache", namespacedName, result.MetricsState)
l4c.ctx.ControllerMetrics.SetL4ILBService(namespacedName, result.MetricsState)
l4metrics.PublishILBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)
if l4c.enableDualStack {
klog.V(2).Infof("Internal L4 DualStack Loadbalancer for Service %s ensured, updating its state %v in metrics cache", namespacedName, result.DualStackMetricsState)
l4c.ctx.ControllerMetrics.SetL4ILBDualStackService(namespacedName, result.DualStackMetricsState)
l4metrics.PublishL4ILBDualStackSyncLatency(result.Error == nil, result.SyncType, result.DualStackMetricsState.IPFamilies, result.StartTime)
}
l4metrics.PublishILBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)

case loadbalancers.SyncTypeDelete:
// if service is successfully deleted, remove it from cache
Expand All @@ -522,6 +523,9 @@ func (l4c *L4Controller) publishMetrics(result *loadbalancers.L4ILBSyncResult, n
}
}
l4metrics.PublishILBSyncMetrics(result.Error == nil, result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)
if l4c.enableDualStack {
l4metrics.PublishL4ILBDualStackSyncLatency(result.Error == nil, result.SyncType, result.DualStackMetricsState.IPFamilies, result.StartTime)
}
default:
klog.Warningf("Unknown sync type %q, skipping metrics", result.SyncType)
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/l4lb/l4netlbcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,4 +670,7 @@ func (lc *L4NetLBController) publishSyncMetrics(result *loadbalancers.L4NetLBSyn
return
}
metrics.PublishL4NetLBSyncError(result.SyncType, result.GCEResourceInError, utils.GetErrorType(result.Error), result.StartTime)
if lc.enableDualStack {
metrics.PublishL4NetLBDualStackSyncLatency(result.Error == nil, result.SyncType, result.DualStackMetricsState.IPFamilies, result.StartTime)
}
}
45 changes: 43 additions & 2 deletions pkg/l4lb/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ const (
statusSuccess = "success"
statusError = "error"
L4ilbLatencyMetricName = "l4_ilb_sync_duration_seconds"
L4ILBDualStackLatencyMetricName = "l4_ilb_dualstack_sync_duration_seconds"
L4ilbErrorMetricName = "l4_ilb_sync_error_count"
L4netlbLatencyMetricName = "l4_netlb_sync_duration_seconds"
L4NetLBDualStackLatencyMetricName = "l4_netlb_dualstack_sync_duration_seconds"
L4netlbErrorMetricName = "l4_netlb_sync_error_count"
L4netlbLegacyToRBSMigrationPreventedMetricName = "l4_netlb_legacy_to_rbs_migration_prevented_count"
l4failedHealthCheckName = "l4_failed_healthcheck_count"
Expand All @@ -40,7 +42,8 @@ var (
"sync_result", // result of the sync
"sync_type", // whether this is a new service, update or delete
}
l4LBSyncErrorMetricLabels = []string{
l4LBDualStackSyncLatencyMetricsLabels = append(l4LBSyncLatencyMetricsLabels, "ip_families")
l4LBSyncErrorMetricLabels = []string{
"sync_type", // whether this is a new service, update or delete
"gce_resource", // The GCE resource whose update caused the error
// max number of values for error_type = 18 k8s error reasons + 60 http status errors.
Expand All @@ -57,14 +60,22 @@ var (
},
l4LBSyncLatencyMetricsLabels,
)
// l4ILBDualStackSyncLatency is a histogram vector for L4 ILB DualStack syncs.
// It is created with the label set l4LBDualStackSyncLatencyMetricsLabels and
// exponential buckets (start 0.5, factor 2, 12 buckets).
l4ILBDualStackSyncLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: L4ILBDualStackLatencyMetricName,
Help: "Latency of an L4 ILB DualStack Sync",
Buckets: prometheus.ExponentialBuckets(0.5, 2, 12),
},
l4LBDualStackSyncLatencyMetricsLabels,
)
l4ILBSyncErrorCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: L4ilbErrorMetricName,
Help: "Count of L4 ILB Sync errors",
},
l4LBSyncErrorMetricLabels,
)
// l4ILBSyncLatency is a metric that represents the time spent processing L4NetLB service.
// l4NetLBSyncLatency is a metric that represents the time spent processing L4NetLB service.
// The metric is labeled with synchronization type and its result.
l4NetLBSyncLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Expand All @@ -76,6 +87,14 @@ var (
},
l4LBSyncLatencyMetricsLabels,
)
// l4NetLBDualStackSyncLatency is a histogram vector for L4 NetLB DualStack syncs.
// It is created with the label set l4LBDualStackSyncLatencyMetricsLabels and
// exponential buckets (start 0.5, factor 2, 12 buckets).
l4NetLBDualStackSyncLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name: L4NetLBDualStackLatencyMetricName,
		// Help text previously read "L4 NetB"; the component is the L4 NetLB.
		Help:    "Latency of an L4 NetLB DualStack Sync",
		Buckets: prometheus.ExponentialBuckets(0.5, 2, 12),
	},
	l4LBDualStackSyncLatencyMetricsLabels,
)
// l4NetLBSyncErrorCount is a metric that counts number of L4NetLB services in Error state.
// The metric is labeled with synchronization type, the type of error and the name of gce resource that is in error.
l4NetLBSyncErrorCount = prometheus.NewCounterVec(
Expand Down Expand Up @@ -112,8 +131,12 @@ var (
func init() {
klog.V(3).Infof("Registering L4 ILB controller metrics %v, %v", l4ILBSyncLatency, l4ILBSyncErrorCount)
prometheus.MustRegister(l4ILBSyncLatency, l4ILBSyncErrorCount)
klog.V(3).Infof("Registering L4 ILB DualStack controller metrics %v", l4ILBDualStackSyncLatency)
prometheus.MustRegister(l4ILBDualStackSyncLatency)
klog.V(3).Infof("Registering L4 NetLB controller metrics %v, %v", l4NetLBSyncLatency, l4NetLBSyncErrorCount)
prometheus.MustRegister(l4NetLBSyncLatency, l4NetLBSyncErrorCount)
klog.V(3).Infof("Registering L4 NetLB DualStack controller metrics %v", l4NetLBDualStackSyncLatency)
prometheus.MustRegister(l4NetLBDualStackSyncLatency)
klog.V(3).Infof("Registering L4 healthcheck failures count metric: %v", l4FailedHealthCheckCount)
prometheus.MustRegister(l4FailedHealthCheckCount)
klog.V(3).Infof("Registering L4 controller healthcheck metric: %v", l4ControllerHealthCheck)
Expand All @@ -137,6 +160,15 @@ func publishL4ILBSyncLatency(success bool, syncType string, startTime time.Time)
l4ILBSyncLatency.WithLabelValues(status, syncType).Observe(time.Since(startTime).Seconds())
}

// PublishL4ILBDualStackSyncLatency exports the given sync latency datapoint.
func PublishL4ILBDualStackSyncLatency(success bool, syncType, ipFamilies string, startTime time.Time) {
status := statusSuccess
if !success {
status = statusError
}
l4ILBDualStackSyncLatency.WithLabelValues(status, syncType, ipFamilies).Observe(time.Since(startTime).Seconds())
}

// publishL4ILBSyncErrorCount increments the L4 ILB sync error counter for the given sync type, GCE resource and error type.
func publishL4ILBSyncErrorCount(syncType, gceResource, errorType string) {
l4ILBSyncErrorCount.WithLabelValues(syncType, gceResource, errorType).Inc()
Expand All @@ -147,6 +179,15 @@ func PublishL4NetLBSyncSuccess(syncType string, startTime time.Time) {
l4NetLBSyncLatency.WithLabelValues(statusSuccess, syncType).Observe(time.Since(startTime).Seconds())
}

// PublishL4NetLBDualStackSyncLatency exports the given sync latency datapoint.
func PublishL4NetLBDualStackSyncLatency(success bool, syncType, ipFamilies string, startTime time.Time) {
status := statusSuccess
if !success {
status = statusError
}
l4NetLBDualStackSyncLatency.WithLabelValues(status, syncType, ipFamilies).Observe(time.Since(startTime).Seconds())
}

// PublishL4NetLBSyncError exports latency and error count metrics for L4 NetLB after error sync.
func PublishL4NetLBSyncError(syncType, gceResource, errType string, startTime time.Time) {
l4NetLBSyncLatency.WithLabelValues(statusError, syncType).Observe(time.Since(startTime).Seconds())
Expand Down