Skip to content

Commit

Permalink
Add new metric status PersistentError for L4 DualStack
Browse files Browse the repository at this point in the history
Report PersistentError if service is in Error after 20 minutes of
retries
  • Loading branch information
panslava committed Feb 17, 2023
1 parent 839191b commit 5291909
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 63 deletions.
26 changes: 26 additions & 0 deletions pkg/l4lb/l4controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,32 @@ func TestProcessDualStackServiceOnUserError(t *testing.T) {
}
}

func TestDualStackILBStatusForErrorSync(t *testing.T) {
l4c := newServiceController(t, newFakeGCEWithUserNoIPv6SubnetError())
l4c.enableDualStack = true
(l4c.ctx.Cloud.Compute().(*cloud.MockGCE)).MockForwardingRules.InsertHook = mock.InsertForwardingRulesInternalErrHook

newSvc := test.NewL4ILBDualStackService(8080, api_v1.ProtocolTCP, []api_v1.IPFamily{api_v1.IPv4Protocol, api_v1.IPv6Protocol}, api_v1.ServiceExternalTrafficPolicyTypeCluster)
addILBService(l4c, newSvc)
addNEG(l4c, newSvc)
syncResult := l4c.processServiceCreateOrUpdate(newSvc)
if syncResult.Error == nil {
t.Fatalf("Failed to generate error when syncing service %s", newSvc.Name)
}
if syncResult.MetricsState.IsUserError {
t.Errorf("syncResult.MetricsState.IsUserError should be false, got true")
}
if syncResult.MetricsState.InSuccess {
t.Errorf("syncResult.MetricsState.InSuccess should be false, got true")
}
if syncResult.DualStackMetricsState.Status != metrics.StatusError {
t.Errorf("syncResult.DualStackMetricsState.Status should be %s, got %s", metrics.StatusError, syncResult.DualStackMetricsState.Status)
}
if syncResult.DualStackMetricsState.FirstSyncErrorTime == nil {
t.Fatalf("Metric status FirstSyncErrorTime for service %s/%s mismatch, expected: not nil, received: nil", newSvc.Namespace, newSvc.Name)
}
}

func TestProcessUpdateILBIPFamilies(t *testing.T) {
testCases := []struct {
desc string
Expand Down
6 changes: 4 additions & 2 deletions pkg/loadbalancers/l4.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,12 @@ func (l4 *L4) getFRNameWithProtocol(protocol string) string {
func (l4 *L4) EnsureInternalLoadBalancer(nodeNames []string, svc *corev1.Service) *L4ILBSyncResult {
l4.Service = svc

startTime := time.Now()
result := &L4ILBSyncResult{
Annotations: make(map[string]string),
StartTime: time.Now(),
StartTime: startTime,
SyncType: SyncTypeCreate,
DualStackMetricsState: metrics.InitServiceDualStackMetricsState(svc),
DualStackMetricsState: metrics.InitServiceDualStackMetricsState(svc, &startTime),
}

// If service already has an IP assigned, treat it as an update instead of a new Loadbalancer.
Expand Down Expand Up @@ -388,6 +389,7 @@ func (l4 *L4) EnsureInternalLoadBalancer(nodeNames []string, svc *corev1.Service
}
if l4.enableDualStack {
result.DualStackMetricsState.Status = metrics.StatusSuccess
result.DualStackMetricsState.FirstSyncErrorTime = nil
}
return result
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/loadbalancers/l4netlb.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func NewL4SyncResult(syncType string, svc *corev1.Service) *L4NetLBSyncResult {
StartTime: startTime,
SyncType: syncType,
MetricsState: metrics.InitL4NetLBServiceState(&startTime),
DualStackMetricsState: metrics.InitServiceDualStackMetricsState(svc),
DualStackMetricsState: metrics.InitServiceDualStackMetricsState(svc, &startTime),
}
return result
}
Expand Down
208 changes: 161 additions & 47 deletions pkg/metrics/l4metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,56 +381,82 @@ func TestComputeL4NetLBMetrics(t *testing.T) {

func TestComputeL4NetLBDualStackMetrics(t *testing.T) {
t.Parallel()

currTime := time.Now()
before10min := currTime.Add(-10 * time.Minute)
before20min := currTime.Add(-20 * time.Minute)

for _, tc := range []struct {
desc string
serviceStates []L4DualStackServiceState
expectL4NetLBDualStackCount map[L4DualStackServiceState]int
expectL4NetLBDualStackCount map[L4DualStackServiceLabels]int
}{
{
desc: "empty input",
serviceStates: []L4DualStackServiceState{},
expectL4NetLBDualStackCount: map[L4DualStackServiceState]int{},
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{},
},
{
desc: "one l4 NetLB dual-stack service",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess),
newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess, nil),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4", "SingleStack", StatusSuccess}: 1,
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusSuccess}: 1,
},
},
{
desc: "l4 NetLB dual-stack service in error state",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4", "SingleStack", StatusError),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1,
},
},
{
desc: "l4 NetLB dual-stack service in error state, for 10 minutes",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4", "SingleStack", StatusError}: 1,
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1,
},
},
{
desc: "l4 NetLB dual-stack service in error state, for 20 minutes",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1,
},
},
{
desc: "L4 NetLB dual-stack service with IPv4,IPv6 Families",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess),
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1,
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1,
},
},
{
desc: "many l4 NetLB dual-stack services",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess),
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2,
L4DualStackServiceState{"IPv4", "SingleStack", StatusError}: 1,
L4DualStackServiceState{"IPv6", "SingleStack", StatusSuccess}: 2,
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min),
},
expectL4NetLBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2,
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 2,
L4DualStackServiceLabels{"IPv6", "SingleStack", StatusSuccess}: 2,
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1,
},
},
} {
Expand Down Expand Up @@ -490,6 +516,58 @@ func TestRetryPeriodForL4NetLBServices(t *testing.T) {
}
}

func TestRetryPeriodForL4ILBDualStackServices(t *testing.T) {
t.Parallel()
currTime := time.Now()
before5min := currTime.Add(-5 * time.Minute)

svcName1 := "svc1"
metrics := FakeControllerMetrics()

errorState := newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &currTime)
metrics.SetL4ILBDualStackService(svcName1, errorState)

// change FirstSyncErrorTime and verify it will not change metrics state
errorState.FirstSyncErrorTime = &before5min
metrics.SetL4ILBDualStackService(svcName1, errorState)
state, ok := metrics.l4ILBDualStackServiceMap[svcName1]
if !ok {
t.Fatalf("state should be set")
}
if *state.FirstSyncErrorTime != currTime {
t.Errorf("FirstSyncErrorTime should not change, expected %v, got %v", currTime, *state.FirstSyncErrorTime)
}
if state.Status != StatusError {
t.Errorf("Expected status %s, got %s", StatusError, state.Status)
}
}

func TestRetryPeriodForL4NetLBDualStackServices(t *testing.T) {
t.Parallel()
currTime := time.Now()
before5min := currTime.Add(-5 * time.Minute)

svcName1 := "svc1"
metrics := FakeControllerMetrics()

errorState := newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &currTime)
metrics.SetL4NetLBDualStackService(svcName1, errorState)

// change FirstSyncErrorTime and verify it will not change metrics state
errorState.FirstSyncErrorTime = &before5min
metrics.SetL4NetLBDualStackService(svcName1, errorState)
state, ok := metrics.l4NetLBDualStackServiceMap[svcName1]
if !ok {
t.Fatalf("state should be set")
}
if *state.FirstSyncErrorTime != currTime {
t.Errorf("FirstSyncErrorTime should not change, expected %v, got %v", currTime, *state.FirstSyncErrorTime)
}
if state.Status != StatusError {
t.Errorf("Expected status %s, got %s", StatusError, state.Status)
}
}

func checkMetricsComputation(newMetrics *ControllerMetrics, expErrorCount, expSvcCount int) error {
got := newMetrics.computeL4NetLBMetrics()
if got.inError != expErrorCount {
Expand All @@ -503,56 +581,89 @@ func checkMetricsComputation(newMetrics *ControllerMetrics, expErrorCount, expSv

func TestComputeL4ILBDualStackMetrics(t *testing.T) {
t.Parallel()

currTime := time.Now()
before10min := currTime.Add(-10 * time.Minute)
before20min := currTime.Add(-20 * time.Minute)

for _, tc := range []struct {
desc string
serviceStates []L4DualStackServiceState
expectL4ILBDualStackCount map[L4DualStackServiceState]int
expectL4ILBDualStackCount map[L4DualStackServiceLabels]int
}{
{
desc: "empty input",
serviceStates: []L4DualStackServiceState{},
expectL4ILBDualStackCount: map[L4DualStackServiceState]int{},
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{},
},
{
desc: "one l4 ilb dual-stack service",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess),
newL4DualStackServiceState("IPv4", "SingleStack", StatusSuccess, nil),
},
expectL4ILBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4", "SingleStack", StatusSuccess}: 1,
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusSuccess}: 1,
},
},
{
desc: "l4 ilb dual-stack service in error state",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4", "SingleStack", StatusError),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil),
},
expectL4ILBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4", "SingleStack", StatusError}: 1,
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 1,
},
},
{
desc: "L4 ILB dual-stack service with IPv4,IPv6 Families",
desc: "l4 ilb dual-stack service in error state, for 10 minutes",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min),
},
expectL4ILBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1,
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{
"IPv4",
"SingleStack",
StatusError,
}: 1,
},
},
{
desc: "many l4 ilb dual-stack services",
desc: "l4 ilb dual-stack service in error state, for 20 minutes",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess),
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min),
},
expectL4ILBDualStackCount: map[L4DualStackServiceState]int{
L4DualStackServiceState{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2,
L4DualStackServiceState{"IPv4", "SingleStack", StatusError}: 1,
L4DualStackServiceState{"IPv6", "SingleStack", StatusSuccess}: 2,
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{
"IPv4",
"SingleStack",
StatusPersistentError,
}: 1,
},
},
{
desc: "L4 ILB dual-stack service with IPv4,IPv6 Families",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil),
},
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 1},
},
{
desc: "many l4 ilb dual-stack services",
serviceStates: []L4DualStackServiceState{
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv4,IPv6", "RequireDualStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, nil),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv6", "SingleStack", StatusSuccess, nil),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before10min),
newL4DualStackServiceState("IPv4", "SingleStack", StatusError, &before20min),
},
expectL4ILBDualStackCount: map[L4DualStackServiceLabels]int{
L4DualStackServiceLabels{"IPv4,IPv6", "RequireDualStack", StatusSuccess}: 2,
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusError}: 2,
L4DualStackServiceLabels{"IPv6", "SingleStack", StatusSuccess}: 2,
L4DualStackServiceLabels{"IPv4", "SingleStack", StatusPersistentError}: 1,
},
},
} {
Expand All @@ -571,10 +682,13 @@ func TestComputeL4ILBDualStackMetrics(t *testing.T) {
}
}

func newL4DualStackServiceState(ipFamilies string, ipFamilyPolicy string, status L4DualStackServiceStatus) L4DualStackServiceState {
func newL4DualStackServiceState(ipFamilies string, ipFamilyPolicy string, status L4DualStackServiceStatus, firstSyncErrorTime *time.Time) L4DualStackServiceState {
return L4DualStackServiceState{
IPFamilies: ipFamilies,
IPFamilyPolicy: ipFamilyPolicy,
Status: status,
L4DualStackServiceLabels: L4DualStackServiceLabels{
IPFamilies: ipFamilies,
IPFamilyPolicy: ipFamilyPolicy,
Status: status,
},
FirstSyncErrorTime: firstSyncErrorTime,
}
}
Loading

0 comments on commit 5291909

Please sign in to comment.