Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry PFCP Association Setup #60

Merged
merged 5 commits into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ github.com/free5gc/nas v1.0.7 h1:c+UXWENvJgTr/QZl50SyX+ZZzUdazGHHZ9ZGtnnwG5A=
github.com/free5gc/nas v1.0.7/go.mod h1:qPj0gxFk81cH9zIkg4hm3ID0hkYofBlzZzcciBnJxwY=
github.com/free5gc/ngap v1.0.6 h1:f9sKqHMNrFZVo9Kp8hAyrCXSoI8l746N5O+DFn7vKHA=
github.com/free5gc/ngap v1.0.6/go.mod h1:TG1kwwU/EyIlJ3bxY591rdxpD5ZeYnLZTzoWjcfvrBM=
github.com/free5gc/openapi v1.0.4/go.mod h1:KRCnnp0GeK0Bl4gnrX79cQAidKXNENf8VRdG0y9R0Fc=
github.com/free5gc/openapi v1.0.5 h1:S25JqyrTgLwcH6pqZE6U448vv0RKg1CoH48AQ4Cj/d4=
github.com/free5gc/openapi v1.0.5/go.mod h1:KRCnnp0GeK0Bl4gnrX79cQAidKXNENf8VRdG0y9R0Fc=
github.com/free5gc/pfcp v1.0.4 h1:11ous/chOya/bG0bHAHHEUc7JUB2g6svABock8Ta2Zs=
Expand Down
20 changes: 15 additions & 5 deletions internal/context/context.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package context

import (
"context"
"fmt"
"net"
"os"
"sync/atomic"
"time"

"github.com/google/uuid"

Expand Down Expand Up @@ -43,13 +45,15 @@ type SMFContext struct {

SnssaiInfos []SnssaiSmfInfo

NrfUri string
NFManagementClient *Nnrf_NFManagement.APIClient
NFDiscoveryClient *Nnrf_NFDiscovery.APIClient
SubscriberDataManagementClient *Nudm_SubscriberDataManagement.APIClient
Locality string
NrfUri string
NFManagementClient *Nnrf_NFManagement.APIClient
NFDiscoveryClient *Nnrf_NFDiscovery.APIClient
SubscriberDataManagementClient *Nudm_SubscriberDataManagement.APIClient
Locality string
AssociationSetupFailedAlertInterval time.Duration

UserPlaneInformation *UserPlaneInformation
PFCPCancelFunc context.CancelFunc

// Now only "IPv4" supported
// TODO: support "IPv6", "IPv4v6", "Ethernet"
Expand Down Expand Up @@ -154,6 +158,12 @@ func InitSmfContext(config *factory.Config) {
smfContext.CPNodeID.NodeIdType = pfcpType.NodeIdTypeIpv6Address
smfContext.CPNodeID.IP = addr.IP
}

if pfcp.AlertInterval == 0 {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If AlertInterval is 0, the SMF should not launch the retry mechanism.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AssociationSetupFailedAlertInterval is the interval at which error messages are output, not the interval at which the association is retried.
Our system monitors error messages and notifies operators; this value is used to reduce the frequency of notifications when the same error message is output consecutively.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, but it is a little confusing that the code has no alert trigger and only retries the association setup.

smfContext.AssociationSetupFailedAlertInterval = 5 * time.Minute
} else {
smfContext.AssociationSetupFailedAlertInterval = pfcp.AlertInterval
}
}

smfContext.SnssaiInfos = make([]SnssaiSmfInfo, 0, len(configuration.SNssaiInfo))
Expand Down
2 changes: 2 additions & 0 deletions pkg/factory/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ func (t *Tls) validate() (bool, error) {
type PFCP struct {
Addr string `yaml:"addr,omitempty" valid:"host,required"`
Port uint16 `yaml:"port,omitempty" valid:"port,optional"`
// interval at which PFCP Association Setup error messages are output.
AlertInterval time.Duration `yaml:"associationSetupFailedAlertInterval,omitempty" valid:"type(time.Duration),optional"`
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AssocFailRetryPeriod time.Duration `yaml:"assocFailRetryPeriod,omitempty" valid:"type(time.Duration),optional"`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment on L162.

}

func (p *PFCP) validate() (bool, error) {
Expand Down
42 changes: 42 additions & 0 deletions pkg/service/association.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package service

import (
"context"
"fmt"
"time"

"github.com/free5gc/pfcp"
"github.com/free5gc/pfcp/pfcpType"
Expand All @@ -10,6 +12,46 @@ import (
"github.com/free5gc/smf/internal/pfcp/message"
)

// toBeAssociatedWithUPF builds a printable label for upf and passes it, along
// with ctx and upf, to ensureSetupPfcpAssociation.
//
// The label is "[<fqdn>](<ip>)" when the node ID type is FQDN and "[<ip>]"
// otherwise, where <ip> is the string form of upf.NodeID.ResolveNodeIdToIp().
func toBeAssociatedWithUPF(ctx context.Context, upf *smf_context.UPF) {
	byFqdn := upf.NodeID.NodeIdType == pfcpType.NodeIdTypeFqdn
	resolvedIP := upf.NodeID.ResolveNodeIdToIp().String()

	// Start from the plain-IP form and prepend the FQDN only when one is in use.
	label := fmt.Sprintf("[%s]", resolvedIP)
	if byFqdn {
		label = fmt.Sprintf("[%s](%s)", upf.NodeID.FQDN, resolvedIP)
	}

	ensureSetupPfcpAssociation(ctx, upf, label)
}

// isDone reports, without blocking, whether ctx has already been canceled or
// has expired.
func isDone(ctx context.Context) bool {
	// Per the context.Context contract, Err is non-nil exactly when Done is closed.
	return ctx.Err() != nil
}

func ensureSetupPfcpAssociation(ctx context.Context, upf *smf_context.UPF, upfStr string) {
var alertTime time.Time
for {
alertInterval := smf_context.SMF_Self().AssociationSetupFailedAlertInterval
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

alertInterval can be moved out of the for loop.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, so I revised it.

err := setupPfcpAssociation(upf, upfStr)
if err == nil {
return
}
now := time.Now()
if alertTime.IsZero() || now.After(alertTime.Add(alertInterval)) {
logger.AppLog.Errorf("Failed to setup an association with UPF%s, error:%+v", upfStr, err)
alertTime = now
}

if isDone(ctx) {
logger.AppLog.Infof("Canceled association request to UPF%s", upfStr)
return
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a busy loop; a one-second Sleep can be added here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, because the PFCP Association Setup Request is retransmitted at the interval defined as ResendRequestTimeOutPeriod (= 3s) in the pfcp library.

Copy link
Collaborator

@tim-ywliu tim-ywliu Nov 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I got it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this routine will be blocked in now.After(alertTime.Add(alertInterval)), so ctx.Done() can't be handled immediately.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for {
	err := setupPfcpAssociation(upf, upfStr)
	if err == nil {
		return
	}

	alertInterval := smf_context.SMF_Self().AssociationSetupFailedAlertInterval
	now := time.Now()
	select {
	case <-now.After(alertTime.Add(alertInterval)):
		logger.AppLog.Errorf("Failed to setup an association with UPF%s, error:%+v", upfStr, err)
	case <-ctx.Done():
		logger.AppLog.Infof("Canceled association request to UPF%s", upfStr)
		return
	}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now.After(...) is the After method of type Time (see https://pkg.go.dev/time#Time.After), not the After function of package time (see https://pkg.go.dev/time#After).
So this routine is not blocked.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it

}

func setupPfcpAssociation(upf *smf_context.UPF, upfStr string) error {
logger.AppLog.Infof("Sending PFCP Association Request to UPF%s", upfStr)

Expand Down
26 changes: 10 additions & 16 deletions pkg/service/init.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package service

import (
"context"
"fmt"
"os"
"os/signal"
Expand All @@ -16,8 +17,7 @@ import (
ngapLogger "github.com/free5gc/ngap/logger"
"github.com/free5gc/openapi/models"
pfcpLogger "github.com/free5gc/pfcp/logger"
"github.com/free5gc/pfcp/pfcpType"
"github.com/free5gc/smf/internal/context"
smf_context "github.com/free5gc/smf/internal/context"
"github.com/free5gc/smf/internal/logger"
"github.com/free5gc/smf/internal/pfcp"
"github.com/free5gc/smf/internal/pfcp/udp"
Expand Down Expand Up @@ -222,10 +222,10 @@ func (smf *SMF) Start() {
keyPath = sbi.Tls.Key
}

context.InitSmfContext(&factory.SmfConfig)
smf_context.InitSmfContext(&factory.SmfConfig)
// allocate id for each upf
context.AllocateUPFID()
context.InitSMFUERouting(&factory.UERoutingConfig)
smf_context.AllocateUPFID()
smf_context.InitSMFUERouting(&factory.UERoutingConfig)

logger.InitLog.Infoln("Server started")
router := logger_util.NewGinWithLogrus(logger.GinLog)
Expand Down Expand Up @@ -266,21 +266,15 @@ func (smf *SMF) Start() {
}
udp.Run(pfcp.Dispatch)

for _, upf := range context.SMF_Self().UserPlaneInformation.UPFs {
var upfStr string
if upf.NodeID.NodeIdType == pfcpType.NodeIdTypeFqdn {
upfStr = fmt.Sprintf("[%s](%s)", upf.NodeID.FQDN, upf.NodeID.ResolveNodeIdToIp().String())
} else {
upfStr = fmt.Sprintf("[%s]", upf.NodeID.IP.String())
}
if err = setupPfcpAssociation(upf.UPF, upfStr); err != nil {
logger.AppLog.Errorf("Failed to setup an association with UPF%s, error:%+v", upfStr, err)
}
ctx, cancel := context.WithCancel(context.Background())
smf_context.SMF_Self().PFCPCancelFunc = cancel
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When is the cancel() function used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is used to accommodate dynamic configuration changes that may be implemented in the future.

for _, upNode := range smf_context.SMF_Self().UserPlaneInformation.UPFs {
go toBeAssociatedWithUPF(ctx, upNode.UPF)
}

time.Sleep(1000 * time.Millisecond)

HTTPAddr := fmt.Sprintf("%s:%d", context.SMF_Self().BindingIPv4, context.SMF_Self().SBIPort)
HTTPAddr := fmt.Sprintf("%s:%d", smf_context.SMF_Self().BindingIPv4, smf_context.SMF_Self().SBIPort)
server, err := httpwrapper.NewHttp2Server(HTTPAddr, smf.KeyLogPath, router)

if server == nil {
Expand Down