From 39ba609785352fd56da1b332f2009c584d034edb Mon Sep 17 00:00:00 2001 From: fanriming Date: Wed, 5 Jun 2024 09:41:15 +0800 Subject: [PATCH] Enable inactivity check on ovndb connection (#4006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: fanriming Co-authored-by: 张祖建 --- pkg/controller/config.go | 34 ++++++++++++++++++++-------------- pkg/controller/controller.go | 13 +++++++++++-- pkg/ovs/ovn.go | 22 ++++++++++++++++++---- pkg/ovsdb/client/client.go | 21 +++++++++++++++++---- 4 files changed, 66 insertions(+), 24 deletions(-) diff --git a/pkg/controller/config.go b/pkg/controller/config.go index 7d0335c3fce..3b684a95461 100644 --- a/pkg/controller/config.go +++ b/pkg/controller/config.go @@ -20,14 +20,16 @@ import ( // Configuration is the controller conf type Configuration struct { - BindAddress string - OvnNbAddr string - OvnSbAddr string - OvnTimeout int - CustCrdRetryMaxDelay int - CustCrdRetryMinDelay int - KubeConfigFile string - KubeRestConfig *rest.Config + BindAddress string + OvnNbAddr string + OvnSbAddr string + OvnTimeout int + OvsDbConnectTimeout int + OvsDbInactivityTimeout int + CustCrdRetryMaxDelay int + CustCrdRetryMinDelay int + KubeConfigFile string + KubeRestConfig *rest.Config KubeClient kubernetes.Interface KubeOvnClient clientset.Interface @@ -106,12 +108,14 @@ type Configuration struct { // TODO: validate configuration func ParseFlags() (*Configuration, error) { var ( - argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address") - argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address") - argOvnTimeout = pflag.Int("ovn-timeout", 60, "The seconds to wait ovn command timeout") - argCustCrdRetryMinDelay = pflag.Int("cust-crd-retry-min-delay", 1, "The min delay seconds between custom crd two retries") - argCustCrdRetryMaxDelay = pflag.Int("cust-crd-retry-max-delay", 20, "The max delay seconds between custom crd two retries") - argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.") + argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address") + argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address") + argOvnTimeout = pflag.Int("ovn-timeout", 60, "The seconds to wait ovn command timeout") + argOvsDbConTimeout = pflag.Int("ovsdb-con-timeout", 3, "The seconds to wait ovsdb connect timeout") + argOvsDbInactivityTimeout = pflag.Int("ovsdb-inactivity-timeout", 10, "The seconds to wait ovsdb inactivity check timeout") + argCustCrdRetryMinDelay = pflag.Int("cust-crd-retry-min-delay", 1, "The min delay seconds between custom crd two retries") + argCustCrdRetryMaxDelay = pflag.Int("cust-crd-retry-max-delay", 20, "The max delay seconds between custom crd two retries") + argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.") argDefaultLogicalSwitch = pflag.String("default-ls", util.DefaultSubnet, "The default logical switch name") argDefaultCIDR = pflag.String("default-cidr", "10.16.0.0/16", "Default CIDR for namespace with no logical switch annotation") @@ -195,6 +199,8 @@ func ParseFlags() (*Configuration, error) { OvnNbAddr: *argOvnNbAddr, OvnSbAddr: *argOvnSbAddr, OvnTimeout: *argOvnTimeout, + OvsDbConnectTimeout: *argOvsDbConTimeout, + OvsDbInactivityTimeout: *argOvsDbInactivityTimeout, CustCrdRetryMinDelay: *argCustCrdRetryMinDelay, CustCrdRetryMaxDelay: *argCustCrdRetryMaxDelay, KubeConfigFile: *argKubeConfigFile, diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index a43fe1d7c85..b862de58fa1 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -472,10 +472,19 @@ func Run(ctx context.Context, config *Configuration) { } var err error - if controller.OVNNbClient, err = ovs.NewOvnNbClient(config.OvnNbAddr, config.OvnTimeout); err != nil { + if controller.OVNNbClient, err = ovs.NewOvnNbClient( + config.OvnNbAddr, + config.OvnTimeout, + config.OvsDbConnectTimeout, + config.OvsDbInactivityTimeout); err != nil { util.LogFatalAndExit(err, "failed to create ovn nb client") } - if controller.OVNSbClient, err = ovs.NewOvnSbClient(config.OvnSbAddr, config.OvnTimeout); err != nil { + if controller.OVNSbClient, err = ovs.NewOvnSbClient( + config.OvnSbAddr, + config.OvnTimeout, + config.OvsDbConnectTimeout, + config.OvsDbInactivityTimeout, + ); err != nil { util.LogFatalAndExit(err, "failed to create ovn sb client") } if config.EnableLb { diff --git a/pkg/ovs/ovn.go b/pkg/ovs/ovn.go index 9de23d922ac..0dda7a70bb5 100644 --- a/pkg/ovs/ovn.go +++ b/pkg/ovs/ovn.go @@ -53,7 +53,7 @@ func NewLegacyClient(timeout int) *LegacyClient { } } -func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) { +func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout, ovsDbConTimeout, ovsDbInactivityTimeout int) (*OVNNbClient, error) { dbModel, err := ovnnb.FullDatabaseModel() if err != nil { klog.Error(err) @@ -83,7 +83,14 @@ func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) { maxRetry := 60 var nbClient client.Client for { - nbClient, err = ovsclient.NewOvsDbClient(ovsclient.NBDB, ovnNbAddr, dbModel, monitors) + nbClient, err = ovsclient.NewOvsDbClient( + ovsclient.NBDB, + ovnNbAddr, + dbModel, + monitors, + ovsDbConTimeout, + ovsDbInactivityTimeout, + ) if err != nil { klog.Errorf("failed to create OVN NB client: %v", err) } else { @@ -105,7 +112,7 @@ func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) { return c, nil } -func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout int) (*OVNSbClient, error) { +func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout, ovsDbConTimeout, ovsDbInactivityTimeout int) (*OVNSbClient, error) { dbModel, err := ovnsb.FullDatabaseModel() if err != nil { klog.Error(err) @@ -120,7 +127,14 @@ func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout int) (*OVNSbClient, error) { try := 0 var sbClient client.Client for { - sbClient, err = ovsclient.NewOvsDbClient(ovsclient.SBDB, ovnSbAddr, dbModel, monitors) + sbClient, err = ovsclient.NewOvsDbClient( + ovsclient.SBDB, + ovnSbAddr, + dbModel, + monitors, + ovsDbConTimeout, + ovsDbInactivityTimeout, + ) if err != nil { klog.Errorf("failed to create OVN SB client: %v", err) } else { diff --git a/pkg/ovsdb/client/client.go b/pkg/ovsdb/client/client.go index 9ee2c5ef902..008bc676422 100644 --- a/pkg/ovsdb/client/client.go +++ b/pkg/ovsdb/client/client.go @@ -25,7 +25,6 @@ const ( ICNBDB = "icnbdb" ICSBDB = "icsbdb" ) -const timeout = 3 * time.Second var namedUUIDCounter uint32 @@ -42,10 +41,24 @@ func NamedUUID() string { } // NewOvsDbClient creates a new ovsdb client -func NewOvsDbClient(db, addr string, dbModel model.ClientDBModel, monitors []client.MonitorOption) (client.Client, error) { +func NewOvsDbClient( + db string, + addr string, + dbModel model.ClientDBModel, + monitors []client.MonitorOption, + ovsDbConTimeout int, + ovsDbInactivityTimeout int, +) (client.Client, error) { logger := klog.NewKlogr().WithName("libovsdb").WithValues("db", db) + connectTimeout := time.Duration(ovsDbConTimeout) * time.Second + inactivityTimeout := time.Duration(ovsDbInactivityTimeout) * time.Second options := []client.Option{ - client.WithReconnect(timeout, &backoff.ConstantBackOff{Interval: time.Second}), + // Reading and parsing the DB after reconnect at scale can (unsurprisingly) + // take longer than a normal ovsdb operation. Give it a bit more time so + // we don't time out and enter a reconnect loop. In addition it also enables + // inactivity check on the ovsdb connection. + client.WithInactivityCheck(inactivityTimeout, connectTimeout, &backoff.ZeroBackOff{}), + client.WithLeaderOnly(true), client.WithLogger(&logger), } @@ -83,7 +96,7 @@ func NewOvsDbClient(db, addr string, dbModel model.ClientDBModel, monitors []cli klog.Error(err) return nil, err } - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(len(endpoints)+1)*timeout) + ctx, cancel := context.WithTimeout(context.Background(), connectTimeout) defer cancel() if err = c.Connect(ctx); err != nil { klog.Errorf("failed to connect to OVN NB server %s: %v", addr, err)