Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable inactivity check on ovndb connection #4006

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions pkg/controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@ import (

// Configuration holds the controller configuration.
type Configuration struct {
BindAddress string
OvnNbAddr string
OvnSbAddr string
OvnTimeout int
CustCrdRetryMaxDelay int
CustCrdRetryMinDelay int
KubeConfigFile string
KubeRestConfig *rest.Config
BindAddress string
OvnNbAddr string
OvnSbAddr string
OvnTimeout int
OvsDbConnectTimeout int
OvsDbInactivityTimeout int
CustCrdRetryMaxDelay int
CustCrdRetryMinDelay int
KubeConfigFile string
KubeRestConfig *rest.Config

KubeClient kubernetes.Interface
KubeOvnClient clientset.Interface
Expand Down Expand Up @@ -106,12 +108,14 @@ type Configuration struct {
// TODO: validate configuration
func ParseFlags() (*Configuration, error) {
var (
argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address")
argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address")
argOvnTimeout = pflag.Int("ovn-timeout", 60, "The seconds to wait ovn command timeout")
argCustCrdRetryMinDelay = pflag.Int("cust-crd-retry-min-delay", 1, "The min delay seconds between custom crd two retries")
argCustCrdRetryMaxDelay = pflag.Int("cust-crd-retry-max-delay", 20, "The max delay seconds between custom crd two retries")
argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.")
argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address")
argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address")
argOvnTimeout = pflag.Int("ovn-timeout", 60, "The seconds to wait ovn command timeout")
argOvsDbConTimeout = pflag.Int("ovsdb-con-timeout", 3, "The seconds to wait ovsdb connect timeout")
argOvsDbInactivityTimeout = pflag.Int("ovsdb-inactivity-timeout", 10, "The seconds to wait ovsdb inactivity check timeout")
argCustCrdRetryMinDelay = pflag.Int("cust-crd-retry-min-delay", 1, "The min delay seconds between custom crd two retries")
argCustCrdRetryMaxDelay = pflag.Int("cust-crd-retry-max-delay", 20, "The max delay seconds between custom crd two retries")
argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.")

argDefaultLogicalSwitch = pflag.String("default-ls", util.DefaultSubnet, "The default logical switch name")
argDefaultCIDR = pflag.String("default-cidr", "10.16.0.0/16", "Default CIDR for namespace with no logical switch annotation")
Expand Down Expand Up @@ -195,6 +199,8 @@ func ParseFlags() (*Configuration, error) {
OvnNbAddr: *argOvnNbAddr,
OvnSbAddr: *argOvnSbAddr,
OvnTimeout: *argOvnTimeout,
OvsDbConnectTimeout: *argOvsDbConTimeout,
OvsDbInactivityTimeout: *argOvsDbInactivityTimeout,
CustCrdRetryMinDelay: *argCustCrdRetryMinDelay,
CustCrdRetryMaxDelay: *argCustCrdRetryMaxDelay,
KubeConfigFile: *argKubeConfigFile,
Expand Down
13 changes: 11 additions & 2 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,10 +472,19 @@ func Run(ctx context.Context, config *Configuration) {
}

var err error
if controller.OVNNbClient, err = ovs.NewOvnNbClient(config.OvnNbAddr, config.OvnTimeout); err != nil {
if controller.OVNNbClient, err = ovs.NewOvnNbClient(
config.OvnNbAddr,
config.OvnTimeout,
config.OvsDbConnectTimeout,
config.OvsDbInactivityTimeout); err != nil {
util.LogFatalAndExit(err, "failed to create ovn nb client")
}
if controller.OVNSbClient, err = ovs.NewOvnSbClient(config.OvnSbAddr, config.OvnTimeout); err != nil {
if controller.OVNSbClient, err = ovs.NewOvnSbClient(
config.OvnSbAddr,
config.OvnTimeout,
config.OvsDbConnectTimeout,
config.OvsDbInactivityTimeout,
); err != nil {
util.LogFatalAndExit(err, "failed to create ovn sb client")
}
if config.EnableLb {
Expand Down
28 changes: 17 additions & 11 deletions pkg/ovn_ic_controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ type Configuration struct {
KubeClient kubernetes.Interface
KubeOvnClient clientset.Interface

PodNamespace string
OvnNbAddr string
OvnSbAddr string
OvnTimeout int
PodNamespace string
OvnNbAddr string
OvnSbAddr string
OvnTimeout int
OvsDbConnectTimeout int
OvsDbInactivityTimeout int

NodeSwitch string
ClusterRouter string
Expand All @@ -35,9 +37,11 @@ func ParseFlags() (*Configuration, error) {
var (
argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.")

argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address")
argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address")
argOvnTimeout = pflag.Int("ovn-timeout", 60, "")
argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address")
argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address")
argOvnTimeout = pflag.Int("ovn-timeout", 60, "")
argOvsDbConTimeout = pflag.Int("ovsdb-con-timeout", 3, "")
argOvsDbInactivityTimeout = pflag.Int("ovsdb-inactivity-timeout", 10, "")

argClusterRouter = pflag.String("cluster-router", util.DefaultVpc, "The router name for cluster router")
argNodeSwitch = pflag.String("node-switch", "join", "The name of node gateway switch which help node to access pod network")
Expand Down Expand Up @@ -71,10 +75,12 @@ func ParseFlags() (*Configuration, error) {
config := &Configuration{
KubeConfigFile: *argKubeConfigFile,

PodNamespace: os.Getenv("POD_NAMESPACE"),
OvnNbAddr: *argOvnNbAddr,
OvnSbAddr: *argOvnSbAddr,
OvnTimeout: *argOvnTimeout,
PodNamespace: os.Getenv("POD_NAMESPACE"),
OvnNbAddr: *argOvnNbAddr,
OvnSbAddr: *argOvnSbAddr,
OvnTimeout: *argOvnTimeout,
OvsDbConnectTimeout: *argOvsDbConTimeout,
OvsDbInactivityTimeout: *argOvsDbInactivityTimeout,

ClusterRouter: *argClusterRouter,
NodeSwitch: *argNodeSwitch,
Expand Down
14 changes: 12 additions & 2 deletions pkg/ovn_ic_controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,20 @@ func NewController(config *Configuration) *Controller {
}

var err error
if controller.OVNNbClient, err = ovs.NewOvnNbClient(config.OvnNbAddr, config.OvnTimeout); err != nil {
if controller.OVNNbClient, err = ovs.NewOvnNbClient(
config.OvnNbAddr,
config.OvnTimeout,
config.OvsDbConnectTimeout,
config.OvsDbInactivityTimeout,
); err != nil {
util.LogFatalAndExit(err, "failed to create ovn nb client")
}
if controller.OVNSbClient, err = ovs.NewOvnSbClient(config.OvnSbAddr, config.OvnTimeout); err != nil {
if controller.OVNSbClient, err = ovs.NewOvnSbClient(
config.OvnSbAddr,
config.OvnTimeout,
config.OvsDbConnectTimeout,
config.OvsDbInactivityTimeout,
); err != nil {
util.LogFatalAndExit(err, "failed to create ovn sb client")
}

Expand Down
22 changes: 18 additions & 4 deletions pkg/ovs/ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func NewLegacyClient(timeout int) *LegacyClient {
}
}

func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) {
func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout, ovsDbConTimeout, ovsDbInactivityTimeout int) (*OVNNbClient, error) {
dbModel, err := ovnnb.FullDatabaseModel()
if err != nil {
klog.Error(err)
Expand Down Expand Up @@ -83,7 +83,14 @@ func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) {
maxRetry := 60
var nbClient client.Client
for {
nbClient, err = ovsclient.NewOvsDbClient(ovsclient.NBDB, ovnNbAddr, dbModel, monitors)
nbClient, err = ovsclient.NewOvsDbClient(
ovsclient.NBDB,
ovnNbAddr,
dbModel,
monitors,
ovsDbConTimeout,
ovsDbInactivityTimeout,
)
if err != nil {
klog.Errorf("failed to create OVN NB client: %v", err)
} else {
Expand All @@ -105,7 +112,7 @@ func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) {
return c, nil
}

func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout int) (*OVNSbClient, error) {
func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout, ovsDbConTimeout, ovsDbInactivityTimeout int) (*OVNSbClient, error) {
dbModel, err := ovnsb.FullDatabaseModel()
if err != nil {
klog.Error(err)
Expand All @@ -120,7 +127,14 @@ func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout int) (*OVNSbClient, error) {
try := 0
var sbClient client.Client
for {
sbClient, err = ovsclient.NewOvsDbClient(ovsclient.SBDB, ovnSbAddr, dbModel, monitors)
sbClient, err = ovsclient.NewOvsDbClient(
ovsclient.SBDB,
ovnSbAddr,
dbModel,
monitors,
ovsDbConTimeout,
ovsDbInactivityTimeout,
)
if err != nil {
klog.Errorf("failed to create OVN SB client: %v", err)
} else {
Expand Down
21 changes: 17 additions & 4 deletions pkg/ovsdb/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ const (
ICNBDB = "icnbdb"
ICSBDB = "icsbdb"
)
const timeout = 3 * time.Second

var namedUUIDCounter uint32

Expand All @@ -42,10 +41,24 @@ func NamedUUID() string {
}

// NewOvsDbClient creates a new ovsdb client
func NewOvsDbClient(db, addr string, dbModel model.ClientDBModel, monitors []client.MonitorOption) (client.Client, error) {
func NewOvsDbClient(
db string,
addr string,
dbModel model.ClientDBModel,
monitors []client.MonitorOption,
ovsDbConTimeout int,
ovsDbInactivityTimeout int,
) (client.Client, error) {
logger := klog.NewKlogr().WithName("libovsdb").WithValues("db", db)
connectTimeout := time.Duration(ovsDbConTimeout) * time.Second
inactivityTimeout := time.Duration(ovsDbInactivityTimeout) * time.Second
options := []client.Option{
client.WithReconnect(timeout, &backoff.ConstantBackOff{Interval: time.Second}),
// Reading and parsing the DB after a reconnect at scale can (unsurprisingly)
// take longer than a normal ovsdb operation, so give the reconnect enough
// time that we don't time out and enter a reconnect loop. This option also
// enables the inactivity check on the ovsdb connection.
client.WithInactivityCheck(inactivityTimeout, connectTimeout, &backoff.ZeroBackOff{}),

client.WithLeaderOnly(true),
client.WithLogger(&logger),
}
Expand Down Expand Up @@ -83,7 +96,7 @@ func NewOvsDbClient(db, addr string, dbModel model.ClientDBModel, monitors []cli
klog.Error(err)
return nil, err
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(len(endpoints)+1)*timeout)
ctx, cancel := context.WithTimeout(context.Background(), connectTimeout)
defer cancel()
if err = c.Connect(ctx); err != nil {
klog.Errorf("failed to connect to OVN NB server %s: %v", addr, err)
Expand Down
Loading