Skip to content

Commit

Permalink
[bgpcfg]: Batch bgp updates (sonic-net#6006)
Browse files Browse the repository at this point in the history
* [bgpcfgd]: Batch bgp updates.

vtysh -f command is slow. It is sometimes takes about 3 seconds.
When we need to run many vtysh -f commands that slows down the system.
Batch vtysh -f updates.

* Use correct file to import run_command
  • Loading branch information
pavel-shirshov authored Nov 25, 2020
1 parent 8f0452d commit 148436d
Show file tree
Hide file tree
Showing 13 changed files with 395 additions and 256 deletions.
67 changes: 30 additions & 37 deletions src/sonic-bgpcfgd/bgpcfgd/config.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,66 @@
import os
import tempfile

from .vars import g_debug
from .log import log_crit, log_err
from .utils import run_command


class ConfigMgr(object):
""" The class represents frr configuration """
def __init__(self):
def __init__(self, frr):
self.frr = frr
self.current_config = None
self.current_config_raw = None
self.changes = ""
self.peer_groups_to_restart = []

def reset(self):
""" Reset stored config """
self.current_config = None
self.current_config_raw = None
self.changes = ""
self.peer_groups_to_restart = []

def update(self):
""" Read current config from FRR """
self.current_config = None
self.current_config_raw = None
ret_code, out, err = run_command(["vtysh", "-c", "show running-config"])
if ret_code != 0:
# FIXME: should we throw exception here?
log_crit("can't update running config: rc=%d out='%s' err='%s'" % (ret_code, out, err))
return
out = self.frr.get_config()
text = []
for line in out.split('\n'):
if line.lstrip().startswith('!'):
continue
text.append(line)
text += [" "] # Add empty line to have something to work on, if there is no text
self.current_config_raw = text
self.current_config = self.to_canonical(out) # FIXME: use test as an input
self.current_config = self.to_canonical(out) # FIXME: use text as an input

def push_list(self, cmdlist):
return self.push("\n".join(cmdlist))
"""
Prepare new changes for FRR. The changes should be committed by self.commit()
:param cmdlist: configuration change for FRR. Type: List of Strings
"""
self.changes += "\n".join(cmdlist) + "\n"

def push(self, cmd):
"""
Push new changes to FRR
Prepare new changes for FRR. The changes should be committed by self.commit()
:param cmd: configuration change for FRR. Type: String
:return: True if change was applied successfully, False otherwise
"""
return self.write(cmd)
self.changes += cmd + "\n"
return True

def restart_peer_groups(self, peer_groups):
"""
Schedule peer_groups for restart on commit
:param peer_groups: List of peer_groups
"""
self.peer_groups_to_restart.extend(peer_groups)

def write(self, cmd):
def commit(self):
"""
Write configuration change to FRR.
:param cmd: new configuration to write into FRR. Type: String
:return: True if change was applied successfully, False otherwise
"""
fd, tmp_filename = tempfile.mkstemp(dir='/tmp')
os.close(fd)
with open(tmp_filename, 'w') as fp:
fp.write("%s\n" % cmd)
command = ["vtysh", "-f", tmp_filename]
ret_code, out, err = run_command(command)
if not g_debug:
os.remove(tmp_filename)
if ret_code != 0:
err_tuple = str(cmd), ret_code, out, err
log_err("ConfigMgr::push(): can't push configuration '%s', rc='%d', stdout='%s', stderr='%s'" % err_tuple)
if ret_code == 0:
self.current_config = None # invalidate config
self.current_config_raw = None
return ret_code == 0
if self.changes.strip() == "":
return True
rc_write = self.frr.write(self.changes)
rc_restart = self.frr.restart_peer_groups(self.peer_groups_to_restart)
self.reset()
return rc_write and rc_restart

def get_text(self):
return self.current_config_raw
Expand All @@ -89,7 +83,6 @@ def to_canonical(raw_config):
for line in lines:
n_spaces = ConfigMgr.count_spaces(line)
s_line = line.strip()
# assert(n_spaces == cur_offset or (n_spaces + 1) == cur_offset or (n_spaces - 1) == cur_offset)
if n_spaces == cur_offset:
cur_path[-1] = s_line
elif n_spaces > cur_offset:
Expand Down
70 changes: 70 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/frr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import datetime
import time
import tempfile

from bgpcfgd.log import log_err, log_info, log_warn, log_crit
from .vars import g_debug
from .utils import run_command


class FRR(object):
"""Proxy object with FRR"""
def __init__(self, daemons):
self.daemons = daemons

def wait_for_daemons(self, seconds):
"""
Wait until FRR daemons are ready for requests
:param seconds: number of seconds to wait, until raise an error
"""
stop_time = datetime.datetime.now() + datetime.timedelta(seconds=seconds)
log_info("Start waiting for FRR daemons: %s" % str(datetime.datetime.now()))
while datetime.datetime.now() < stop_time:
ret_code, out, err = run_command(["vtysh", "-c", "show daemons"], hide_errors=True)
if ret_code == 0 and all(daemon in out for daemon in self.daemons):
log_info("All required daemons have connected to vtysh: %s" % str(datetime.datetime.now()))
return
else:
log_warn("Can't read daemon status from FRR: %s" % str(err))
time.sleep(0.1) # sleep 100 ms
raise RuntimeError("FRR daemons hasn't been started in %d seconds" % seconds)

@staticmethod
def get_config():
ret_code, out, err = run_command(["vtysh", "-c", "show running-config"])
if ret_code != 0:
log_crit("can't update running config: rc=%d out='%s' err='%s'" % (ret_code, out, err))
return ""
return out

@staticmethod
def write(config_text):
fd, tmp_filename = tempfile.mkstemp(dir='/tmp')
os.close(fd)
with open(tmp_filename, 'w') as fp:
fp.write("%s\n" % config_text)
command = ["vtysh", "-f", tmp_filename]
ret_code, out, err = run_command(command)
if ret_code != 0:
err_tuple = tmp_filename, ret_code, out, err
log_err("ConfigMgr::commit(): can't push configuration from file='%s', rc='%d', stdout='%s', stderr='%s'" % err_tuple)
else:
if not g_debug:
os.remove(tmp_filename)
return ret_code == 0

@staticmethod
def restart_peer_groups(peer_groups):
""" Restart peer-groups which support BBR
:param peer_groups: List of peer_groups to restart
:return: True if restart of all peer-groups was successful, False otherwise
"""
res = True
for peer_group in sorted(peer_groups):
rc, out, err = run_command(["vtysh", "-c", "clear bgp peer-group %s soft in" % peer_group])
if rc != 0:
log_value = peer_group, rc, out, err
log_crit("Can't restart bgp peer-group '%s'. rc='%d', out='%s', err='%s'" % log_value)
res = res and (rc == 0)
return res
10 changes: 6 additions & 4 deletions src/sonic-bgpcfgd/bgpcfgd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,19 @@
from .managers_setsrc import ZebraSetSrc
from .runner import Runner, signal_handler
from .template import TemplateFabric
from .utils import wait_for_daemons, read_constants
from .utils import read_constants
from .frr import FRR
from .vars import g_debug


def do_work():
""" Main function """
wait_for_daemons(["bgpd", "zebra", "staticd"], seconds=20)
frr = FRR(["bgpd", "zebra", "staticd"])
frr.wait_for_daemons(seconds=20)
#
common_objs = {
'directory': Directory(),
'cfg_mgr': ConfigMgr(),
'cfg_mgr': ConfigMgr(frr),
'tf': TemplateFabric(),
'constants': read_constants(),
}
Expand All @@ -52,7 +54,7 @@ def do_work():
# BBR Manager
BBRMgr(common_objs, "CONFIG_DB", "BGP_BBR"),
]
runner = Runner()
runner = Runner(common_objs['cfg_mgr'])
for mgr in managers:
runner.add_manager(mgr)
runner.run()
Expand Down
36 changes: 11 additions & 25 deletions src/sonic-bgpcfgd/bgpcfgd/managers_allow_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .log import log_debug, log_info, log_err, log_warn
from .template import TemplateFabric
from .manager import Manager
from .utils import run_command


class BGPAllowListMgr(Manager):
""" This class initialize "AllowList" settings """
Expand Down Expand Up @@ -147,9 +147,10 @@ def __update_policy(self, deployment_id, community_value, prefixes_v4, prefixes_
cmds += self.__update_allow_route_map_entry(self.V4, names['pl_v4'], names['community'], names['rm_v4'])
cmds += self.__update_allow_route_map_entry(self.V6, names['pl_v6'], names['community'], names['rm_v6'])
if cmds:
rc = self.cfg_mgr.push_list(cmds)
rc = rc and self.__restart_peers(deployment_id)
log_debug("BGPAllowListMgr::__update_policy. The peers were updated: rc=%s" % rc)
self.cfg_mgr.push_list(cmds)
peer_groups = self.__find_peer_group_by_deployment_id(deployment_id)
self.cfg_mgr.restart_peers(peer_groups)
log_debug("BGPAllowListMgr::__update_policy. The peers configuration scheduled for updates")
else:
log_debug("BGPAllowListMgr::__update_policy. Nothing to update")
log_info("BGPAllowListMgr::Done")
Expand All @@ -176,9 +177,10 @@ def __remove_policy(self, deployment_id, community_value):
cmds += self.__remove_prefix_list(self.V6, names['pl_v6'])
cmds += self.__remove_community(names['community'])
if cmds:
rc = self.cfg_mgr.push_list(cmds)
rc = rc and self.__restart_peers(deployment_id)
log_debug("BGPAllowListMgr::__remove_policy. 'Allow list' policy was removed. rc:%s" % rc)
self.cfg_mgr.push_list(cmds)
peer_groups = self.__find_peer_group_by_deployment_id(deployment_id)
self.cfg_mgr.restart_peers(peer_groups)
log_debug("BGPAllowListMgr::__remove_policy. 'Allow list' policy was scheduled for removal")
else:
log_debug("BGPAllowListMgr::__remove_policy. Nothing to remove")
log_info('BGPAllowListMgr::Done')
Expand Down Expand Up @@ -531,7 +533,8 @@ def __get_route_map_calls(self, rms):
inside_name = result.group(1)
return rm_2_call

def __get_peer_group_to_restart(self, deployment_id, pg_2_rm, rm_2_call):
@staticmethod
def __get_peer_group_to_restart(deployment_id, pg_2_rm, rm_2_call):
"""
Get peer_groups which are assigned to deployment_id
:deployment_id: deployment_id number
Expand Down Expand Up @@ -560,23 +563,6 @@ def __find_peer_group_by_deployment_id(self, deployment_id):
ret = self.__get_peer_group_to_restart(deployment_id, pg_2_rm, rm_2_call)
return list(ret)

def __restart_peers(self, deployment_id):
"""
Restart peer-groups with requested deployment_id
:param deployment_id: deployment_id number
"""
log_info("BGPAllowListMgr::Restart peers with deployment_id=%d" % deployment_id)
peer_groups = self.__find_peer_group_by_deployment_id(deployment_id)
rv = True
if peer_groups:
for peer_group in peer_groups:
no_error, _, _ = run_command(["vtysh", "-c", "clear bgp peer-group %s soft in" % peer_group])
rv = no_error == 0 and rv
else:
no_error, _, _ = run_command(["vtysh", "-c", "clear bgp * soft in"])
rv = no_error == 0
return rv

def __get_enabled(self):
"""
Load enable/disabled property from constants
Expand Down
25 changes: 8 additions & 17 deletions src/sonic-bgpcfgd/bgpcfgd/managers_bbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from swsscommon import swsscommon

from .log import log_err, log_info, log_crit
from .log import log_err, log_info
from .manager import Manager
from .utils import run_command


class BBRMgr(Manager):
Expand Down Expand Up @@ -34,12 +33,10 @@ def set_handler(self, key, data):
return True
if not self.__set_validation(key, data):
return True
cmds = self.__set_prepare_config(data['status'])
rv = self.cfg_mgr.push_list(cmds)
if not rv:
log_crit("BBRMgr::can't apply configuration")
return True
self.__restart_peers()
cmds, peer_groups_to_restart = self.__set_prepare_config(data['status'])
self.cfg_mgr.push_list(cmds)
self.cfg_mgr.restart_peer_groups(peer_groups_to_restart)
log_info("BBRMgr::Scheduled BBR update")
return True

def del_handler(self, key):
Expand Down Expand Up @@ -112,12 +109,14 @@ def __set_prepare_config(self, status):
available_peer_groups = self.__get_available_peer_groups()
cmds = ["router bgp %s" % bgp_asn]
prefix_of_commands = "" if status == "enabled" else "no "
peer_groups_to_restart = set()
for af in ["ipv4", "ipv6"]:
cmds.append(" address-family %s" % af)
for pg_name in sorted(self.bbr_enabled_pgs.keys()):
if pg_name in available_peer_groups and af in self.bbr_enabled_pgs[pg_name]:
cmds.append(" %sneighbor %s allowas-in 1" % (prefix_of_commands, pg_name))
return cmds
peer_groups_to_restart.add(pg_name)
return cmds, list(peer_groups_to_restart)

def __get_available_peer_groups(self):
"""
Expand All @@ -132,11 +131,3 @@ def __get_available_peer_groups(self):
if m:
res.add(m.group(1))
return res

def __restart_peers(self):
""" Restart peer-groups which support BBR """
for peer_group in sorted(self.bbr_enabled_pgs.keys()):
rc, out, err = run_command(["vtysh", "-c", "clear bgp peer-group %s soft in" % peer_group])
if rc != 0:
log_value = peer_group, rc, out, err
log_crit("BBRMgr::Can't restart bgp peer-group '%s'. rc='%d', out='%s', err='%s'" % log_value)
Loading

0 comments on commit 148436d

Please sign in to comment.