Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[docker-fpm-frr/bgpcfgd]: Update interface of bgpcfgd from swsssdk to swsscommon #3264

Merged
merged 6 commits into from
Aug 29, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 106 additions & 47 deletions dockers/docker-fpm-frr/bgpcfgd
Original file line number Diff line number Diff line change
@@ -1,65 +1,124 @@
#!/usr/bin/env python

import sys
import copy
jleveque marked this conversation as resolved.
Show resolved Hide resolved
import Queue
import redis
import subprocess
import syslog
from swsssdk import ConfigDBConnector
import os
from swsscommon import swsscommon

class BGPConfigDaemon:

def run_command(command):
syslog.syslog(syslog.LOG_DEBUG, "execute command {}.".format(command))
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
stdout = p.communicate()[0]
p.wait()
if p.returncode != 0:
syslog.syslog(syslog.LOG_ERR, 'command execution returned {}. Command: "{}", stdout: "{}"'.format(p.returncode, command, stdout))


class BGPConfigManager(object):
def __init__(self, daemon):
self.daemon = daemon
self.bgp_asn = None
self.bgp_message = Queue.Queue(0)
daemon.add_manager(swsscommon.CONFIG_DB, swsscommon.CFG_DEVICE_METADATA_TABLE_NAME, self.__metadata_handler)
daemon.add_manager(swsscommon.CONFIG_DB, swsscommon.CFG_BGP_NEIGHBOR_TABLE_NAME, self.__bgp_handler)

def __metadata_handler(self, key, op, data):
jleveque marked this conversation as resolved.
Show resolved Hide resolved
if key != "localhost" \
or "bgp_asn" not in data \
or self.bgp_asn == data["bgp_asn"]:
return

# TODO add ASN update commands

self.bgp_asn = data["bgp_asn"]
self.__update_bgp()

def __update_bgp(self):
while not self.bgp_message.empty():
key, op, data = self.bgp_message.get()
syslog.syslog(syslog.LOG_INFO, 'value for {} changed to {}'.format(key, data))
if op == swsscommon.SET_COMMAND:
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c 'neighbor {} remote-as {}'".format(self.bgp_asn, key, data['asn'])
run_command(command)
if "name" in data:
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c 'neighbor {} description {}'".format(self.bgp_asn, key, data['name'])
run_command(command)
lguohan marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning does nothing to handle the failure case. This is inherently problematic because it allows for DB to be out of sync with running config of the routing application. The config event will be lost and the only way to recover this (since it's not saved in startup config either) is to restart bgpcfgd. In general this code doesn't seem to be robust and the requirements are not well defined/clear.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a configuration, if the command is not correct it means the configuration is incorrect. what do you mean by robust? Since it is configuration issue, the issue is outside this program, the right thing to do for this program is to log error. If you have better suggestion, we would appreciate to hear.

Copy link
Collaborator

@nikos-github nikos-github Aug 16, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If for whatever reason BGP is restarting or is not running, I doubt the command will go through even if correct. Basically there are race conditions where the command may be correct, but there will be a failure that is not handled and will leave the system in an inconsistent state. The only way to recover would be to restart bgpcfgd. In order to provide a suggestion, can you please explain what is the purpose of bgpcfgd?

In comparison to what the jinja templates have, its functionality seems very incomplete overall for anyone in actual deployments and can not possibly work the same way with the templates when BGP restarts because of the missing functionality the templates provide and bgpcfgd doesn't.

If it is for unit testing purposes, then it shouldn't be part of the image.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what you suggest is valid concern, but it is out of scope of this refactoring. the bgpcfgd allow us to shutdown/enable bgp session.

if "admin_status" in data:
command_mod = "no " if data["admin_status"] == "up" else ""
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c '{}neighbor {} shutdown'".format(self.bgp_asn, command_mod, key)
run_command(command)
elif op == swsscommon.DEL_COMMAND:
# Neighbor is deleted
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c 'no neighbor {}'".format(self.bgp_asn, key)
lguohan marked this conversation as resolved.
Show resolved Hide resolved
run_command(command)

def __bgp_handler(self, key, op, data):
self.bgp_message.put((key, op, data))
# If ASN is not set, we just cache this message until the ASN is set.
if self.bgp_asn == None:
return
self.__update_bgp()


class Daemon(object):

SELECT_TIMEOUT = 1000
SUPPORT_DATABASE_LIST = (swsscommon.APPL_DB, swsscommon.CONFIG_DB)

def __init__(self):
self.config_db = ConfigDBConnector()
self.config_db.connect()
self.bgp_asn = self.config_db.get_entry('DEVICE_METADATA', 'localhost')['bgp_asn']
self.bgp_neighbor = self.config_db.get_table('BGP_NEIGHBOR')

def __run_command(self, command):
# print command
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
stdout = p.communicate()[0]
p.wait()
if p.returncode != 0:
syslog.syslog(syslog.LOG_ERR, '[bgp cfgd] command execution returned {}. Command: "{}", stdout: "{}"'.format(p.returncode, command, stdout))

def metadata_handler(self, key, data):
if key == 'localhost' and data.has_key('bgp_asn'):
if data['bgp_asn'] != self.bgp_asn:
syslog.syslog(syslog.LOG_INFO, '[bgp cfgd] ASN changed to {} from {}, restart BGP...'.format(data['bgp_asn'], self.bgp_asn))
self.__run_command("supervisorctl restart start.sh")
self.__run_command("service quagga restart")
self.bgp_asn = data['bgp_asn']

def bgp_handler(self, key, data):
syslog.syslog(syslog.LOG_INFO, '[bgp cfgd] value for {} changed to {}'.format(key, data))
if not data:
# Neighbor is deleted
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c 'no neighbor {}'".format(self.bgp_asn, key)
self.__run_command(command)
self.bgp_neighbor.pop(key)
else:
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c 'neighbor {} remote-as {}'".format(self.bgp_asn, key, data['asn'])
self.__run_command(command)
if data.has_key('name'):
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c 'neighbor {} description {}'".format(self.bgp_asn, key, data['name'])
self.__run_command(command)
if data.has_key('admin_status'):
command_mod = 'no ' if data['admin_status'] == 'up' else ''
command = "vtysh -c 'configure terminal' -c 'router bgp {}' -c '{}neighbor {} shutdown'".format(self.bgp_asn, command_mod, key)
self.__run_command(command)
self.bgp_neighbor[key] = data
self.appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, swsscommon.DBConnector.DEFAULT_UNIXSOCKET, 0)
self.conf_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, swsscommon.DBConnector.DEFAULT_UNIXSOCKET, 0)
self.selector = swsscommon.Select()
self.db_connectors = {}
self.callbacks = {}
self.subscribers = set()

def get_db_connector(self, db):
if db not in Daemon.SUPPORT_DATABASE_LIST:
raise ValueError("database {} not Daemon support list {}.".format(db, SUPPORT_DATABASE_LIST))
# if this database connector has been initialized
if db not in self.db_connectors:
self.db_connectors[db] = swsscommon.DBConnector(db, swsscommon.DBConnector.DEFAULT_UNIXSOCKET, 0)
return self.db_connectors[db]

def add_manager(self, db, table_name, callback):
if db not in self.callbacks:
self.callbacks[db] = {}
if table_name not in self.callbacks[db]:
self.callbacks[db][table_name] = []
conn = self.get_db_connector(db)
subscriber = swsscommon.SubscriberStateTable(conn, table_name)
self.subscribers.add(subscriber)
self.selector.addSelectable(subscriber)
self.callbacks[db][table_name].append(callback)

def start(self):
self.config_db.subscribe('BGP_NEIGHBOR',
lambda table, key, data: self.bgp_handler(key, data))
self.config_db.subscribe('DEVICE_METADATA',
lambda table, key, data: self.metadata_handler(key, data))
self.config_db.listen()
while True:
state, selectable = self.selector.select(Daemon.SELECT_TIMEOUT)
if not selectable:
continue
for subscriber in self.subscribers:
key, op, fvs = subscriber.pop()
# if no new message
if not key:
continue
data = dict(fvs)
syslog.syslog(syslog.LOG_DEBUG, "Receive message : {}".format((key, op, fvs)))
for callback in self.callbacks[subscriber.getDbConnector().getDbId()][subscriber.getTableName()]:
callback(key, op, data)


def main():
daemon = BGPConfigDaemon()
syslog.openlog("bgpcfgd")
daemon = Daemon()
bgp_manager = BGPConfigManager(daemon)
daemon.start()
syslog.closelog()

if __name__ == "__main__":
main()