Skip to content

Commit

Permalink
Improve MIBUpdater to re-connect DBConnector when re-init data. (#290)
Browse files Browse the repository at this point in the history
Improve MIBUpdater to re-connect DBConnector when re-init data.

#### Work item tracking
Microsoft ADO (number only): 24705208

**- What I did**
Fix when redis restart, some MIBUpdater's db connection will broken and keeps report error to syslog issue.

There will be following message repeat in syslog:
```
#12  File "/usr/local/lib/python3.7/dist-packages/sonic_ax_impl/mibs/ietf/rfc2737.py", line 674, in _update_per_namespace_data#
#12    msg = pubsub.get_message()#
#12  File "/usr/lib/python3/dist-packages/swsscommon/swsscommon.py", line 1626, in get_message#
#12    return _swsscommon.PubSub_get_message(self, timeout)#
#012RuntimeError: RedisError: Failed to select, err=3: errstr=Server closed the connection
```

**- How I did it**
Re-connect DBConnector in every MIBUpdater's reinit_data method.

**- How to verify it**
Pass all UT

Manually test with following steps:
1. in database container kill redis server.
2. start redis in database container later:  service redis-server start
2. check syslog, confirm following log exist but now new log few minutes later after mibs re-init:

sudo cat /var/log/syslog | grep rfc2737.py | grep PubSub_get_message

Sep 22 03:30:17.223944 vlab-01 ERR snmp#snmp-subagent [ax_interface] ERROR: MIBUpdater.start() caught an unexpected exception during update_data()#012Traceback (most recent call last):#12  File "/usr/local/lib/python3.9/dist-packages/ax_interface/mib.py", line 43, in start#012    self.update_data()#12  File "/usr/local/lib/python3.9/dist-packages/sonic_ax_impl/mibs/ietf/rfc2737.py", line 326, in update_data#012    updater.update_data(i, self.statedb[i])#12  File "/usr/local/lib/python3.9/dist-packages/sonic_ax_impl/mibs/ietf/rfc2737.py", line 666, in update_data#012    self._update_per_namespace_data(self.pub_sub_dict[db_index])#12  File "/usr/local/lib/python3.9/dist-packages/sonic_ax_impl/mibs/ietf/rfc2737.py", line 675, in _update_per_namespace_data#012    msg = pubsub.get_message()#12  File "/usr/lib/python3/dist-packages/swsscommon/swsscommon.py", line 1996, in get_message#012    return _swsscommon.PubSub_get_message(self, timeout, interrupt_on_signal)#012RuntimeError: RedisError: Failed to select, err=3: errstr=Server closed the connection


**- Description for the changelog**
Improve MIBUpdater to re-connect DBConnector when re-init data.
  • Loading branch information
liuh-80 authored Oct 30, 2023
1 parent bdaddca commit 347a6e4
Show file tree
Hide file tree
Showing 13 changed files with 151 additions and 3 deletions.
18 changes: 18 additions & 0 deletions src/ax_interface/mib.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,29 @@ def __init__(self):

async def start(self):
# Run the update while we are allowed
redis_exception_happen = False
while self.run_event.is_set():
try:
# reinit internal structures
if self.update_counter > self.reinit_rate:
# reconnect when redis exception happen
if redis_exception_happen:
self.reinit_connection()

self.reinit_data()
self.update_counter = 0
else:
self.update_counter += 1

# run the background update task
self.update_data()
redis_exception_happen = False
except RuntimeError:
# Any unexpected exception or error, log it and keep running
logger.exception("MIBUpdater.start() caught an unexpected exception during update_data()")
# When redis server restart, swsscommon will throw swsscommon.RedisError, redis connection need re-initialize in reinit_data()
# TODO: change to swsscommon.RedisError
redis_exception_happen = True
except Exception:
# Any unexpected exception or error, log it and keep running
logger.exception("MIBUpdater.start() caught an unexpected exception during update_data()")
Expand All @@ -55,6 +67,12 @@ def reinit_data(self):
"""
return

def reinit_connection(self):
"""
Reinit redis connection task. Children may override this method.
"""
return

def update_data(self):
"""
Background task. Children must override this method.
Expand Down
6 changes: 6 additions & 0 deletions src/sonic_ax_impl/mibs/ietf/rfc1213.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def __init__(self):
self.nexthop_map = {}
self.route_list = []

def reinit_connection(self):
Namespace.connect_all_dbs(self.db_conn, mibs.APPL_DB)

def update_data(self):
"""
Update redis (caches config)
Expand Down Expand Up @@ -216,6 +219,9 @@ def __init__(self):

self.namespace_db_map = Namespace.get_namespace_db_map(self.db_conn)

def reinit_connection(self):
Namespace.connect_namespace_dbs(self.db_conn)

def reinit_data(self):
"""
Subclass update interface information
Expand Down
3 changes: 3 additions & 0 deletions src/sonic_ax_impl/mibs/ietf/rfc2737.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ def create_physical_entity_updaters(self):
"""
return [creator(self) for creator in PhysicalTableMIBUpdater.physical_entity_updater_types]

def reinit_connection(self):
Namespace.connect_all_dbs(self.statedb, mibs.STATE_DB)

def reinit_data(self):
"""
Re-initialize all data.
Expand Down
3 changes: 3 additions & 0 deletions src/sonic_ax_impl/mibs/ietf/rfc2863.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def __init__(self):

self.namespace_db_map = Namespace.get_namespace_db_map(self.db_conn)

def reinit_connection(self):
Namespace.connect_namespace_dbs(self.db_conn)

def reinit_data(self):
"""
Subclass update interface information
Expand Down
4 changes: 3 additions & 1 deletion src/sonic_ax_impl/mibs/ietf/rfc3433.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,9 @@ def __init__(self):
self.thermal_sensor = []
self.broken_transceiver_info = []

def reinit_connection(self):
Namespace.connect_all_dbs(self.statedb, mibs.STATE_DB)

def reinit_data(self):
"""
Reinit data, clear cache
Expand All @@ -385,7 +388,6 @@ def reinit_data(self):
self.ent_phy_sensor_precision_map = {}
self.ent_phy_sensor_value_map = {}
self.ent_phy_sensor_oper_state_map = {}

transceiver_dom_encoded = Namespace.dbs_keys(self.statedb, mibs.STATE_DB, self.TRANSCEIVER_DOM_KEY_PATTERN)
if transceiver_dom_encoded:
self.transceiver_dom = [entry for entry in transceiver_dom_encoded]
Expand Down
3 changes: 3 additions & 0 deletions src/sonic_ax_impl/mibs/ietf/rfc4292.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ def __init__(self):
## loopback ip string -> ip address object
self.loips = {}

def reinit_connection(self):
Namespace.connect_all_dbs(self.db_conn, mibs.APPL_DB)

def reinit_data(self):
"""
Subclass update loopback information
Expand Down
3 changes: 3 additions & 0 deletions src/sonic_ax_impl/mibs/ietf/rfc4363.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def fdb_vlanmac(self, fdb):
return None
return (int(vlan_id),) + mac_decimals(fdb["mac"])

def reinit_connection(self):
Namespace.connect_namespace_dbs(self.db_conn)

def reinit_data(self):
"""
Subclass update interface information
Expand Down
3 changes: 3 additions & 0 deletions src/sonic_ax_impl/mibs/vendor/cisco/ciscoPfcExtMIB.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def __init__(self):
self.if_range = []
self.namespace_db_map = Namespace.get_namespace_db_map(self.db_conn)

def reinit_connection(self):
Namespace.connect_namespace_dbs(self.db_conn)

def reinit_data(self):
"""
Subclass update interface information
Expand Down
3 changes: 3 additions & 0 deletions src/sonic_ax_impl/mibs/vendor/cisco/ciscoSwitchQosMIB.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def __init__(self):
self.port_index_namespace = {}
self.namespace_db_map = Namespace.get_namespace_db_map(self.db_conn)

def reinit_connection(self):
Namespace.connect_namespace_dbs(self.db_conn)

def reinit_data(self):
"""
Subclass update interface information
Expand Down
64 changes: 63 additions & 1 deletion tests/test_rfc1213.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import asyncio
import os
import sonic_ax_impl
import sys
from unittest import TestCase

Expand All @@ -10,7 +12,8 @@
modules_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(modules_path, 'src'))

from sonic_ax_impl.mibs.ietf.rfc1213 import NextHopUpdater
from sonic_ax_impl.mibs.ietf.rfc1213 import NextHopUpdater, InterfacesUpdater


class TestNextHopUpdater(TestCase):

Expand Down Expand Up @@ -43,3 +46,62 @@ def test_NextHopUpdater_route_no_next_hop(self):
mocked_warning.assert_has_calls(expected)

self.assertTrue(len(updater.route_list) == 0)


class TestNextHopUpdaterRedisException(TestCase):
def __init__(self, name):
super().__init__(name)
self.throw_exception = True
self.updater = NextHopUpdater()

# setup mock method, throw exception when first time call it
def mock_dbs_keys(self, *args, **kwargs):
if self.throw_exception:
self.throw_exception = False
raise RuntimeError

self.updater.run_event.clear()
return None

@mock.patch('sonic_ax_impl.mibs.Namespace.dbs_get_all', mock.MagicMock(return_value=({"ifname": "Ethernet0,Ethernet4"})))
def test_NextHopUpdater_redis_exception(self):
with mock.patch('sonic_ax_impl.mibs.Namespace.dbs_keys', self.mock_dbs_keys):
with mock.patch('ax_interface.logger.exception') as mocked_exception:
self.updater.run_event.set()
self.updater.frequency = 1
self.updater.reinit_rate = 1
self.updater.update_counter = 1
loop = asyncio.get_event_loop()
loop.run_until_complete(self.updater.start())
loop.close()

# check warning
expected = [
mock.call("MIBUpdater.start() caught an unexpected exception during update_data()")
]
mocked_exception.assert_has_calls(expected)


@mock.patch('sonic_ax_impl.mibs.init_mgmt_interface_tables', mock.MagicMock(return_value=([{}, {}])))
def test_InterfacesUpdater_re_init_redis_exception(self):

def mock_get_sync_d_from_all_namespace(per_namespace_func, db_conn):
if per_namespace_func == sonic_ax_impl.mibs.init_sync_d_interface_tables:
return [{}, {}, {}, {}]

if per_namespace_func == sonic_ax_impl.mibs.init_sync_d_vlan_tables:
return [{}, {}, {}]

if per_namespace_func == sonic_ax_impl.mibs.init_sync_d_rif_tables:
return [{}, {}]

return [{}, {}, {}, {}, {}]

updater = InterfacesUpdater()
with mock.patch('sonic_ax_impl.mibs.Namespace.get_sync_d_from_all_namespace', mock_get_sync_d_from_all_namespace):
with mock.patch('sonic_ax_impl.mibs.Namespace.connect_namespace_dbs') as connect_namespace_dbs:
updater.reinit_connection()
updater.reinit_data()

# check re-init
connect_namespace_dbs.assert_called()
13 changes: 12 additions & 1 deletion tests/test_rfc3433.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,15 @@ def test_PhysicalSensorTableMIBUpdater_transceiver_info_key_missing(self):
# check warning
mocked_warn.assert_called()

self.assertTrue(len(updater.sub_ids) == 0)
self.assertTrue(len(updater.sub_ids) == 0)

@mock.patch('sonic_ax_impl.mibs.Namespace.dbs_keys', mock.MagicMock(return_value=(None)))
@mock.patch('swsscommon.swsscommon.SonicV2Connector.keys', mock.MagicMock(return_value=(None)))
def test_PhysicalSensorTableMIBUpdater_re_init_redis_exception(self):
updater = PhysicalSensorTableMIBUpdater()

with mock.patch('sonic_ax_impl.mibs.Namespace.connect_all_dbs') as connect_all_dbs:
updater.reinit_connection()

# check re-init
connect_all_dbs.assert_called()
11 changes: 11 additions & 0 deletions tests/test_rfc4292.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,14 @@ def test_RouteUpdater_route_no_iframe(self):
mocked_warning.assert_has_calls(expected)

self.assertTrue(len(updater.route_dest_list) == 0)


@mock.patch('sonic_ax_impl.mibs.Namespace.dbs_keys', mock.MagicMock(return_value=(None)))
def test_RouteUpdater_re_init_redis_exception(self):
updater = RouteUpdater()

with mock.patch('sonic_ax_impl.mibs.Namespace.connect_all_dbs') as connect_all_dbs:
updater.reinit_connection()

# check re-init
connect_all_dbs.assert_called()
20 changes: 20 additions & 0 deletions tests/test_rfc4363.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys
import sonic_ax_impl
from unittest import TestCase

if sys.version_info.major == 3:
Expand All @@ -26,3 +27,22 @@ def test_FdbUpdater_ent_bridge_port_id_attr_missing(self):
mocked_warn.assert_called()

self.assertTrue(len(updater.vlanmac_ifindex_list) == 0)


@mock.patch('sonic_ax_impl.mibs.Namespace.dbs_keys', mock.MagicMock(return_value=(None)))
@mock.patch('sonic_ax_impl.mibs.Namespace.dbs_get_bridge_port_map', mock.MagicMock(return_value=(None)))
def test_RouteUpdater_re_init_redis_exception(self):
updater = FdbUpdater()

def mock_get_sync_d_from_all_namespace(per_namespace_func, db_conn):
if per_namespace_func == sonic_ax_impl.mibs.init_sync_d_interface_tables:
return [{}, {}, {}, {}]

return [{}, {}, {}, {}, {}]

with mock.patch('sonic_ax_impl.mibs.Namespace.get_sync_d_from_all_namespace', mock_get_sync_d_from_all_namespace):
with mock.patch('sonic_ax_impl.mibs.Namespace.connect_namespace_dbs') as connect_namespace_dbs:
updater.reinit_connection()

# check re-init
connect_namespace_dbs.assert_called()

0 comments on commit 347a6e4

Please sign in to comment.