From 6522c4650a5ca499c95b1dec70615b88366641fe Mon Sep 17 00:00:00 2001 From: vdahiya12 <67608553+vdahiya12@users.noreply.github.com> Date: Wed, 21 Sep 2022 13:18:25 -0700 Subject: [PATCH] [ycabled] add notification for gRPC connection state transitions to IDLE/TRANSIENT_FAILURE (#295) Signed-off-by: vaibhav-dahiya vdahiya@microsoft.com For the cases where gRPC connectivity to the server becomes IDLE/TRANSIENT_FAILURE, the ToR should periodically query/retry to establish the admin state and revert the forwarding state back to active-active for both T0s. This PR tries to achieve that by adding a transient failure message to APP DB when connectivity is lost, so that each time the connectivity state changes to an undesired state (IDLE/TRANSIENT_FAILURE) we log a message, and linkmgr will query again to get gRPC back in sync. Description Motivation and Context How Has This Been Tested? Unit-tests and deploying changes to testbed Additional Information (Optional) --- .../ycable/ycable_utilities/y_cable_helper.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py b/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py index ffe82915ee23..5f412fa9cf46 100644 --- a/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py +++ b/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py @@ -424,16 +424,36 @@ def connect_channel(channel, stub, port): def create_channel(type, level, kvp, soc_ip, port): - #Helper callback to get an channel connectivity state + appl_db = {} + fwd_state_response_tbl = {} + namespaces = multi_asic.get_front_end_namespaces() + for namespace in namespaces: + # Open a handle to the Application database, in all namespaces + asic_id = multi_asic.get_asic_index_from_namespace(namespace) + appl_db[asic_id] = daemon_base.db_connect("APPL_DB", namespace) + fwd_state_response_tbl[asic_id] = swsscommon.Table( + appl_db[asic_id], "FORWARDING_STATE_RESPONSE") + + asic_index = 
y_cable_platform_sfputil.get_asic_id_for_logical_port(port) + + # Helper callback to get an channel connectivity state def wait_for_state_change(channel_connectivity): if channel_connectivity == grpc.ChannelConnectivity.TRANSIENT_FAILURE: helper_logger.log_notice("gRPC port {} state changed to TRANSIENT_FAILURE".format(port)) + # for connectivity state to FAILURE/IDLE report a failure + fvs_updated = swsscommon.FieldValuePairs([('response', 'failure')]) + fwd_state_response_tbl[asic_index].set(port, fvs_updated) + if channel_connectivity == grpc.ChannelConnectivity.CONNECTING: helper_logger.log_notice("gRPC port {} state changed to CONNECTING".format(port)) if channel_connectivity == grpc.ChannelConnectivity.READY: helper_logger.log_notice("gRPC port {} state changed to READY".format(port)) if channel_connectivity == grpc.ChannelConnectivity.IDLE: helper_logger.log_notice("gRPC port {} state changed to IDLE".format(port)) + # for connectivity state to FAILURE/IDLE report a failure + fvs_updated = swsscommon.FieldValuePairs([('response', 'failure')]) + fwd_state_response_tbl[asic_index].set(port, fvs_updated) + if channel_connectivity == grpc.ChannelConnectivity.SHUTDOWN: helper_logger.log_notice("gRPC port {} state changed to SHUTDOWN".format(port))