Skip to content

Commit

Permalink
[xcvrd] Return non-zero error code on SFP error (zhenggen-xu#67)
Browse files Browse the repository at this point in the history
Currently, when an error event occurs in the SFP state machine within xcvrd, the daemon still exits with exit code 0. With this change, xcvrd exits with a non-zero exit code when the SFP state machine hits an error event.

Signed-off-by: vaibhav-dahiya <vdahiya@microsoft.com>
  • Loading branch information
vdahiya12 authored Jul 18, 2020
1 parent e665ee8 commit 4f42a79
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions sonic-xcvrd/scripts/xcvrd
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ SFP_EEPROM_NOT_READY = -2
SFPUTIL_LOAD_ERROR = 1
PORT_CONFIG_LOAD_ERROR = 2
NOT_IMPLEMENTED_ERROR = 3
SFP_SYSTEM_ERROR = 4

RETRY_TIMES_FOR_SYSTEM_READY = 24
RETRY_PERIOD_FOR_SYSTEM_READY_MSECS = 5000
Expand Down Expand Up @@ -801,7 +802,7 @@ class sfp_state_update_task:
logger.log_debug("mapping from {} {} to {}".format(status, port_dict, event))
return event

def task_worker(self, stopping_event):
def task_worker(self, stopping_event, sfp_error_event):
logger.log_info("Start SFP monitoring loop")

transceiver_dict = {}
Expand Down Expand Up @@ -898,6 +899,7 @@ class sfp_state_update_task:
if retry >= RETRY_TIMES_FOR_SYSTEM_READY:
logger.log_error("System failed to get ready in {} secs or received system error. Exiting...".format((RETRY_PERIOD_FOR_SYSTEM_READY_MSECS/1000)*RETRY_TIMES_FOR_SYSTEM_READY))
next_state = STATE_EXIT
sfp_error_event.set()
else:
retry = retry + 1

Expand Down Expand Up @@ -985,6 +987,7 @@ class sfp_state_update_task:
if retry >= RETRY_TIMES_FOR_SYSTEM_FAIL:
logger.log_error("System failed to recover in {} secs. Exiting...".format((RETRY_PERIOD_FOR_SYSTEM_FAIL_MSECS/1000)*RETRY_TIMES_FOR_SYSTEM_FAIL))
next_state = STATE_EXIT
sfp_error_event.set()
else:
retry = retry + 1
waiting_time_compensation_with_sleep(time_start, RETRY_PERIOD_FOR_SYSTEM_FAIL_MSECS/1000)
Expand All @@ -1010,11 +1013,11 @@ class sfp_state_update_task:

logger.log_info("Stop SFP monitoring loop")

def task_run(self):
def task_run(self, sfp_error_event):
if self.task_stopping_event.is_set():
return

self.task_process = multiprocessing.Process(target=self.task_worker,args=(self.task_stopping_event,))
self.task_process = multiprocessing.Process(target=self.task_worker,args=(self.task_stopping_event, sfp_error_event))
self.task_process.start()

def task_stop(self):
Expand All @@ -1031,6 +1034,7 @@ class DaemonXcvrd(DaemonBase):

self.timeout = XCVRD_MAIN_THREAD_SLEEP_SECS
self.stop_event = threading.Event()
self.sfp_error_event = multiprocessing.Event()

# Signal handler
def signal_handler(self, sig, frame):
Expand Down Expand Up @@ -1166,7 +1170,7 @@ class DaemonXcvrd(DaemonBase):

# Start the sfp state info update process
sfp_state_update = sfp_state_update_task()
sfp_state_update.task_run()
sfp_state_update.task_run(self.sfp_error_event)

# Start main loop
logger.log_info("Start daemon main loop")
Expand All @@ -1188,6 +1192,9 @@ class DaemonXcvrd(DaemonBase):

logger.log_info("Shutting down...")

if self.sfp_error_event.is_set():
sys.exit(SFP_SYSTEM_ERROR)

#
# Main =========================================================================
#
Expand Down

0 comments on commit 4f42a79

Please sign in to comment.