From 66a9a25e80b694dbcdb86151b6012811bfb8a833 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 12 Dec 2019 09:55:47 +0800 Subject: [PATCH 1/4] [process-reboot-cause]Address the issue: Incorrect reboot cause returned when warm reboot follows a hardware caused reboot 1. check whether /proc/cmdline indicates warm/fast reboot. if yes the software reboot cause file will be treated as the reboot cause. finish 2. check whether platform api returns a reboot cause. if yes it is treated as the reboot cause. finish. 3. check whether /host/reboot-cause contains a cause. if yes it is treated as the cause otherwise return unknown. --- .../process-reboot-cause/process-reboot-cause | 72 +++++++++++++------ 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/files/image_config/process-reboot-cause/process-reboot-cause b/files/image_config/process-reboot-cause/process-reboot-cause index 49cfa752641f..e876afe343f8 100755 --- a/files/image_config/process-reboot-cause/process-reboot-cause +++ b/files/image_config/process-reboot-cause/process-reboot-cause @@ -11,6 +11,7 @@ try: import pwd import sys import syslog + import re except ImportError as err: raise ImportError("%s - required module not found" % str(err)) @@ -22,6 +23,9 @@ REBOOT_CAUSE_DIR = "/host/reboot-cause/" REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt" PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt" FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform" +REBOOT_CAUSE_KEXEC = "/proc/cmdline" +# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities +REBOOT_CAUSE_KEXEC_PATTERN = ".*SONIC_BOOT_TYPE=(fast-reboot|warm|fastfast)$" UNKNOWN_REBOOT_CAUSE = "Unknown" @@ -47,6 +51,16 @@ def log_error(msg): # ============================= Functions ============================= +def is_warmfast_reboot_from_proc_cmdline(): + if os.path.isfile(REBOOT_CAUSE_KEXEC): + cause_file = open(REBOOT_CAUSE_KEXEC, "r") + cause_file_kexec = 
cause_file.readline() + m = re.match(REBOOT_CAUSE_KEXEC_PATTERN, cause_file_kexec) + if m and m.group(1): + # the pattern matched so it's a fast/warm reboot + return True + return False + def main(): log_info("Starting up...") @@ -73,31 +87,49 @@ def main(): try: import sonic_platform - # Check if the previous reboot was caused by hardware - platform = sonic_platform.platform.Platform() - - chassis = platform.get_chassis() + proc_cmdline_reboot_cause = None - hardware_reboot_cause, optional_details = chassis.get_reboot_cause() - - if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE: - # The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will - # contain any software-related reboot info. We will use it as the previous cause. + # 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline + # If yes, the content of /hosts/reboot-cause/reboot-cause.txt will be treated as the reboot cause + if is_warmfast_reboot_from_proc_cmdline(): if os.path.isfile(REBOOT_CAUSE_FILE): cause_file = open(REBOOT_CAUSE_FILE, "r") - previous_reboot_cause = cause_file.readline().rstrip('\n') + proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n') cause_file.close() - # If it is FirstTime Boot and previous_reboot_cause is unknown - # and hardware_reboot cause is non_hardware then - # Update the reboot cause as required - if os.path.isfile(FIRST_BOOT_PLATFORM_FILE): - if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE): - previous_reboot_cause = UNKNOWN_REBOOT_CAUSE - os.remove(FIRST_BOOT_PLATFORM_FILE) - elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER: - previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details) + else: + # /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt doesn't agree. + # report an error. 
+ log_error("/proc/cmdline indicates a fast/warm reboot but {} doesn't exist".format(REBOOT_CAUSE_DIR)) + + if proc_cmdline_reboot_cause: + previous_reboot_cause = proc_cmdline_reboot_cause else: - previous_reboot_cause = hardware_reboot_cause + # 2. Check if the previous reboot was caused by hardware + # If yes, the hardware reboot cause will be treated as the reboot cause + platform = sonic_platform.platform.Platform() + + chassis = platform.get_chassis() + + hardware_reboot_cause, optional_details = chassis.get_reboot_cause() + + if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE: + # The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will + # contain any software-related reboot info. We will use it as the previous cause. + if os.path.isfile(REBOOT_CAUSE_FILE): + cause_file = open(REBOOT_CAUSE_FILE, "r") + previous_reboot_cause = cause_file.readline().rstrip('\n') + cause_file.close() + # If it is FirstTime Boot and previous_reboot_cause is unknown + # and hardware_reboot cause is non_hardware then + # Update the reboot cause as required + if os.path.isfile(FIRST_BOOT_PLATFORM_FILE): + if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE): + previous_reboot_cause = UNKNOWN_REBOOT_CAUSE + os.remove(FIRST_BOOT_PLATFORM_FILE) + elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER: + previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details) + else: + previous_reboot_cause = hardware_reboot_cause except ImportError as err: log_warning("sonic_platform package not installed. 
Unable to detect hardware reboot causes.") From 757d82bb8b403e0ef4a6b810deb0371856a8838f Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 12 Dec 2019 11:17:31 +0800 Subject: [PATCH 2/4] [process-reboot-cause]Fix review comments --- files/image_config/process-reboot-cause/process-reboot-cause | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/files/image_config/process-reboot-cause/process-reboot-cause b/files/image_config/process-reboot-cause/process-reboot-cause index e876afe343f8..ed00ccc6d164 100755 --- a/files/image_config/process-reboot-cause/process-reboot-cause +++ b/files/image_config/process-reboot-cause/process-reboot-cause @@ -97,11 +97,11 @@ def main(): proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n') cause_file.close() else: - # /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt doesn't agree. + # /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt doesn't exist. # report an error. log_error("/proc/cmdline indicates a fast/warm reboot but {} doesn't exist".format(REBOOT_CAUSE_DIR)) - if proc_cmdline_reboot_cause: + if proc_cmdline_reboot_cause is not None: previous_reboot_cause = proc_cmdline_reboot_cause else: # 2. Check if the previous reboot was caused by hardware From 70a7df19cc59cf4f47c98ee3eab2c1f97268a415 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 12 Dec 2019 22:01:55 +0800 Subject: [PATCH 3/4] [process-reboot-cause]address comments 1. use "with" statement 2. 
update fast/warm reboot BOOT_ARG --- .../process-reboot-cause/process-reboot-cause | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/files/image_config/process-reboot-cause/process-reboot-cause b/files/image_config/process-reboot-cause/process-reboot-cause index ed00ccc6d164..a93dfa12dec5 100755 --- a/files/image_config/process-reboot-cause/process-reboot-cause +++ b/files/image_config/process-reboot-cause/process-reboot-cause @@ -25,7 +25,7 @@ PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt" FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform" REBOOT_CAUSE_KEXEC = "/proc/cmdline" # The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities -REBOOT_CAUSE_KEXEC_PATTERN = ".*SONIC_BOOT_TYPE=(fast-reboot|warm|fastfast)$" +REBOOT_CAUSE_KEXEC_PATTERN = ".*SONIC_BOOT_TYPE=(fast-reboot|warm|fastfast|fast).*" UNKNOWN_REBOOT_CAUSE = "Unknown" @@ -53,8 +53,8 @@ def log_error(msg): # ============================= Functions ============================= def is_warmfast_reboot_from_proc_cmdline(): if os.path.isfile(REBOOT_CAUSE_KEXEC): - cause_file = open(REBOOT_CAUSE_KEXEC, "r") - cause_file_kexec = cause_file.readline() + with open(REBOOT_CAUSE_KEXEC, "r") as cause_file: + cause_file_kexec = cause_file.readline() m = re.match(REBOOT_CAUSE_KEXEC_PATTERN, cause_file_kexec) if m and m.group(1): # the pattern matched so it's a fast/warm reboot @@ -93,9 +93,8 @@ def main(): # If yes, the content of /hosts/reboot-cause/reboot-cause.txt will be treated as the reboot cause if is_warmfast_reboot_from_proc_cmdline(): if os.path.isfile(REBOOT_CAUSE_FILE): - cause_file = open(REBOOT_CAUSE_FILE, "r") - proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n') - cause_file.close() + with open(REBOOT_CAUSE_FILE, "r") as cause_file: + proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n') else: # /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt 
doesn't exist. # report an error. @@ -116,9 +115,8 @@ def main(): # The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will # contain any software-related reboot info. We will use it as the previous cause. if os.path.isfile(REBOOT_CAUSE_FILE): - cause_file = open(REBOOT_CAUSE_FILE, "r") - previous_reboot_cause = cause_file.readline().rstrip('\n') - cause_file.close() + with open(REBOOT_CAUSE_FILE, "r") as cause_file: + previous_reboot_cause = cause_file.readline().rstrip('\n') # If it is FirstTime Boot and previous_reboot_cause is unknown # and hardware_reboot cause is non_hardware then # Update the reboot cause as required @@ -136,9 +134,8 @@ def main(): # If there is a REBOOT_CAUSE_FILE, it will contain any software-related # reboot info. We will use it as the previous cause. if os.path.isfile(REBOOT_CAUSE_FILE): - cause_file = open(REBOOT_CAUSE_FILE, "r") - previous_reboot_cause = cause_file.readline().rstrip('\n') - cause_file.close() + with open(REBOOT_CAUSE_FILE, "r") as cause_file: + previous_reboot_cause = cause_file.readline().rstrip('\n') # If it is FirstTime Boot and previous_reboot_cause is unknown # Update the reboot cause as required @@ -147,9 +144,8 @@ def main(): previous_reboot_cause = UNKNOWN_REBOOT_CAUSE os.remove(FIRST_BOOT_PLATFORM_FILE) # Write the previous reboot cause to PREVIOUS_REBOOT_CAUSE_FILE - prev_cause_file = open(PREVIOUS_REBOOT_CAUSE_FILE, "w") - prev_cause_file.write(previous_reboot_cause) - prev_cause_file.close() + with open(PREVIOUS_REBOOT_CAUSE_FILE, "w") as prev_cause_file: + prev_cause_file.write(previous_reboot_cause) # Also log the previous reboot cause to the syslog log_info("Previous reboot cause: {}".format(previous_reboot_cause)) @@ -159,9 +155,8 @@ def main(): os.remove(REBOOT_CAUSE_FILE) # Write a new default reboot cause file for the next reboot - cause_file = open(REBOOT_CAUSE_FILE, "w") - cause_file.write(UNKNOWN_REBOOT_CAUSE) - cause_file.close() + with open(REBOOT_CAUSE_FILE, "w") as 
cause_file: + cause_file.write(UNKNOWN_REBOOT_CAUSE) if __name__ == "__main__": From 0ea70898620b9e8e8fa5e995c2b0679216d8406d Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Fri, 13 Dec 2019 09:30:37 +0800 Subject: [PATCH 4/4] [process-reboot-cause]address comments --- .../process-reboot-cause/process-reboot-cause | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/files/image_config/process-reboot-cause/process-reboot-cause b/files/image_config/process-reboot-cause/process-reboot-cause index a93dfa12dec5..a90f36932485 100755 --- a/files/image_config/process-reboot-cause/process-reboot-cause +++ b/files/image_config/process-reboot-cause/process-reboot-cause @@ -23,9 +23,15 @@ REBOOT_CAUSE_DIR = "/host/reboot-cause/" REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt" PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt" FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform" -REBOOT_CAUSE_KEXEC = "/proc/cmdline" +REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline" # The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities -REBOOT_CAUSE_KEXEC_PATTERN = ".*SONIC_BOOT_TYPE=(fast-reboot|warm|fastfast|fast).*" +# Because the system can be rebooted from some old versions, we have to take all possible BOOT options into consideration. 
+# On 201803, 201807 we have +# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot" +# On 201811 and later we have +# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast +# To extract the common part of them, we should have the following PATTERN +REBOOT_TYPE_KEXEC_PATTERN = ".*(fast-reboot|warm|fastfast|fast).*" UNKNOWN_REBOOT_CAUSE = "Unknown" @@ -52,10 +58,10 @@ def log_error(msg): # ============================= Functions ============================= def is_warmfast_reboot_from_proc_cmdline(): - if os.path.isfile(REBOOT_CAUSE_KEXEC): - with open(REBOOT_CAUSE_KEXEC, "r") as cause_file: + if os.path.isfile(REBOOT_TYPE_KEXEC_FILE): + with open(REBOOT_TYPE_KEXEC_FILE, "r") as cause_file: cause_file_kexec = cause_file.readline() - m = re.match(REBOOT_CAUSE_KEXEC_PATTERN, cause_file_kexec) + m = re.match(REBOOT_TYPE_KEXEC_PATTERN, cause_file_kexec) if m and m.group(1): # the pattern matched so it's a fast/warm reboot return True