Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[process-reboot-cause]Address the issue: Incorrect reboot cause returned when warm reboot follows a hardware caused reboot #21

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 64 additions & 31 deletions files/image_config/process-reboot-cause/process-reboot-cause
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ try:
import pwd
import sys
import syslog
import re
except ImportError as err:
raise ImportError("%s - required module not found" % str(err))

Expand All @@ -22,6 +23,15 @@ REBOOT_CAUSE_DIR = "/host/reboot-cause/"
REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt"
PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt"
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities
# Because the system can be rebooted from some old versions, we have to take all possible BOOT options into consideration.
# On 201803, 201807 we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot"
# On 201811 and later we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast
# To extract the commom part of them, we should have the following PATTERN
REBOOT_TYPE_KEXEC_PATTERN = ".*(fast-reboot|warm|fastfast|fast).*"

UNKNOWN_REBOOT_CAUSE = "Unknown"

Expand All @@ -47,6 +57,16 @@ def log_error(msg):


# ============================= Functions =============================
def is_warmfast_reboot_from_proc_cmdline():
if os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
with open(REBOOT_TYPE_KEXEC_FILE, "r") as cause_file:
cause_file_kexec = cause_file.readline()
m = re.match(REBOOT_TYPE_KEXEC_PATTERN, cause_file_kexec)
if m and m.group(1):
# the pattern matched so it's a fast/warm reboot
return True
return False


def main():
log_info("Starting up...")
Expand All @@ -73,40 +93,55 @@ def main():
try:
import sonic_platform

# Check if the previous reboot was caused by hardware
platform = sonic_platform.platform.Platform()

chassis = platform.get_chassis()

hardware_reboot_cause, optional_details = chassis.get_reboot_cause()
proc_cmdline_reboot_cause = None

if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
# contain any software-related reboot info. We will use it as the previous cause.
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
# If yes, the content of /hosts/reboot-cause/reboot-cause.txt will be treated as the reboot cause
if is_warmfast_reboot_from_proc_cmdline():
if os.path.isfile(REBOOT_CAUSE_FILE):
cause_file = open(REBOOT_CAUSE_FILE, "r")
previous_reboot_cause = cause_file.readline().rstrip('\n')
cause_file.close()
# If it is FirstTime Boot and previous_reboot_cause is unknown
# and hardware_reboot cause is non_hardware then
# Update the reboot cause as required
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n')
else:
# /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt doesn't exist.
# report an error.
log_error("/proc/cmdline indicates a fast/warm reboot but {} doesn't exist".format(REBOOT_CAUSE_DIR))

if proc_cmdline_reboot_cause is not None:
previous_reboot_cause = proc_cmdline_reboot_cause
else:
previous_reboot_cause = hardware_reboot_cause
# 2. Check if the previous reboot was caused by hardware
# If yes, the hardware reboot cause will be treated as teh reboot cause
platform = sonic_platform.platform.Platform()

chassis = platform.get_chassis()

hardware_reboot_cause, optional_details = chassis.get_reboot_cause()

if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
# contain any software-related reboot info. We will use it as the previous cause.
if os.path.isfile(REBOOT_CAUSE_FILE):
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
previous_reboot_cause = cause_file.readline().rstrip('\n')
# If it is FirstTime Boot and previous_reboot_cause is unknown
# and hardware_reboot cause is non_hardware then
# Update the reboot cause as required
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
else:
previous_reboot_cause = hardware_reboot_cause
except ImportError as err:
log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")

# If there is a REBOOT_CAUSE_FILE, it will contain any software-related
# reboot info. We will use it as the previous cause.
if os.path.isfile(REBOOT_CAUSE_FILE):
cause_file = open(REBOOT_CAUSE_FILE, "r")
previous_reboot_cause = cause_file.readline().rstrip('\n')
cause_file.close()
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
previous_reboot_cause = cause_file.readline().rstrip('\n')

# If it is FirstTime Boot and previous_reboot_cause is unknown
# Update the reboot cause as required
Expand All @@ -115,9 +150,8 @@ def main():
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
# Write the previous reboot cause to PREVIOUS_REBOOT_CAUSE_FILE
prev_cause_file = open(PREVIOUS_REBOOT_CAUSE_FILE, "w")
prev_cause_file.write(previous_reboot_cause)
prev_cause_file.close()
with open(PREVIOUS_REBOOT_CAUSE_FILE, "w") as prev_cause_file:
prev_cause_file.write(previous_reboot_cause)

# Also log the previous reboot cause to the syslog
log_info("Previous reboot cause: {}".format(previous_reboot_cause))
Expand All @@ -127,9 +161,8 @@ def main():
os.remove(REBOOT_CAUSE_FILE)

# Write a new default reboot cause file for the next reboot
cause_file = open(REBOOT_CAUSE_FILE, "w")
cause_file.write(UNKNOWN_REBOOT_CAUSE)
cause_file.close()
with open(REBOOT_CAUSE_FILE, "w") as cause_file:
cause_file.write(UNKNOWN_REBOOT_CAUSE)


if __name__ == "__main__":
Expand Down