Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[supervisor] Add patch to prevent 'supervisorctl start' command from hanging if system time has rolled backward #1311

Merged
merged 2 commits into from
Jan 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ src/sonic-device-data/src/device/
src/sonic-device-data/src/debian/
src/supervisor/*
!src/supervisor/Makefile
!src/supervisor/patch/
src/thrift/*
!src/thrift/Makefile

Expand Down
6 changes: 5 additions & 1 deletion rules/supervisor.mk
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# supervisor package

SUPERVISOR = python-supervisor_3.3.2-1_all.deb
SUPERVISOR_VERSION = 3.3.2

export SUPERVISOR_VERSION

SUPERVISOR = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb
$(SUPERVISOR)_SRC_PATH = $(SRC_PATH)/supervisor
SONIC_MAKE_DEBS += $(SUPERVISOR)
12 changes: 10 additions & 2 deletions src/supervisor/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
SHELL = /bin/bash
.SHELLFLAGS += -e

MAIN_TARGET = python-supervisor_3.3.2-1_all.deb
MAIN_TARGET = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb

$(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
# Remove any stale files
Expand All @@ -11,7 +11,15 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
# Clone supervisor repo
git clone https://github.com/Supervisor/supervisor.git
pushd ./supervisor
git checkout -f 3.3.2

# Reset HEAD to the commit of the proper tag
# NOTE: Using "git checkout <tag_name>" here detaches our HEAD,
# which stg doesn't like, so we use this method instead
git reset --hard $(SUPERVISOR_VERSION)

# Apply patches
stg init
stg import -s ../patch/series

# Build Python and Debian package
python setup.py --command-packages=stdeb.command bdist_deb
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
From bac496af4c2051b3c15038e06533f9094b1c3604 Mon Sep 17 00:00:00 2001
From: Joe LeVeque <jolevequ@microsoft.com>
Date: Tue, 16 Jan 2018 20:42:13 +0000
Subject: [PATCH 1/2] Prevent excessive hanging/delays in process state
transitions if the system time rolls backward

---
supervisor/process.py | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)

diff --git a/supervisor/process.py b/supervisor/process.py
index f9ddcd9..216372f 100644
--- a/supervisor/process.py
+++ b/supervisor/process.py
@@ -357,6 +357,12 @@ class Subprocess:
""" Log a 'waiting for x to stop' message with throttling. """
if self.state == ProcessStates.STOPPING:
now = time.time()
+
+ # If the system clock has moved backward, reset
+ # self.laststopreport to current system time
+ if now < self.laststopreport:
+ self.laststopreport = now;
+
if now > (self.laststopreport + 2): # every 2 seconds
self.config.options.logger.info(
'waiting for %s to stop' % self.config.name)
@@ -604,12 +610,27 @@ class Subprocess:
# STOPPED -> STARTING
self.spawn()
elif state == ProcessStates.BACKOFF:
+ if self.delay > 0 and now < self.delay - self.backoff:
+ # The system clock appears to have moved backward
+ # Reset self.delay accordingly
+ self.delay = now + self.backoff
+
if self.backoff <= self.config.startretries:
if now > self.delay:
# BACKOFF -> STARTING
self.spawn()

if state == ProcessStates.STARTING:
+ if now < self.laststart:
+ # The system clock appears to have moved backward
+ # Reset self.laststart to current system time
+ self.laststart = now;
+
+ if self.delay > 0 and now < self.delay - self.config.startsecs:
+ # The system clock appears to have moved backward
+ # Reset self.delay accordingly
+ self.delay = now + self.config.startsecs
+
if now - self.laststart > self.config.startsecs:
# STARTING -> RUNNING if the proc has started
# successfully and it has stayed up for at least
@@ -633,6 +654,11 @@ class Subprocess:
logger.info('gave up: %s %s' % (self.config.name, msg))

elif state == ProcessStates.STOPPING:
+ if self.delay > 0 and now < self.delay - self.config.stopwaitsecs:
+ # The system clock appears to have moved backward
+ # Reset self.delay accordingly
+ self.delay = now + self.config.stopwaitsecs
+
time_left = self.delay - now
if time_left <= 0:
# kill processes which are taking too long to stop with a final
@@ -805,6 +831,12 @@ class EventListenerPool(ProcessGroupBase):
if dispatch_capable:
if self.dispatch_throttle:
now = time.time()
+
+ # If the system clock has moved backward, reset
+ # self.last_dispatch to current system time
+ if now < self.last_dispatch:
+ self.last_dispatch = now;
+
if now - self.last_dispatch < self.dispatch_throttle:
return
self.dispatch()
--
2.1.4

Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
From 43b6f3ee93593f666af2c6a7af897bf3b5ee762d Mon Sep 17 00:00:00 2001
From: Joe LeVeque <jolevequ@microsoft.com>
Date: Wed, 17 Jan 2018 22:27:56 +0000
Subject: [PATCH 2/2] Add unit test cases to ensure state transitions work if
system time has rolled backward

---
supervisor/tests/test_process.py | 146 +++++++++++++++++++++++++++++++++++++++
1 file changed, 146 insertions(+)

diff --git a/supervisor/tests/test_process.py b/supervisor/tests/test_process.py
index 7f9bab2..ea16315 100644
--- a/supervisor/tests/test_process.py
+++ b/supervisor/tests/test_process.py
@@ -736,6 +736,40 @@ class SubprocessTests(unittest.TestCase):
instance.stop_report()
self.assertEqual(len(options.logger.data), 1) # throttled

+ def test_stop_report_laststopreport_in_future(self):
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+ config = DummyPConfig(options, 'test', '/test')
+ instance = self._makeOne(config)
+ instance.pid = 11
+ dispatcher = DummyDispatcher(writable=True)
+ instance.dispatchers = {'foo':dispatcher}
+ from supervisor.states import ProcessStates
+ instance.state = ProcessStates.STOPPING
+ instance.laststopreport = future_time
+
+ # This iteration of stop_report() should reset instance.laststopreport
+ # to the current time
+ instance.stop_report()
+
+ # No logging should have taken place
+ self.assertEqual(len(options.logger.data), 0)
+
+ # Ensure instance.laststopreport has rolled backward
+ self.assertTrue(instance.laststopreport < future_time)
+
+ # Sleep for 2 seconds
+ time.sleep(2)
+
+ # This iteration of stop_report() should actaully trigger the report
+ instance.stop_report()
+
+ self.assertEqual(len(options.logger.data), 1)
+ self.assertEqual(options.logger.data[0], 'waiting for test to stop')
+ self.assertNotEqual(instance.laststopreport, 0)
+ instance.stop_report()
+ self.assertEqual(len(options.logger.data), 1) # throttled
+
def test_give_up(self):
options = DummyOptions()
config = DummyPConfig(options, 'test', '/test')
@@ -1402,6 +1436,92 @@ class SubprocessTests(unittest.TestCase):
event = L[0]
self.assertEqual(event.__class__, events.ProcessStateRunningEvent)

+ def test_transition_starting_to_running_laststart_in_future(self):
+ from supervisor import events
+ L = []
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x))
+ from supervisor.states import ProcessStates
+
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+ test_startsecs = 2
+
+ # this should go from STARTING to RUNNING via transition()
+ pconfig = DummyPConfig(options, 'process', 'process','/bin/process',
+ startsecs=test_startsecs)
+ process = self._makeOne(pconfig)
+ process.backoff = 1
+ process.delay = 1
+ process.system_stop = False
+ process.laststart = future_time
+ process.pid = 1
+ process.stdout_buffer = 'abc'
+ process.stderr_buffer = 'def'
+ process.state = ProcessStates.STARTING
+
+ # This iteration of transition() should reset process.laststart
+ # to the current time
+ process.transition()
+
+ # Process state should still be STARTING
+ self.assertEqual(process.state, ProcessStates.STARTING)
+
+ # Ensure process.laststart has rolled backward
+ self.assertTrue(process.laststart < future_time)
+
+ # Sleep for (startsecs + 1)
+ time.sleep(test_startsecs + 1)
+
+ # This iteration of transition() should actaully trigger the state
+ # transition to RUNNING
+ process.transition()
+
+ # this implies RUNNING
+ self.assertEqual(process.backoff, 0)
+ self.assertEqual(process.delay, 0)
+ self.assertFalse(process.system_stop)
+ self.assertEqual(process.state, ProcessStates.RUNNING)
+ self.assertEqual(options.logger.data[0],
+ 'success: process entered RUNNING state, process has '
+ 'stayed up for > than {} seconds (startsecs)'.format(test_startsecs))
+ self.assertEqual(len(L), 1)
+ event = L[0]
+ self.assertEqual(event.__class__, events.ProcessStateRunningEvent)
+
+ def test_transition_backoff_to_starting_delay_in_future(self):
+ from supervisor import events
+ L = []
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x))
+ from supervisor.states import ProcessStates, SupervisorStates
+
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+
+ pconfig = DummyPConfig(options, 'process', 'process','/bin/process')
+ process = self._makeOne(pconfig)
+ process.laststart = 1
+ process.delay = future_time
+ process.backoff = 0
+ process.state = ProcessStates.BACKOFF
+
+ # This iteration of transition() should reset process.delay
+ # to the current time
+ process.transition()
+
+ # Process state should still be BACKOFF
+ self.assertEqual(process.state, ProcessStates.BACKOFF)
+
+ # Ensure process.delay has rolled backward
+ self.assertTrue(process.delay < future_time)
+
+ # This iteration of transition() should actaully trigger the state
+ # transition to STARTING
+ process.transition()
+
+ self.assertEqual(process.state, ProcessStates.STARTING)
+ self.assertEqual(len(L), 1)
+ self.assertEqual(L[0].__class__, events.ProcessStateStartingEvent)
+
def test_transition_backoff_to_fatal(self):
from supervisor import events
L = []
@@ -2033,6 +2153,32 @@ class EventListenerPoolTests(ProcessGroupBaseTests):
self.assertEqual(process1.listener_state, EventListenerStates.BUSY)
self.assertEqual(process1.event, event)

+ def test_transition_event_proc_running_with_dispatch_throttle_last_dispatch_in_future(self):
+ future_time = time.time() + 3600 # 1 hour into the future
+ options = DummyOptions()
+ from supervisor.states import ProcessStates
+ pconfig1 = DummyPConfig(options, 'process1', 'process1','/bin/process1')
+ process1 = DummyProcess(pconfig1, state=ProcessStates.RUNNING)
+ gconfig = DummyPGroupConfig(options, pconfigs=[pconfig1])
+ pool = self._makeOne(gconfig)
+ pool.dispatch_throttle = 5
+ pool.last_dispatch = future_time
+ pool.processes = {'process1': process1}
+ event = DummyEvent()
+ from supervisor.states import EventListenerStates
+ process1.listener_state = EventListenerStates.READY
+ class DummyGroup:
+ config = gconfig
+ process1.group = DummyGroup
+ pool._acceptEvent(event)
+ pool.transition()
+
+ self.assertEqual(process1.transitioned, True)
+ self.assertEqual(pool.event_buffer, [event]) # not popped
+
+ # Ensure pool.last_dispatch has been rolled backward
+ self.assertTrue(pool.last_dispatch < future_time)
+
def test__dispatchEvent_notready(self):
options = DummyOptions()
from supervisor.states import ProcessStates
--
2.1.4

2 changes: 2 additions & 0 deletions src/supervisor/patch/series
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch
0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch