-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[supervisor] Add patch to prevent 'supervisorctl start' command from …
…hanging if system time has rolled backward (#1311) * Add patch to prevent 'supervisorctl start' command from hanging if system time has rolled backward * Also add unit tests for clock rollback scenarios
- Loading branch information
Showing
6 changed files
with
285 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
# supervisor package | ||
|
||
SUPERVISOR = python-supervisor_3.3.2-1_all.deb | ||
SUPERVISOR_VERSION = 3.3.2 | ||
|
||
export SUPERVISOR_VERSION | ||
|
||
SUPERVISOR = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb | ||
$(SUPERVISOR)_SRC_PATH = $(SRC_PATH)/supervisor | ||
SONIC_MAKE_DEBS += $(SUPERVISOR) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
src/supervisor/patch/0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
From bac496af4c2051b3c15038e06533f9094b1c3604 Mon Sep 17 00:00:00 2001 | ||
From: Joe LeVeque <jolevequ@microsoft.com> | ||
Date: Tue, 16 Jan 2018 20:42:13 +0000 | ||
Subject: [PATCH 1/2] Prevent excessive hanging/delays in process state | ||
transitions if the system time rolls backward | ||
|
||
--- | ||
supervisor/process.py | 32 ++++++++++++++++++++++++++++++++ | ||
1 file changed, 32 insertions(+) | ||
|
||
diff --git a/supervisor/process.py b/supervisor/process.py | ||
index f9ddcd9..216372f 100644 | ||
--- a/supervisor/process.py | ||
+++ b/supervisor/process.py | ||
@@ -357,6 +357,12 @@ class Subprocess: | ||
""" Log a 'waiting for x to stop' message with throttling. """ | ||
if self.state == ProcessStates.STOPPING: | ||
now = time.time() | ||
+ | ||
+ # If the system clock has moved backward, reset | ||
+ # self.laststopreport to current system time | ||
+ if now < self.laststopreport: | ||
+ self.laststopreport = now; | ||
+ | ||
if now > (self.laststopreport + 2): # every 2 seconds | ||
self.config.options.logger.info( | ||
'waiting for %s to stop' % self.config.name) | ||
@@ -604,12 +610,27 @@ class Subprocess: | ||
# STOPPED -> STARTING | ||
self.spawn() | ||
elif state == ProcessStates.BACKOFF: | ||
+ if self.delay > 0 and now < self.delay - self.backoff: | ||
+ # The system clock appears to have moved backward | ||
+ # Reset self.delay accordingly | ||
+ self.delay = now + self.backoff | ||
+ | ||
if self.backoff <= self.config.startretries: | ||
if now > self.delay: | ||
# BACKOFF -> STARTING | ||
self.spawn() | ||
|
||
if state == ProcessStates.STARTING: | ||
+ if now < self.laststart: | ||
+ # The system clock appears to have moved backward | ||
+ # Reset self.laststart to current system time | ||
+ self.laststart = now; | ||
+ | ||
+ if self.delay > 0 and now < self.delay - self.config.startsecs: | ||
+ # The system clock appears to have moved backward | ||
+ # Reset self.delay accordingly | ||
+ self.delay = now + self.config.startsecs | ||
+ | ||
if now - self.laststart > self.config.startsecs: | ||
# STARTING -> RUNNING if the proc has started | ||
# successfully and it has stayed up for at least | ||
@@ -633,6 +654,11 @@ class Subprocess: | ||
logger.info('gave up: %s %s' % (self.config.name, msg)) | ||
|
||
elif state == ProcessStates.STOPPING: | ||
+ if self.delay > 0 and now < self.delay - self.config.stopwaitsecs: | ||
+ # The system clock appears to have moved backward | ||
+ # Reset self.delay accordingly | ||
+ self.delay = now + self.config.stopwaitsecs | ||
+ | ||
time_left = self.delay - now | ||
if time_left <= 0: | ||
# kill processes which are taking too long to stop with a final | ||
@@ -805,6 +831,12 @@ class EventListenerPool(ProcessGroupBase): | ||
if dispatch_capable: | ||
if self.dispatch_throttle: | ||
now = time.time() | ||
+ | ||
+ # If the system clock has moved backward, reset | ||
+ # self.last_dispatch to current system time | ||
+ if now < self.last_dispatch: | ||
+ self.last_dispatch = now; | ||
+ | ||
if now - self.last_dispatch < self.dispatch_throttle: | ||
return | ||
self.dispatch() | ||
-- | ||
2.1.4 | ||
|
184 changes: 184 additions & 0 deletions
184
src/supervisor/patch/0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
From 43b6f3ee93593f666af2c6a7af897bf3b5ee762d Mon Sep 17 00:00:00 2001 | ||
From: Joe LeVeque <jolevequ@microsoft.com> | ||
Date: Wed, 17 Jan 2018 22:27:56 +0000 | ||
Subject: [PATCH 2/2] Add unit test cases to ensure state transitions work if | ||
system time has rolled backward | ||
|
||
--- | ||
supervisor/tests/test_process.py | 146 +++++++++++++++++++++++++++++++++++++++ | ||
1 file changed, 146 insertions(+) | ||
|
||
diff --git a/supervisor/tests/test_process.py b/supervisor/tests/test_process.py | ||
index 7f9bab2..ea16315 100644 | ||
--- a/supervisor/tests/test_process.py | ||
+++ b/supervisor/tests/test_process.py | ||
@@ -736,6 +736,40 @@ class SubprocessTests(unittest.TestCase): | ||
instance.stop_report() | ||
self.assertEqual(len(options.logger.data), 1) # throttled | ||
|
||
+ def test_stop_report_laststopreport_in_future(self): | ||
+ future_time = time.time() + 3600 # 1 hour into the future | ||
+ options = DummyOptions() | ||
+ config = DummyPConfig(options, 'test', '/test') | ||
+ instance = self._makeOne(config) | ||
+ instance.pid = 11 | ||
+ dispatcher = DummyDispatcher(writable=True) | ||
+ instance.dispatchers = {'foo':dispatcher} | ||
+ from supervisor.states import ProcessStates | ||
+ instance.state = ProcessStates.STOPPING | ||
+ instance.laststopreport = future_time | ||
+ | ||
+ # This iteration of stop_report() should reset instance.laststopreport | ||
+ # to the current time | ||
+ instance.stop_report() | ||
+ | ||
+ # No logging should have taken place | ||
+ self.assertEqual(len(options.logger.data), 0) | ||
+ | ||
+ # Ensure instance.laststopreport has rolled backward | ||
+ self.assertTrue(instance.laststopreport < future_time) | ||
+ | ||
+ # Sleep for 2 seconds | ||
+ time.sleep(2) | ||
+ | ||
+ # This iteration of stop_report() should actaully trigger the report | ||
+ instance.stop_report() | ||
+ | ||
+ self.assertEqual(len(options.logger.data), 1) | ||
+ self.assertEqual(options.logger.data[0], 'waiting for test to stop') | ||
+ self.assertNotEqual(instance.laststopreport, 0) | ||
+ instance.stop_report() | ||
+ self.assertEqual(len(options.logger.data), 1) # throttled | ||
+ | ||
def test_give_up(self): | ||
options = DummyOptions() | ||
config = DummyPConfig(options, 'test', '/test') | ||
@@ -1402,6 +1436,92 @@ class SubprocessTests(unittest.TestCase): | ||
event = L[0] | ||
self.assertEqual(event.__class__, events.ProcessStateRunningEvent) | ||
|
||
+ def test_transition_starting_to_running_laststart_in_future(self): | ||
+ from supervisor import events | ||
+ L = [] | ||
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x)) | ||
+ from supervisor.states import ProcessStates | ||
+ | ||
+ future_time = time.time() + 3600 # 1 hour into the future | ||
+ options = DummyOptions() | ||
+ test_startsecs = 2 | ||
+ | ||
+ # this should go from STARTING to RUNNING via transition() | ||
+ pconfig = DummyPConfig(options, 'process', 'process','/bin/process', | ||
+ startsecs=test_startsecs) | ||
+ process = self._makeOne(pconfig) | ||
+ process.backoff = 1 | ||
+ process.delay = 1 | ||
+ process.system_stop = False | ||
+ process.laststart = future_time | ||
+ process.pid = 1 | ||
+ process.stdout_buffer = 'abc' | ||
+ process.stderr_buffer = 'def' | ||
+ process.state = ProcessStates.STARTING | ||
+ | ||
+ # This iteration of transition() should reset process.laststart | ||
+ # to the current time | ||
+ process.transition() | ||
+ | ||
+ # Process state should still be STARTING | ||
+ self.assertEqual(process.state, ProcessStates.STARTING) | ||
+ | ||
+ # Ensure process.laststart has rolled backward | ||
+ self.assertTrue(process.laststart < future_time) | ||
+ | ||
+ # Sleep for (startsecs + 1) | ||
+ time.sleep(test_startsecs + 1) | ||
+ | ||
+ # This iteration of transition() should actaully trigger the state | ||
+ # transition to RUNNING | ||
+ process.transition() | ||
+ | ||
+ # this implies RUNNING | ||
+ self.assertEqual(process.backoff, 0) | ||
+ self.assertEqual(process.delay, 0) | ||
+ self.assertFalse(process.system_stop) | ||
+ self.assertEqual(process.state, ProcessStates.RUNNING) | ||
+ self.assertEqual(options.logger.data[0], | ||
+ 'success: process entered RUNNING state, process has ' | ||
+ 'stayed up for > than {} seconds (startsecs)'.format(test_startsecs)) | ||
+ self.assertEqual(len(L), 1) | ||
+ event = L[0] | ||
+ self.assertEqual(event.__class__, events.ProcessStateRunningEvent) | ||
+ | ||
+ def test_transition_backoff_to_starting_delay_in_future(self): | ||
+ from supervisor import events | ||
+ L = [] | ||
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x)) | ||
+ from supervisor.states import ProcessStates, SupervisorStates | ||
+ | ||
+ future_time = time.time() + 3600 # 1 hour into the future | ||
+ options = DummyOptions() | ||
+ | ||
+ pconfig = DummyPConfig(options, 'process', 'process','/bin/process') | ||
+ process = self._makeOne(pconfig) | ||
+ process.laststart = 1 | ||
+ process.delay = future_time | ||
+ process.backoff = 0 | ||
+ process.state = ProcessStates.BACKOFF | ||
+ | ||
+ # This iteration of transition() should reset process.delay | ||
+ # to the current time | ||
+ process.transition() | ||
+ | ||
+ # Process state should still be BACKOFF | ||
+ self.assertEqual(process.state, ProcessStates.BACKOFF) | ||
+ | ||
+ # Ensure process.delay has rolled backward | ||
+ self.assertTrue(process.delay < future_time) | ||
+ | ||
+ # This iteration of transition() should actaully trigger the state | ||
+ # transition to STARTING | ||
+ process.transition() | ||
+ | ||
+ self.assertEqual(process.state, ProcessStates.STARTING) | ||
+ self.assertEqual(len(L), 1) | ||
+ self.assertEqual(L[0].__class__, events.ProcessStateStartingEvent) | ||
+ | ||
def test_transition_backoff_to_fatal(self): | ||
from supervisor import events | ||
L = [] | ||
@@ -2033,6 +2153,32 @@ class EventListenerPoolTests(ProcessGroupBaseTests): | ||
self.assertEqual(process1.listener_state, EventListenerStates.BUSY) | ||
self.assertEqual(process1.event, event) | ||
|
||
+ def test_transition_event_proc_running_with_dispatch_throttle_last_dispatch_in_future(self): | ||
+ future_time = time.time() + 3600 # 1 hour into the future | ||
+ options = DummyOptions() | ||
+ from supervisor.states import ProcessStates | ||
+ pconfig1 = DummyPConfig(options, 'process1', 'process1','/bin/process1') | ||
+ process1 = DummyProcess(pconfig1, state=ProcessStates.RUNNING) | ||
+ gconfig = DummyPGroupConfig(options, pconfigs=[pconfig1]) | ||
+ pool = self._makeOne(gconfig) | ||
+ pool.dispatch_throttle = 5 | ||
+ pool.last_dispatch = future_time | ||
+ pool.processes = {'process1': process1} | ||
+ event = DummyEvent() | ||
+ from supervisor.states import EventListenerStates | ||
+ process1.listener_state = EventListenerStates.READY | ||
+ class DummyGroup: | ||
+ config = gconfig | ||
+ process1.group = DummyGroup | ||
+ pool._acceptEvent(event) | ||
+ pool.transition() | ||
+ | ||
+ self.assertEqual(process1.transitioned, True) | ||
+ self.assertEqual(pool.event_buffer, [event]) # not popped | ||
+ | ||
+ # Ensure pool.last_dispatch has been rolled backward | ||
+ self.assertTrue(pool.last_dispatch < future_time) | ||
+ | ||
def test__dispatchEvent_notready(self): | ||
options = DummyOptions() | ||
from supervisor.states import ProcessStates | ||
-- | ||
2.1.4 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch | ||
0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch |