Commit 7565788d authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[master] Merge branch 'trac2244'

parents 183ea208 d12ce3c2
......@@ -141,6 +141,16 @@ it now. The new configuration is printed.
% BIND10_RECEIVED_SIGNAL received signal %1
The boss module received the given signal.
% BIND10_RESTART_COMPONENT_SKIPPED Skipped restarting a component %1
The boss module tried to restart a component after it failed (crashed)
unexpectedly, but the boss then found that the component had been removed
from its local configuration of components to run. This is an unusal
situation but can happen if the administrator removes the component from
the configuration after the component's crash and before the restart time.
The boss module simply skipped restarting that module, and the whole system
went back to the expected state (except that the crash itself is likely
to be a bug).
% BIND10_RESURRECTED_PROCESS resurrected %1 (PID %2)
The given process has been restarted successfully, and is now running
with the given process id.
......
......@@ -739,7 +739,7 @@ class BoB:
component = self.components.pop(pid)
logger.info(BIND10_PROCESS_ENDED, component.name(), pid,
exit_status)
if component.running() and self.runnable:
if component.is_running() and self.runnable:
# Tell it it failed. But only if it matters (we are
# not shutting down and the component considers itself
# to be running.
......@@ -771,7 +771,12 @@ class BoB:
next_restart_time = None
now = time.time()
for component in self.components_to_restart:
if not component.restart(now):
# If the component was removed from the configurator between since
# scheduled to restart, just ignore it. The object will just be
# dropped here.
if not self._component_configurator.has_component(component):
logger.info(BIND10_RESTART_COMPONENT_SKIPPED, component.name())
elif not component.restart(now):
still_dead.append(component)
if next_restart_time is None or\
next_restart_time > component.get_restart_time():
......
......@@ -929,7 +929,14 @@ class MockComponent:
self.name = lambda: name
self.pid = lambda: pid
self.address = lambda: address
self.restarted = False
def get_restart_time(self):
return 0 # arbitrary dummy value
def restart(self, now):
self.restarted = True
return True
class TestBossCmd(unittest.TestCase):
def test_ping(self):
......@@ -1266,6 +1273,34 @@ class TestBossComponents(unittest.TestCase):
bob.start_all_components()
self.__check_extended(self.__param)
def __setup_restart(self, bob, component):
'''Common procedure for restarting a component used below.'''
bob.components_to_restart = { component }
component.restarted = False
bob.restart_processes()
def test_restart_processes(self):
'''Check some behavior on restarting processes.'''
bob = MockBob()
bob.runnable = True
component = MockComponent('test', 53)
# A component to be restarted will actually be restarted iff it's
# in the configurator's configuration.
# We bruteforce the configurator internal below; ugly, but the easiest
# way for the test.
bob._component_configurator._components['test'] = (None, component)
self.__setup_restart(bob, component)
self.assertTrue(component.restarted)
self.assertFalse(component in bob.components_to_restart)
# Remove the component from the configuration. It won't be restarted
# even if scheduled, nor will remain in the to-be-restarted list.
del bob._component_configurator._components['test']
self.__setup_restart(bob, component)
self.assertFalse(component.restarted)
self.assertFalse(component in bob.components_to_restart)
class SocketSrvTest(unittest.TestCase):
"""
This tests some methods of boss related to the unix domain sockets used
......
......@@ -45,6 +45,7 @@ COMPONENT_RESTART_DELAY = 10
STATE_DEAD = 'dead'
STATE_STOPPED = 'stopped'
STATE_RESTARTING = 'restarting'
STATE_RUNNING = 'running'
def get_signame(signal_number):
......@@ -68,6 +69,7 @@ class BaseComponent:
explicitly).
- Running - after start() was called, it started successfully and is
now running.
- Restarting - the component failed (crashed) and is waiting for a restart
- Dead - it failed and can not be resurrected.
Init
......@@ -79,11 +81,11 @@ class BaseComponent:
| | |
|failure | failed() |
| | |
v | |
v | | start()/restart()
+<-----------+ |
| |
| kind == dispensable or kind|== needed and failed late
+-----------------------------+
+-----------------------> Restarting
|
| kind == core or kind == needed and it failed too soon
v
......@@ -163,7 +165,7 @@ class BaseComponent:
"""
if self.__state == STATE_DEAD:
raise ValueError("Can't resurrect already dead component")
if self.running():
if self.is_running():
raise ValueError("Can't start already running component")
logger.info(BIND10_COMPONENT_START, self.name())
self.__state = STATE_RUNNING
......@@ -188,7 +190,7 @@ class BaseComponent:
"""
# This is not tested. It talks with the outher world, which is out
# of scope of unittests.
if not self.running():
if not self.is_running():
raise ValueError("Can't stop a component which is not running")
logger.info(BIND10_COMPONENT_STOP, self.name())
self.__state = STATE_STOPPED
......@@ -234,9 +236,9 @@ class BaseComponent:
logger.error(BIND10_COMPONENT_FAILED, self.name(), self.pid(),
exit_str)
if not self.running():
if not self.is_running():
raise ValueError("Can't fail component that isn't running")
self.__state = STATE_STOPPED
self.__state = STATE_RESTARTING # tentatively set, maybe changed to DEAD
self._failed_internal()
# If it is a core component or the needed component failed to start
# (including it stopped really soon)
......@@ -284,7 +286,7 @@ class BaseComponent:
else:
return False
def running(self):
def is_running(self):
"""
Informs if the component is currently running. It assumes the failed
is called whenever the component really fails and there might be some
......@@ -296,6 +298,15 @@ class BaseComponent:
"""
return self.__state == STATE_RUNNING
def is_restarting(self):
"""Informs if the component has failed and is waiting for a restart.
Unlike the case of is_running(), if this returns True it always means
the corresponding process has died and not yet restarted.
"""
return self.__state == STATE_RESTARTING
def _start_internal(self):
"""
This method does the actual starting of a process. You need to override
......@@ -560,6 +571,13 @@ class Configurator:
self._running = False
self.__reconfigure_internal(self._components, {})
def has_component(self, component):
'''Return if a specified component is configured.'''
# Values of self._components are tuples of (config, component).
# Extract the components of the tuples and see if the given one
# is included.
return component in map(lambda x: x[1], self._components.values())
def reconfigure(self, configuration):
"""
Changes configuration from the current one to the provided. It
......@@ -591,7 +609,7 @@ class Configurator:
for cname in old.keys():
if cname not in new:
component = self._components[cname][1]
if component.running():
if component.is_running() or component.is_restarting():
plan.append({
'command': STOP_CMD,
'component': component,
......@@ -674,7 +692,7 @@ class Configurator:
self._components[task['name']] = (task['config'],
component)
elif command == STOP_CMD:
if component.running():
if component.is_running():
component.stop()
del self._components[task['name']]
else:
......
......@@ -191,7 +191,8 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertFalse(self.__start_called)
self.assertFalse(self.__stop_called)
self.assertFalse(self.__failed_called)
self.assertFalse(component.running())
self.assertFalse(component.is_running())
self.assertFalse(component.is_restarting())
# We can't stop or fail the component yet
self.assertRaises(ValueError, component.stop)
self.assertRaises(ValueError, component.failed, 1)
......@@ -204,7 +205,8 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertTrue(self.__start_called)
self.assertFalse(self.__stop_called)
self.assertFalse(self.__failed_called)
self.assertTrue(component.running())
self.assertTrue(component.is_running())
self.assertFalse(component.is_restarting())
def __check_dead(self, component):
"""
......@@ -215,7 +217,8 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertFalse(self.__stop_called)
self.assertTrue(self.__failed_called)
self.assertEqual(1, self._exitcode)
self.assertFalse(component.running())
self.assertFalse(component.is_running())
self.assertFalse(component.is_restarting())
# Surely it can't be stopped when already dead
self.assertRaises(ValueError, component.stop)
# Nor started
......@@ -234,7 +237,8 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertTrue(self.__start_called)
self.assertFalse(self.__stop_called)
self.assertTrue(self.__failed_called)
self.assertTrue(component.running())
self.assertTrue(component.is_running())
self.assertFalse(component.is_restarting())
# Check it can't be started again
self.assertRaises(ValueError, component.start)
......@@ -246,7 +250,8 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertTrue(self.__start_called)
self.assertFalse(self.__stop_called)
self.assertTrue(self.__failed_called)
self.assertFalse(component.running())
self.assertFalse(component.is_running())
self.assertTrue(component.is_restarting())
def __do_start_stop(self, kind):
"""
......@@ -270,7 +275,8 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertTrue(self.__start_called)
self.assertTrue(self.__stop_called)
self.assertFalse(self.__failed_called)
self.assertFalse(component.running())
self.assertFalse(component.is_running())
self.assertFalse(component.is_restarting())
# Check it can't be stopped twice
self.assertRaises(ValueError, component.stop)
# Or failed
......@@ -553,10 +559,10 @@ class ComponentTests(BossUtils, unittest.TestCase):
self.assertIsNone(component.pid())
self.assertEqual(['hello'], component._params)
self.assertEqual('Address', component._address)
self.assertFalse(component.running())
self.assertFalse(component.is_running())
self.assertEqual({}, self.__registered_processes)
component.start()
self.assertTrue(component.running())
self.assertTrue(component.is_running())
# Some versions of unittest miss assertIsInstance
self.assertTrue(isinstance(component._procinfo, TestProcInfo))
self.assertEqual(42, component.pid())
......@@ -580,11 +586,11 @@ class ComponentTests(BossUtils, unittest.TestCase):
"""
component = Component('component', self, 'needed', 'Address')
component.start()
self.assertTrue(component.running())
self.assertTrue(component.is_running())
self.assertEqual('component', self.__start_simple_params)
component.pid = lambda: 42
component.stop()
self.assertFalse(component.running())
self.assertFalse(component.is_running())
self.assertEqual(('component', 'Address', 42),
self.__stop_process_params)
......@@ -609,7 +615,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
component = Component('component', self, 'needed', 'Address',
[], ProcInfo)
component.start()
self.assertTrue(component.running())
self.assertTrue(component.is_running())
component.kill()
self.assertTrue(process.terminated)
self.assertFalse(process.killed)
......@@ -872,12 +878,13 @@ class ConfiguratorTest(BossUtils, unittest.TestCase):
'priority': 6,
'special': 'test',
'process': 'additional',
'kind': 'needed'
'kind': 'dispensable' # need to be dispensable so it could restart
}
self.log = []
plan = configurator._build_plan(self.__build_components(self.__core),
bigger)
self.assertEqual([('additional', 'init'), ('additional', 'needed')],
self.assertEqual([('additional', 'init'),
('additional', 'dispensable')],
self.log)
self.assertEqual(1, len(plan))
self.assertTrue('component' in plan[0])
......@@ -888,15 +895,29 @@ class ConfiguratorTest(BossUtils, unittest.TestCase):
# Now remove the one component again
# We run the plan so the component is wired into internal structures
configurator._run_plan(plan)
self.log = []
plan = configurator._build_plan(self.__build_components(bigger),
self.__core)
self.assertEqual([], self.log)
self.assertEqual([{
'command': 'stop',
'name': 'additional',
'component': component
}], plan)
# We should have the 'additional' component in the configurator.
self.assertTrue(configurator.has_component(component))
for count in [0, 1]: # repeat two times, see below
self.log = []
plan = configurator._build_plan(self.__build_components(bigger),
self.__core)
self.assertEqual([], self.log)
self.assertEqual([{
'command': 'stop',
'name': 'additional',
'component': component
}], plan)
if count is 0:
# We then emulate unexpected failure of the component (but
# before it restarts). It shouldn't confuse the
# configurator in the second phase of the test
component.failed(0)
# Run the plan, confirm the specified component is gone.
configurator._run_plan(plan)
self.assertFalse(configurator.has_component(component))
# There shouldn't be any other object that has the same name
self.assertFalse('additional' in configurator._components)
# We want to switch a component. So, prepare the configurator so it
# holds one
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment