Commit 405d85c8 authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[master] Merge branch 'trac1858'

 commit.
parents 9565a793 ae0565a4
......@@ -167,6 +167,30 @@ so BIND 10 will now shut down. The specific error is printed.
% BIND10_SEND_SIGKILL sending SIGKILL to %1 (PID %2)
The boss module is sending a SIGKILL signal to the given process.
% BIND10_SEND_SIGNAL_FAIL sending %1 to %2 (PID %3) failed: %4
The boss module sent a single (either SIGTERM or SIGKILL) to a process,
but it failed due to some system level error. There are two major cases:
the target process has already terminated but the boss module had sent
the signal before it noticed the termination. In this case an error
message should indicate something like "no such process". This can be
safely ignored. The other case is that the boss module doesn't have
the privilege to send a signal to the process. It can typically
happen when the boss module started as a privileged process, spawned a
subprocess, and then dropped the privilege. It includes the case for
the socket creator when the boss process runs with the -u command line
option. In this case, the boss module simply gives up to terminate
the process explicitly because it's unlikely to succeed by keeping
sending the signal. Although the socket creator is implemented so
that it will terminate automatically when the boss process exits
(and that should be the case for any other future process running with
a higher privilege), but it's recommended to check if there's any
remaining BIND 10 process if this message is logged. For all other
cases, the boss module will keep sending the signal until it confirms
all child processes terminate. Although unlikely, this could prevent
the boss module from exiting, just keeping sending the signals. So,
again, it's advisable to check if it really terminates when this
message is logged.
% BIND10_SEND_SIGTERM sending SIGTERM to %1 (PID %2)
The boss module is sending a SIGTERM signal to the given process.
......@@ -274,13 +298,6 @@ During the startup process, a number of messages are exchanged between the
Boss process and the processes it starts. This error is output when a
message received by the Boss process is not recognised.
% BIND10_START_AS_NON_ROOT_AUTH starting b10-auth as a user, not root. This might fail.
The authoritative server is being started or restarted without root privileges.
If the module needs these privileges, it may have problems starting.
Note that this issue should be resolved by the pending 'socket-creator'
process; once that has been implemented, modules should not need root
privileges anymore. See tickets #800 and #801 for more information.
% BIND10_START_AS_NON_ROOT_RESOLVER starting b10-resolver as a user, not root. This might fail.
The resolver is being started or restarted without root privileges.
If the module needs these privileges, it may have problems starting.
......
......@@ -546,8 +546,6 @@ class BoB:
"""
Start the Authoritative server
"""
if self.uid is not None and self.__started:
logger.warn(BIND10_START_AS_NON_ROOT_AUTH)
authargs = ['b10-auth']
if self.verbose:
authargs += ['-v']
......@@ -693,32 +691,42 @@ class BoB:
# from doing so
if not self.nokill:
# next try sending a SIGTERM
components_to_stop = list(self.components.values())
for component in components_to_stop:
logger.info(BIND10_SEND_SIGTERM, component.name(), component.pid())
try:
component.kill()
except OSError:
# ignore these (usually ESRCH because the child
# finally exited)
pass
# finally, send SIGKILL (unmaskable termination) until everybody dies
self.__kill_children(False)
# finally, send SIGKILL (unmaskable termination) until everybody
# dies
while self.components:
# XXX: some delay probably useful... how much is uncertain
time.sleep(0.1)
self.reap_children()
components_to_stop = list(self.components.values())
for component in components_to_stop:
logger.info(BIND10_SEND_SIGKILL, component.name(),
component.pid())
try:
component.kill(True)
except OSError:
# ignore these (usually ESRCH because the child
# finally exited)
pass
self.__kill_children(True)
logger.info(BIND10_SHUTDOWN_COMPLETE)
def __kill_children(self, forceful):
'''Terminate remaining subprocesses by sending a signal.
The forceful paramter will be passed Component.kill().
This is a dedicated subroutine of shutdown(), just to unify two
similar cases.
'''
logmsg = BIND10_SEND_SIGKILL if forceful else BIND10_SEND_SIGTERM
# We need to make a copy of values as the components may be modified
# in the loop.
for component in list(self.components.values()):
logger.info(logmsg, component.name(), component.pid())
try:
component.kill(forceful)
except OSError as ex:
# If kill() failed due to EPERM, it doesn't make sense to
# keep trying, so we just log the fact and forget that
# component. Ignore other OSErrors (usually ESRCH because
# the child finally exited)
signame = "SIGKILL" if forceful else "SIGTERM"
logger.info(BIND10_SEND_SIGNAL_FAIL, signame,
component.name(), component.pid(), ex)
if ex.errno == errno.EPERM:
del self.components[component.pid()]
def _get_process_exit_status(self):
return os.waitpid(-1, os.WNOHANG)
......
......@@ -1181,7 +1181,7 @@ class TestBossComponents(unittest.TestCase):
# We check somewhere else that the shutdown is actually called
# from there (the test_kills).
def __real_test_kill(self, nokill = False):
def __real_test_kill(self, nokill=False, ex_on_kill=None):
"""
Helper function that does the actual kill functionality testing.
"""
......@@ -1195,8 +1195,23 @@ class TestBossComponents(unittest.TestCase):
(anyway it is not told so). It does not die if it is killed
the first time. It dies only when killed forcefully.
"""
def __init__(self):
# number of kill() calls, preventing infinite loop.
self.__call_count = 0
def kill(self, forceful=False):
self.__call_count += 1
if self.__call_count > 2:
raise Exception('Too many calls to ImmortalComponent.kill')
killed.append(forceful)
if ex_on_kill is not None:
# If exception is given by the test, raise it here.
# In the case of ESRCH, the process should have gone
# somehow, so we clear the components.
if ex_on_kill.errno == errno.ESRCH:
bob.components = {}
raise ex_on_kill
if forceful:
bob.components = {}
def pid(self):
......@@ -1224,7 +1239,10 @@ class TestBossComponents(unittest.TestCase):
if nokill:
self.assertEqual([], killed)
else:
self.assertEqual([False, True], killed)
if ex_on_kill is not None:
self.assertEqual([False], killed)
else:
self.assertEqual([False, True], killed)
self.assertTrue(self.__called)
......@@ -1236,6 +1254,20 @@ class TestBossComponents(unittest.TestCase):
"""
self.__real_test_kill()
def test_kill_fail(self):
"""Test cases where kill() results in an exception due to OS error.
The behavior should be different for EPERM, so we test two cases.
"""
ex = OSError()
ex.errno, ex.strerror = errno.ESRCH, 'No such process'
self.__real_test_kill(ex_on_kill=ex)
ex.errno, ex.strerror = errno.EPERM, 'Operation not permitted'
self.__real_test_kill(ex_on_kill=ex)
def test_nokill(self):
"""
Test that the boss *doesn't* kill components which don't want to
......
......@@ -214,9 +214,14 @@ class Creator(Parser):
socket.SOCK_STREAM)
env = copy.deepcopy(os.environ)
env['PATH'] = path
# We explicitly set close_fs to True; it's False by default before
# Python 3.2. If we don't close the remaining FDs, the copy of
# 'local' will prevent the child process from terminating when
# the parent process died abruptly.
self.__process = subprocess.Popen(['b10-sockcreator'], env=env,
stdin=remote.fileno(),
stdout=remote2.fileno(),
close_fds=True,
preexec_fn=self.__preexec_work)
remote.close()
remote2.close()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment