Commit 6dae0d1f authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[master] Merge branch 'trac2617'

parents 6ded04bb 37743e4e
......@@ -312,6 +312,9 @@ been created and is initializing. The AUTH_SERVER_STARTED message will be
output when initialization has successfully completed and the server starts
accepting queries.
The authoritative server is exiting.
% AUTH_SERVER_FAILED server failed: %1
The authoritative server has encountered a fatal error and is terminating. The
reason for the failure is included in the message.
......@@ -44,6 +44,7 @@
#include <server_common/socket_request.h>
#include <boost/bind.hpp>
#include <boost/scoped_ptr.hpp>
#include <sys/types.h>
#include <sys/socket.h>
......@@ -152,10 +153,11 @@ main(int argc, char* argv[]) {
int ret = 0;
// XXX: we should eventually pass io_service here.
Session* cc_session = NULL;
Session* xfrin_session = NULL;
boost::scoped_ptr<AuthSrv> auth_server_; // placeholder
boost::scoped_ptr<Session> cc_session;
boost::scoped_ptr<Session> xfrin_session;
bool xfrin_session_established = false; // XXX (see Trac #287)
ModuleCCSession* config_session = NULL;
boost::scoped_ptr<ModuleCCSession> config_session;
XfroutClient xfrout_client(getXfroutSocketPath());
SocketSessionForwarder ddns_forwarder(getDDNSSocketPath());
try {
......@@ -167,7 +169,8 @@ main(int argc, char* argv[]) {
specfile = string(AUTH_SPECFILE_LOCATION);
auth_server = new AuthSrv(xfrout_client, ddns_forwarder);
auth_server_.reset(new AuthSrv(xfrout_client, ddns_forwarder));
auth_server = auth_server_.get();
SimpleCallback* checkin = auth_server->getCheckinProvider();
......@@ -179,7 +182,7 @@ main(int argc, char* argv[]) {
cc_session = new Session(io_service.get_io_service());
cc_session.reset(new Session(io_service.get_io_service()));
// Initialize the Socket Requestor
isc::server_common::initSocketRequestor(*cc_session, AUTH_NAME);
......@@ -191,18 +194,18 @@ main(int argc, char* argv[]) {
// updateConfig() for listen_on and in initializing TSIG keys below).
// Until then all operations on the CC session will take place
// synchronously.
config_session = new ModuleCCSession(specfile, *cc_session,
my_command_handler, false);
config_session.reset(new ModuleCCSession(specfile, *cc_session,
my_command_handler, false));
xfrin_session = new Session(io_service.get_io_service());
xfrin_session.reset(new Session(io_service.get_io_service()));
xfrin_session_established = true;
// Configure the server. configureAuthServer() is expected to install
// all initial configurations, but as a short term workaround we
......@@ -210,7 +213,7 @@ main(int argc, char* argv[]) {
// updateConfig().
// if server load configure failed, we won't exit, give user second
// chance to correct the configure.
try {
configureAuthServer(*auth_server, config_session->getFullConfig());
......@@ -228,7 +231,7 @@ main(int argc, char* argv[]) {
auth_server, &first_time,
_1, _2, _3),
......@@ -260,10 +263,7 @@ main(int argc, char* argv[]) {
delete xfrin_session;
delete config_session;
delete cc_session;
delete auth_server;
return (ret);
......@@ -650,4 +650,6 @@ if __name__ == '__main__':
if httpd:
......@@ -43,6 +43,9 @@ specific error is printed in the message.
This debug message indicates that the given command has been sent to
the given module.
The b10-cmdctl daemon is exiting.
% CMDCTL_NO_SUCH_USER username not found in user database: %1
A login attempt was made to b10-cmdctl, but the username was not known.
Users can be added with the tool b10-cmdctl-usermgr.
......@@ -70,6 +70,23 @@ SPECFILE_LOCATION = SPECFILE_PATH + "/msgq.spec"
class MsgQReceiveError(Exception): pass
class MsgQCloseOnReceive(Exception):
"""Exception raised when reading data from a socket results in 'shutdown'.
This happens when msgq received 0-length data. This class holds whether
it happens in the middle of reading (i.e. after reading some) via
partial_read parameter, which is set to True if and only if so.
This will be used by an upper layer catching the exception to distinguish
the severity of the event.
def __init__(self, reason, partial_read):
self.partial_read = partial_read
self.__reason = reason
def __str__(self):
return self.__reason
class SubscriptionManager:
def __init__(self, cfgmgr_ready):
......@@ -311,23 +328,22 @@ class MsgQ:
lname = self.newlname()
self.lnames[lname] = newsocket
logger.debug(TRACE_BASIC, MSGQ_SOCKET_REGISTERED, newsocket.fileno(),
if self.poller:
self.poller.register(newsocket, select.POLLIN)
def process_socket(self, fd):
"""Process a read on a socket."""
if not fd in self.sockets:
logger.error(MSGQ_READ_UNKNOWN_FD, fd)
sock = self.sockets[fd]
self.process_packet(fd, sock)
def kill_socket(self, fd, sock):
"""Fully close down the socket."""
# Unregister events on the socket. Note that we don't have to do
# this for kqueue because the registered events are automatically
# deleted when the corresponding socket is closed.
if self.poller:
lname = [ k for k, v in self.lnames.items() if v == sock ][0]
del self.lnames[lname]
......@@ -337,24 +353,35 @@ class MsgQ:
del self.sendbuffs[fd]
logger.debug(TRACE_BASIC, MSGQ_SOCK_CLOSE, fd)
def getbytes(self, fd, sock, length):
def __getbytes(self, fd, sock, length, continued):
"""Get exactly the requested bytes, or raise an exception if
continued is set to True if this method is called to complete
already read data.
received = b''
while len(received) < length:
data = sock.recv(length - len(received))
except socket.error:
raise MsgQReceiveError(socket.error)
except socket.error as err:
# This case includes ECONNRESET, which seems to happen when
# the remote client has closed its socket at some subtle
# timing (it should normally result in receiving empty data).
# Since we didn't figure out how exactly that could happen,
# we treat it just like other really-unexpected socket errors.
raise MsgQReceiveError(str(err))
if len(data) == 0:
raise MsgQReceiveError("EOF")
raise MsgQCloseOnReceive("EOF", continued)
received += data
continued = True
return received
def read_packet(self, fd, sock):
"""Read a correctly formatted packet. Will raise exceptions if
something fails."""
lengths = self.getbytes(fd, sock, 6)
lengths = self.__getbytes(fd, sock, 6, False)
overall_length, routing_length = struct.unpack(">IH", lengths)
if overall_length < 2:
raise MsgQReceiveError("overall_length < 2")
......@@ -365,9 +392,9 @@ class MsgQ:
raise MsgQReceiveError("routing_length == 0")
data_length = overall_length - routing_length
# probably need to sanity check lengths here...
routing = self.getbytes(fd, sock, routing_length)
routing = self.__getbytes(fd, sock, routing_length, True)
if data_length > 0:
data = self.getbytes(fd, sock, data_length)
data = self.__getbytes(fd, sock, data_length, True)
data = None
return (routing, data)
......@@ -376,8 +403,15 @@ class MsgQ:
"""Process one packet."""
routing, data = self.read_packet(fd, sock)
except MsgQReceiveError as err:
logger.error(MSGQ_RECV_ERR, fd, err)
except (MsgQReceiveError, MsgQCloseOnReceive) as err:
# If it's MsgQCloseOnReceive and that happens without reading
# any data, it basically means the remote clinet has closed the
# socket, so we log it as debug information. Otherwise, it's
# a somewhat unexpected event, so we consider it an "error".
if isinstance(err, MsgQCloseOnReceive) and not err.partial_read:
logger.error(MSGQ_RECV_ERR, fd, err)
self.kill_socket(fd, sock)
......@@ -428,9 +462,12 @@ class MsgQ:
def sendmsg(self, sock, env, msg = None):
self.send_prepared_msg(sock, self.preparemsg(env, msg))
def __send_data(self, sock, data):
def _send_data(self, sock, data):
Send a piece of data to the given socket.
Send a piece of data to the given socket. This method is
essentially "private" to MsgQ, but defined as if it were "protected"
for easier access from tests.
sock: The socket to send to
data: The list of bytes to send
......@@ -446,15 +483,17 @@ class MsgQ:
return sock.send(data)
except socket.error as e:
if e.errno in [ errno.EAGAIN,
errno.EINTR ]:
if e.errno in [ errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR ]:
return 0
elif e.errno in [ errno.EPIPE,
errno.ENOBUFS ]:
logger.error(MSGQ_SEND_ERR, sock.fileno(),
elif e.errno in [ errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS ]:
# EPIPE happens if the remote module has terminated by the time
# of this send; its severity can vary, but in many cases it
# shouldn't be critical, so we log it separately as a warning.
if e.errno == errno.EPIPE:
logger.warn(MSGQ_CLOSE_ON_SEND, sock.fileno())
logger.error(MSGQ_SEND_ERR, sock.fileno(),
self.kill_socket(sock.fileno(), sock)
return None
......@@ -469,7 +508,7 @@ class MsgQ:
if fileno in self.sendbuffs:
amount_sent = 0
amount_sent = self.__send_data(sock, msg)
amount_sent = self._send_data(sock, msg)
if amount_sent is None:
# Socket has been killed, drop the send
......@@ -489,7 +528,7 @@ class MsgQ:
last_sent = now
if self.poller:
self.poller.register(fileno, select.POLLIN |
self.add_kqueue_socket(sock, True)
self.sendbuffs[fileno] = (last_sent, buff)
......@@ -498,7 +537,7 @@ class MsgQ:
# Try to send some data from the buffer
(_, msg) = self.sendbuffs[fileno]
sock = self.sockets[fileno]
amount_sent = self.__send_data(sock, msg)
amount_sent = self._send_data(sock, msg)
if amount_sent is not None:
# Keep the rest
msg = msg[amount_sent:]
......@@ -592,12 +631,17 @@ class MsgQ:
self.running = False
if event & select.POLLOUT:
elif event & select.POLLIN:
writable = event & select.POLLOUT
# Note: it may be okay to read data if available
# immediately after write some, but due to unexpected
# regression (see comments on the kqueue version below)
# we restrict one operation per iteration for now.
# In future we may clarify the point and enable the
# "read/write" mode.
readable = not writable and (event & select.POLLIN)
if not writable and not readable:
logger.error(MSGQ_POLL_UNKNOWN_EVENT, fd, event)
self._process_fd(fd, writable, readable, False)
def run_kqueue(self):
while self.running:
......@@ -616,14 +660,35 @@ class MsgQ:
self.running = False
if event.filter == select.KQ_FILTER_WRITE:
if event.filter == select.KQ_FILTER_READ and \ > 0:
elif event.flags & select.KQ_EV_EOF:
fd = event.ident
writable = event.filter == select.KQ_FILTER_WRITE
readable = (event.filter == select.KQ_FILTER_READ and > 0)
# It seems to break some of our test cases if we
# immediately close the socket on EOF after reading
# some data. It may be possible to avoid by tweaking
# the test, but unless we can be sure we'll hold off.
closed = (not readable and
(event.flags & select.KQ_EV_EOF))
self._process_fd(fd, writable, readable, closed)
def _process_fd(self, fd, writable, readable, closed):
'''Process a single FD: unified subroutine of run_kqueue/poller.
closed can be True only in the case of kqueue. This is essentially
private but is defined as if it were "protected" so it's callable
from tests.
# We need to check if FD is still in the sockets dict, because
# it's possible that the socket has been "killed" while processing
# other FDs; it's even possible it's killed within this method.
if writable and fd in self.sockets:
if readable and fd in self.sockets:
self.process_packet(fd, self.sockets[fd])
if closed and fd in self.sockets:
self.kill_socket(fd, self.sockets[fd])
def stop(self):
# Signal it should terminate.
......@@ -760,3 +825,5 @@ if __name__ == "__main__":
......@@ -23,6 +23,31 @@
This is a debug message. The message queue has little bit of special handling
for the configuration manager. This special handling is happening now.
% MSGQ_CLOSE_ON_RECV Reading from socket canceled as it's closed: FD=%1
A debug message. The msgq daemon was notified of a read event on a
socket, but its initial read operation failed because the remote
client has closed its socket. This is possible in a normal operation
when a module shuts down.
% MSGQ_CLOSE_ON_SEND Sending to socket failed as already closed (okay to ignore on shutdown): FD=%1
The msgq daemon tries to send some data to a client module, but it
failed because the socket has been closed. This normally means the
client terminates (for some reason - either gracefully or as a crash)
while other modules try to send a message to the terminated module.
Since msgq doesn't keep track of the status of client modules, this
can happen and is not really an error for msgq; however, it can still
be an unexpected event for the BIND 10 system as a whole in that this
particular message is lost, so it's logged as a warning. If this
message is logged for a running BIND 10 system, it's suggested to
check other log messages; there may be an error from other modules
reporting a missing response message. One common, less critical case
where this message is logged is during shutdown. The ordering of
process shutdown is basically arbitrary at this moment, so it's
possible that some module tries to send a "quitting" message to some
other module but the latter has already shut down. Such cases are
generally non critical, but you may want to check other possible error
% MSGQ_COMMAND Running command %1 with arguments %2
Debug message. The message queue received a command and it is running it.
......@@ -34,6 +59,9 @@ the message queue version and version of the module.
% MSGQ_CONFIG_DATA Received configuration update for the msgq: %1
Debug message. The message queue received a configuration update, handling it.
% MSGQ_EXITING exiting
The msgq daemon is exiting.
% MSGQ_HDR_DECODE_ERR Error decoding header received from socket %1: %2
The socket with mentioned file descriptor sent a packet. However, it was not
possible to decode the routing header of the packet. The packet is ignored.
......@@ -41,6 +69,11 @@ This may be caused by a programmer error (one of the components sending invalid
data) or possibly by incompatible version of msgq and the component (but that's
unlikely, as the protocol is not changed often).
% MSGQ_INVALID_CMD Received invalid command: %1
An unknown command listed in the log has been received. It is ignored. This
indicates either a programmer error (eg. a typo in the command name) or
incompatible version of a module and message queue daemon.
% MSGQ_LISTENER_FAILED Failed to initialize listener on socket file '%1': %2
The message queue daemon tried to listen on a file socket (the path is in the
message), but it failed. The error from the operating system is logged.
......@@ -63,23 +96,29 @@ happen and it is either a programmer error or OS bug. The event is ignored. The
number noted as the event is the raw encoded value, which might be useful to
the authors when figuring the problem out.
% MSGQ_READ_UNKNOWN_FD Got read on strange socket %1
The OS reported a file descriptor is ready to read. But the daemon doesn't know
the mentioned file descriptor, which is either a programmer error or OS bug.
The read event is ignored.
% MSGQ_RECV_ERR Error reading from socket %1: %2
There was a low-level error when reading from a socket. The error is logged and
the corresponding socket is dropped.
the corresponding socket is dropped. The errors include receiving
broken or (non empty but) incomplete data. In either case it usually suggests
something unexpected happens within the BIND 10 system; it's probably
better to restart the system, and if it continues it should be
reported as a bug. One known, probably non critical case is
the "connection reset by peer" (or its variants) socket error appearing
on shutdown. It's known this happens when the remote client closes the
connection as part of shutdown process. Such cases are normally expected
to be reported as receiving empty data (which we log it at the debug level
as the MSGQ_CLOSE_ON_RECV message), but for some (yet) unknown reason
it can also be reported as the system error. At shutdown time it's expected
that connections are closed, so it's probably safe to ignore these messages
in such a case. We still log them as an error as we've not figured out
how exactly that can happen. In future, we may make the shutdown process
more robust so the msgq daemon can explicitly know when a client shuts down
more reliably. If and when it's implemented this error message won't appear
on shutdown unless there's really something unexpected.
% MSGQ_RECV_HDR Received header: %1
Debug message. This message includes the whole routing header of a packet.
% MSGQ_INVALID_CMD Received invalid command: %1
An unknown command listed in the log has been received. It is ignored. This
indicates either a programmer error (eg. a typo in the command name) or
incompatible version of a module and message queue daemon.
% MSGQ_SEND_ERR Error while sending to socket %1: %2
There was a low-level error when sending data to a socket. The error is logged
and the corresponding socket is dropped.
......@@ -87,6 +126,11 @@ and the corresponding socket is dropped.
% MSGQ_SHUTDOWN Stopping Msgq
Debug message. The message queue is shutting down.
% MSGQ_SOCKET_REGISTERED Registered a socket descriptor %1 with lname %2
Debug message. The msgq daemon accepted a session request on the
shown descriptor of socket and assigned a unique identifier (lname)
for the client on that socket.
% MSGQ_SOCK_CLOSE Closing socket fd %1
Debug message. Closing the mentioned socket.
import msgq
from msgq import SubscriptionManager, MsgQ
import unittest
......@@ -559,6 +560,178 @@ class ThreadTests(unittest.TestCase):
class SocketTests(unittest.TestCase):
'''Test cases for micro behaviors related to socket operations.
Some cases are covered as part of other tests, but in this fixture
we check more details of specific method related to socket operation,
with the help of mock classes to avoid expensive overhead.
class MockSocket():
'''A mock socket used instead of standard socket objects.'''
def __init__(self):
self.ex_on_send = None # raised from send() if not None
self.recv_result = b'test' # dummy data or exception
self.blockings = [] # history of setblocking() params
def setblocking(self, on):
def send(self, data):
if self.ex_on_send is not None:
raise self.ex_on_send
return 10 # arbitrary choice
def recv(self, len):
if isinstance(self.recv_result, Exception):
raise self.recv_result
ret = self.recv_result
self.recv_result = b'' # if called again, return empty data
return ret
def fileno(self):
return 42 # arbitrary choice
class LoggerWrapper():
'''A simple wrapper of logger to inspect log messages.'''
def __init__(self, logger):
self.error_called = 0
self.warn_called = 0
self.debug_called = 0
self.orig_logger = logger
def error(self, *args):
self.error_called += 1
def warn(self, *args):
self.warn_called += 1
def debug(self, *args):
self.debug_called += 1
def mock_kill_socket(self, fileno, sock):
'''A replacement of MsgQ.kill_socket method for inspection.'''
self.__killed_socket = (fileno, sock)
if fileno in self.__msgq.sockets:
del self.__msgq.sockets[fileno]
def setUp(self):
self.__msgq = MsgQ()
self.__msgq.kill_socket = self.mock_kill_socket
self.__sock = self.MockSocket()
self.__data = b'dummy'
self.__msgq.sockets[42] = self.__sock
self.__msgq.sendbuffs[42] = (None, b'testdata')
self.__sock_error = socket.error()
self.__killed_socket = None
self.__logger = self.LoggerWrapper(msgq.logger)
msgq.logger = self.__logger
def tearDown(self):
msgq.logger = self.__logger.orig_logger
def test_send_data(self):
# Successful case: _send_data() returns the hardcoded value, and
# setblocking() is called twice with the expected parameters
self.assertEqual(10, self.__msgq._send_data(self.__sock, self.__data))
self.assertEqual([0, 1], self.__sock.blockings)
def test_send_data_interrupt(self):
'''send() is interruptted. send_data() returns 0, sock isn't killed.'''
expected_blockings = []
for eno in [errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR]:
self.__sock_error.errno = eno
self.__sock.ex_on_send = self.__sock_error
self.assertEqual(0, self.__msgq._send_data(self.__sock,
expected_blockings.extend([0, 1])
self.assertEqual(expected_blockings, self.__sock.blockings)
def test_send_data_error(self):
'''Unexpected error happens on send(). The socket is killed.
If the error is EPIPE, it's logged at the warn level; otherwise
an error message is logged.
expected_blockings = []
expected_errors = 0
expected_warns = 0
for eno in [errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS]:
self.__sock_error.errno = eno
self.__sock.ex_on_send = self.__sock_error
self.__killed_socket = None # clear any previuos value
self.assertEqual(None, self.__msgq._send_data(self.__sock,
self.assertEqual((42, self.__sock), self.__killed_socket)
expected_blockings.extend([0, 1])
self.assertEqual(expected_blockings, self.__sock.blockings)
if eno == errno.EPIPE:
expected_warns += 1
expected_errors += 1
self.assertEqual(expected_errors, self.__logger.error_called)
self.assertEqual(expected_warns, self.__logger.warn_called)
def test_process_fd_read_after_bad_write(self):
'''Check the specific case of write fail followed by read attempt.
The write failure results in kill_socket, then read shouldn't tried.
self.__sock_error.errno = errno.EPIPE
self.__sock.ex_on_send = self.__sock_error
self.__msgq.process_socket = None # if called, trigger an exception
self.__msgq._process_fd(42, True, True, False) # shouldn't crash
# check the socket is deleted from the fileno=>sock dictionary
self.assertEqual({}, self.__msgq.sockets)
def test_process_fd_close_after_bad_write(self):
'''Similar to the previous, but for checking dup'ed kill attempt'''
self.__sock_error.errno = errno.EPIPE
self.__sock.ex_on_send = self.__sock_error
self.__msgq._process_fd(42, True, False, True) # shouldn't crash
self.assertEqual({}, self.__msgq.sockets)