Commit cae4ced0 authored by Jelte Jansen's avatar Jelte Jansen
Browse files

[trac758] logging conversion for bob

parent ae4e7f10
SUBDIRS = . tests
sbin_SCRIPTS = bind10
CLEANFILES = bind10 bind10.pyc
CLEANFILES = bind10 bind10.pyc bind10_messages.py bind10_messages.pyc
pkglibexecdir = $(libexecdir)/@PACKAGE@
pyexec_DATA = bind10_messages.py
bind10dir = $(pkgdatadir)
bind10_DATA = bob.spec
EXTRA_DIST = bob.spec
man_MANS = bind10.8
EXTRA_DIST += $(man_MANS) bind10.xml
EXTRA_DIST += $(man_MANS) bind10.xml bind10_messages.mes
if ENABLE_MAN
......@@ -19,6 +20,9 @@ bind10.8: bind10.xml
endif
bind10_messages.py: bind10_messages.mes
$(top_builddir)/src/lib/log/compiler/message -p $(top_srcdir)/src/bin/bind10/bind10_messages.mes
# this is done here since configure.ac AC_OUTPUT doesn't expand exec_prefix
bind10: bind10.py
$(SED) -e "s|@@PYTHONPATH@@|@pyexecdir@|" \
......
......@@ -65,6 +65,16 @@ import posix
import isc.cc
import isc.util.process
import isc.net.parse
import isc.log
from bind10_messages import *
isc.log.init("b10-bind10")
logger = isc.log.Logger("bind10")
# Pending system-wide debug level definitions, the ones we
# use here are hardcoded for now
DBG_PROCESS = 10
DBG_COMMANDS = 30
# Assign this process some longer name
isc.util.process.rename(sys.argv[0])
......@@ -252,8 +262,7 @@ class BoB:
if new_config['start_' + name]:
if not started:
if self.uid is not None:
sys.stderr.write("[bind10] Starting " + name + " as " +
"a user, not root. This might fail.\n")
logger.warn(BIND10_START_AS_NON_ROOT, name)
start()
else:
stop()
......@@ -279,9 +288,8 @@ class BoB:
self.started_auth_family = False
# The real code of the config handler function follows here
if self.verbose:
sys.stdout.write("[bind10] Handling new configuration: " +
str(new_config) + "\n")
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
new_config)
start_stop('resolver', self.started_resolver_family, resolver_on,
resolver_off)
start_stop('auth', self.started_auth_family, auth_on, auth_off)
......@@ -298,8 +306,7 @@ class BoB:
return process_list
def command_handler(self, command, args):
if self.verbose:
sys.stdout.write("[bind10] Boss got command: " + str(command) + "\n")
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
answer = isc.config.ccsession.create_answer(1, "command not implemented")
if type(command) != str:
answer = isc.config.ccsession.create_answer(1, "bad command")
......@@ -332,12 +339,10 @@ class BoB:
start, this runs through the list of started processes, killing
each one. It then clears that list.
"""
if self.verbose:
sys.stdout.write("[bind10] killing started processes:\n")
logger.info(BIND10_KILLING_ALL_PROCESSES)
for pid in self.processes:
if self.verbose:
sys.stdout.write("[bind10] - %s\n" % self.processes[pid].name)
logger.info(BIND10_KILL_PROCESS, self.processes[pid].name)
self.processes[pid].process.kill()
self.processes = {}
......@@ -351,18 +356,14 @@ class BoB:
xfrin/xfrout and zone manager as we don't need to start those if we
are not running the authoritative server.)
"""
if self.verbose:
sys.stdout.write("[bind10] Reading Boss configuration:\n")
logger.info(BIND10_READING_BOSS_CONFIGURATION)
config_data = self.ccs.get_full_config()
self.cfg_start_auth = config_data.get("start_auth")
self.cfg_start_resolver = config_data.get("start_resolver")
if self.verbose:
sys.stdout.write("[bind10] - start_auth: %s\n" %
str(self.cfg_start_auth))
sys.stdout.write("[bind10] - start_resolver: %s\n" %
str(self.cfg_start_resolver))
logger.info(BIND10_CONFIGURATION_START_AUTH, self.cfg_start_auth)
logger.info(BIND10_CONFIGURATION_START_RESOLVER, self.cfg_start_resolver)
def log_starting(self, process, port = None, address = None):
"""
......@@ -377,13 +378,14 @@ class BoB:
appended to the message (if present).
"""
self.curproc = process
if self.verbose:
sys.stdout.write("[bind10] Starting %s" % self.curproc)
if port is not None:
sys.stdout.write(" on port %d" % port)
if address is not None:
sys.stdout.write(" (address %s)" % str(address))
sys.stdout.write("\n")
if port is None and address is None:
logger.debug(DBG_PROCESS, BIND10_STARTING_PROCESS, self.curproc)
elif address is None:
logger.debug(DBG_PROCESS, BIND10_STARTING_PROCESS_PORT, self.curproc,
port)
else:
logger.debug(DBG_PROCESS, BIND10_STARTING_PROCESS_PORT_ADDRESS,
self.curproc, address, port)
def log_started(self, pid = None):
"""
......@@ -391,11 +393,10 @@ class BoB:
message. As with starting_message(), this ensures a consistent
format.
"""
if self.verbose:
sys.stdout.write("[bind10] Started %s" % self.curproc)
if pid is not None:
sys.stdout.write(" (PID %d)" % pid)
sys.stdout.write("\n")
if pid is None:
logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)
else:
logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc, pid)
# The next few methods start the individual processes of BIND-10. They
# are called via start_all_processes(). If any fail, an exception is
......@@ -459,7 +460,9 @@ class BoB:
"""
self.log_starting("ccsession")
self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
self.config_handler, self.command_handler)
self.config_handler,
self.command_handler,
None, True)
self.ccs.start()
self.log_started()
......@@ -620,12 +623,12 @@ class BoB:
# running
c_channel_env = {}
if self.msgq_socket_file is not None:
c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
if self.verbose:
sys.stdout.write("[bind10] Checking for already running b10-msgq\n")
c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
# try to connect, and if we can't wait a short while
try:
self.cc_session = isc.cc.Session(self.msgq_socket_file)
logger.fatal(BIND10_MSGQ_ALREADY_RUNNING);
return "b10-msgq already running, or socket file not cleaned , cannot start"
except isc.cc.session.SessionError:
# this is the case we want, where the msgq is not running
......@@ -663,8 +666,7 @@ class BoB:
Stop the given process, friendly-like. The process is the name it has
(in logs, etc), the recipient is the address on msgq.
"""
if self.verbose:
sys.stdout.write("[bind10] Asking %s to terminate\n" % process)
logger.info(BIND10_STOP_PROCESS, process)
# TODO: Some timeout to solve processes that don't want to die would
# help. We can even store it in the dict, it is used only as a set
self.expected_shutdowns[process] = 1
......@@ -690,8 +692,7 @@ class BoB:
def shutdown(self):
"""Stop the BoB instance."""
if self.verbose:
sys.stdout.write("[bind10] Stopping the server.\n")
logger.info(BIND10_SHUTDOWN)
# first try using the BIND 10 request to stop
try:
self.stop_all_processes()
......@@ -705,9 +706,8 @@ class BoB:
# next try sending a SIGTERM
processes_to_stop = list(self.processes.values())
for proc_info in processes_to_stop:
if self.verbose:
sys.stdout.write("[bind10] Sending SIGTERM to %s (PID %d).\n" %
(proc_info.name, proc_info.pid))
logger.info(BIND10_SEND_SIGTERM, proc_info.name,
proc_info.pid)
try:
proc_info.process.terminate()
except OSError:
......@@ -721,17 +721,15 @@ class BoB:
self.reap_children()
processes_to_stop = list(self.processes.values())
for proc_info in processes_to_stop:
if self.verbose:
sys.stdout.write("[bind10] Sending SIGKILL to %s (PID %d).\n" %
(proc_info.name, proc_info.pid))
logger.info(BIND10_SEND_SIGKILL, proc_info.name,
proc_info.pid)
try:
proc_info.process.kill()
except OSError:
# ignore these (usually ESRCH because the child
# finally exited)
pass
if self.verbose:
sys.stdout.write("[bind10] All processes ended, server done.\n")
logger.info(BIND10_SHUTDOWN_COMPLETE)
def _get_process_exit_status(self):
return os.waitpid(-1, os.WNOHANG)
......@@ -759,18 +757,16 @@ class BoB:
# elsewhere.
if self.runnable:
if exit_status is None:
sys.stdout.write(
"[bind10] Process %s (PID %d) died: exit status not available" %
(proc_info.name, proc_info.pid))
logger.info(BIND10_PROCESS_ENDED_NO_EXIT_STATUS,
proc_info.name, proc_info.pid)
else:
sys.stdout.write(
"[bind10] Process %s (PID %d) terminated, exit status = %d\n" %
(proc_info.name, proc_info.pid, exit_status))
logger.info(BIND10_PROCESS_ENDED_WITH_EXIT_STATUS,
proc_info.name, proc_info.pid,
exit_status)
# Was it a special process?
if proc_info.name == "b10-msgq":
sys.stdout.write(
"[bind10] The b10-msgq process died, shutting down.\n")
logger.fatal(BIND10_MSGQ_DAEMON_ENDED)
self.runnable = False
# If we're in 'brittle' mode, we want to shutdown after
......@@ -778,7 +774,7 @@ class BoB:
if self.brittle:
self.runnable = False
else:
sys.stdout.write("[bind10] Unknown child pid %d exited.\n" % pid)
logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
def restart_processes(self):
"""
......@@ -809,14 +805,11 @@ class BoB:
next_restart = restart_time
still_dead[proc_info.pid] = proc_info
else:
if self.verbose:
sys.stdout.write("[bind10] Resurrecting dead %s process...\n" %
proc_info.name)
logger.info(BIND10_RESURRECTING_PROCESS, proc_info.name)
try:
proc_info.respawn()
self.processes[proc_info.pid] = proc_info
sys.stdout.write("[bind10] Resurrected %s (PID %d)\n" %
(proc_info.name, proc_info.pid))
logger.info(BIND10_RESURRECTED_PROCESS, proc_info.name, proc_info.pid)
except:
still_dead[proc_info.pid] = proc_info
# remember any processes that refuse to be resurrected
......@@ -848,8 +841,7 @@ def fatal_signal(signal_number, stack_frame):
"""We need to exit (SIGINT or SIGTERM received)."""
global options
global boss_of_bind
if options.verbose:
sys.stdout.write("[bind10] Received %s.\n" % get_signame(signal_number))
logger.info(BIND10_RECEIVED_SIGNAL, get_signame(signal_number))
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
boss_of_bind.runnable = False
......@@ -967,12 +959,11 @@ def main():
pass
if setuid is None:
sys.stderr.write("bind10: invalid user: '%s'\n" % options.user)
logger.fatal(BIND10_INVALID_USER, options.user)
sys.exit(1)
# Announce startup.
if options.verbose:
sys.stdout.write("%s\n" % VERSION)
logger.info(BIND10_STARTING, VERSION)
# Create wakeup pipe for signal handlers
wakeup_pipe = os.pipe()
......@@ -994,9 +985,9 @@ def main():
setuid, username, options.cmdctl_port, options.brittle)
startup_result = boss_of_bind.startup()
if startup_result:
sys.stderr.write("[bind10] Error on startup: %s\n" % startup_result)
logger.fatal(BIND10_STARTUP_ERROR, startup_result)
sys.exit(1)
sys.stdout.write("[bind10] BIND 10 started\n")
logger.info(BIND10_STARTUP_COMPLETE)
dump_pid(options.pid_file)
# In our main loop, we check for dead processes or messages
......@@ -1022,7 +1013,7 @@ def main():
if err.args[0] == errno.EINTR:
(rlist, wlist, xlist) = ([], [], [])
else:
sys.stderr.write("[bind10] Error with select(); %s\n" % err)
logger.fatal(BIND10_SELECT_ERROR, err)
break
for fd in rlist + xlist:
......@@ -1030,8 +1021,7 @@ def main():
try:
boss_of_bind.ccs.check_command()
except isc.cc.session.ProtocolError:
if options.verbose:
sys.stderr.write("[bind10] msgq channel disappeared.\n")
logger.fatal(BIND10_MSGQ_DISAPPEARED)
break
elif fd == wakeup_fd:
os.read(wakeup_fd, 32)
......@@ -1039,7 +1029,6 @@ def main():
# shutdown
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
boss_of_bind.shutdown()
sys.stdout.write("[bind10] BIND 10 exiting\n");
unlink_pid_file(options.pid_file)
sys.exit(0)
......
# Copyright (C) 2011 Internet Systems Consortium, Inc. ("ISC")
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
# No namespace declaration - these constants go in the global namespace
# of the xfrin messages python module.
% BIND10_CHECK_MSGQ_ALREADY_RUNNING checking if msgq is already running
The boss process is starting up and will now check if the message bus
daemon is already running. If so, it will not be able to start, as it
needs a dedicated message bus.
% BIND10_CONFIGURATION_START_AUTH start authoritative server: %1
This message shows whether or not the authoritative server should be
started according to the configuration.
% BIND10_CONFIGURATION_START_RESOLVER start resolver: %1
This message shows whether or not the resolver should be
started according to the configuration.
% BIND10_INVALID_USER invalid user: '%1'
The boss process was started with the -u option, to drop root priveleges
and continue running as the specified user, but the user is unknown.
% BIND10_KILL_PROCESS killing process %1
The boss module is sending a kill signal to the given process, as part
of the process of killing all started processes during a failed startup,
as described for BIND10_KILLING_ALL_PROCESSES
% BIND10_KILLING_ALL_PROCESSES killing all started processes
The boss module was not able to start every process it needed to start
during startup, and will now kill the processes that did get started.
% BIND10_MSGQ_ALREADY_RUNNING msgq daemon already running, cannot start
There already appears to me a message bus daemon running. Either an
old process was not shut down correctly, and needs to be killed, or
another instance of BIND10, with the same msgq domain socket, is
running, which needs to be stopped.
% BIND10_MSGQ_DAEMON_ENDED b10-msgq process died, shutting down
The message bus daemon has died. This is a fatal error, since it may
leave the system in an inconsistent state. BIND10 will now shut down
% BIND10_MSGQ_DISAPPEARED msgq channel disappeared
While listening on the message bus channel for messages, it suddenly
disappeared. The msgq daemon may have died. This might lead to an
inconsistend state of the system, and BIND 10 will now shut down.
% BIND10_PROCESS_ENDED_NO_EXIT_STATUS Process %1 (PID %2) died: exit status not available
The given process ended unexpectedly, but no exit status is
available. Depending on which module it was, it may simply be
restarted, or it may be a problem that will cause the boss module to
shut down too. The latter happens if it was the message bus daemon,
which, if it has died suddenly, may leave the system in an
inconsistent state. BIND10 will also shut down now if it has been run
with --brittle.
% BIND10_PROCESS_ENDED_WITH_EXIT_STATUS Process %1 (PID %2) terminated, exit status = %3
The given process ended unexpectedly with the given exit status.
Depending on which module it was, it may simply be restarted, or it
may be a problem that will cause the boss module to shut down too.
The latter happens if it was the message bus daemon, which, if it has
died suddenly, may leave the system in an inconsistent state. BIND10
will also shut down now if it has been run with --brittle.
% BIND10_READING_BOSS_CONFIGURATION reading boss configuration
The boss process is starting up, and will now process the initial
configuration, as received from the configuration manager.
% BIND10_RECEIVED_COMMAND received command: %1
The boss module received a command and shall now process it. The command
is printed.
% BIND10_RECEIVED_NEW_CONFIGURATION received new configuration: %1
The boss module received a configuration update and is going to apply
it now. The new configuration is printed.
% BIND10_RECEIVED_SIGNAL received signal %1
The boss module received the given signal.
% BIND10_RESURRECTED_PROCESS resurrected %1 (PID %2)
The given process has been restarted succesfully, and is now running
with the given process id.
% BIND10_RESURRECTING_PROCESS resurrecting dead %1 process...
The given process has ended unexpectedly, and is now restarted.
% BIND10_SELECT_ERROR error in select() call: %1
There was a fatal error in the call to select(), used to see if a child
process has ended or if there is a message on the message bus. This
should not happen under normal circumstances and is considered fatal,
so BIND 10 will now shut down. The specific error is printed.
% BIND10_SEND_SIGKILL sending SIGKILL to %1 (PID %2)
The boss module is sending a SIGKILL signal to the given process.
% BIND10_SEND_SIGTERM sending SIGTERM to %1 (PID %2)
The boss module is sending a SIGTERM signal to the given process.
% BIND10_SHUTDOWN stopping the server
The boss process received a command or signal telling it to shut down.
It will send shutdown commands to each process. The processes that do
not shut down will then receive a SIGTERM signal. If that doesn't work,
it shall send SIGKILL signals to the processes still alive.
% BIND10_SHUTDOWN_COMPLETE all processes ended, shutdown complete
All child processes have been stopped, and the boss process will now
stop itself.
% BIND10_START_AS_NON_ROOT starting %1 as a user, not root. This might fail.
The given module is being started or restarted without root privileges.
If the module needs these privileges, it may have problems starting.
Note that this issue should be resolved by the pending 'socket-creator'
process; once that has been implemented, modules should not need root
privileges anymore.
% BIND10_STARTED_PROCESS started %1
The given process has successfully been started
% BIND10_STARTED_PROCESS_PID started %1 (PID %2)
The given process has successfully been started, and has the given PID.
% BIND10_STARTING starting BIND10: %1
Informational message on startup that show the full version.
% BIND10_STARTING_PROCESS starting process %1
The boss module is starting the given process.
% BIND10_STARTING_PROCESS_PORT starting process %1 (to listen on port %2)
The boss module is starting the given process, which will listen on the
given port number.
% BIND10_STARTING_PROCESS_PORT_ADDRESS starting process %1 (to listen on %2:%3)
The boss module is starting the given process, which will listen on the
given address and port number.
% BIND10_STARTUP_COMPLETE BIND 10 started
All modules have been successfully started, and BIND 10 is now running.
% BIND10_STARTUP_ERROR error during startup: %1
There was a fatal error when BIND10 was trying to start. The error is
shown, and BIND10 will now shut down.
% BIND10_STOP_PROCESS asking %1 to shut down
The boss module is sending a shutdown command to the given module over
the message channel.
% BIND10_UNKNOWN_CHILD_PROCESS_ENDED unknown child pid %1 exited
An unknown child process has exited. The PID is printed, but no further
action will be taken by the boss process.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment