bind10.py.in 23.1 KB
Newer Older
1 2
#!@PYTHON@

3 4 5 6
"""\
This file implements the Boss of Bind (BoB, or bob) program.

Its purpose is to start up the BIND 10 system, and then manage the
processes, by starting and stopping processes, plus restarting
processes that exit.

To start the system, it first runs the c-channel program (msgq), then
connects to that. It then runs the configuration manager, and reads
its own configuration. Then it proceeds to starting other modules.

The Python subprocess module is used for starting processes, but
because this is not efficient for managing groups of processes,
SIGCHLD signals are caught and processed using the signal module.

Most of the logic is contained in the BoB class. However, since Python
requires that signal processing happen in the main thread, we do
signal handling outside of that class, in the code running for
__main__.
"""

24 25 26
import sys; sys.path.append ('@@PYTHONPATH@@')
import os

Jelte Jansen's avatar
Jelte Jansen committed
27 28 29
# Locate the boss's module specification file (bob.spec).
#
# If B10_FROM_SOURCE is set in the environment, we use data files
# from a directory relative to that, otherwise we use the ones
# installed on the system.  The "@...@" markers are placeholders in
# this template file; the substituted data directory may still contain
# "${datarootdir}" / "${prefix}" references, which are expanded here.
if "B10_FROM_SOURCE" in os.environ:
    SPECFILE_LOCATION = (os.environ["B10_FROM_SOURCE"] +
                         "/src/bin/bind10/bob.spec")
else:
    PREFIX = "@prefix@"
    DATAROOTDIR = "@datarootdir@"
    SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec"
    SPECFILE_LOCATION = SPECFILE_LOCATION.replace("${datarootdir}",
                                                  DATAROOTDIR)
    SPECFILE_LOCATION = SPECFILE_LOCATION.replace("${prefix}", PREFIX)
    
37 38
# TODO: start up statistics thingy

39 40 41 42 43
import subprocess
import signal
import re
import errno
import time
44
import select
45
import random
46 47
from optparse import OptionParser, OptionValueError

48
import isc.cc
49 50

# This is the version that gets displayed to the user.
51
__version__ = "v20100310"
52

53 54 55
# Nothing at all to do with the 1990-12-10 article here:
# http://www.subgenius.com/subg-digest/v2/0056.html

56 57 58 59 60 61 62 63 64 65 66 67 68 69
class RestartSchedule:
    """Keeps state when restarting something (in this case, a process).

    When a process dies unexpectedly, we need to restart it. However, if
    it fails to restart for some reason, then we should not simply keep
    restarting it at high speed.

    A more sophisticated algorithm can be developed, but for now we choose
    a simple set of rules:

      * If a process has been running for >=10 seconds, we restart it
        right away.
      * If a process was running for <10 seconds, we wait until 10 seconds
        after it was started.

    To avoid programs getting into lockstep, we use a normal distribution
    to avoid being restarted at exactly 10 seconds.

    The 10 seconds above is the default; it is controlled by the
    restart_frequency argument of the constructor.
    """

    def __init__(self, restart_frequency=10.0):
        """Create a schedule with no recorded start or stop time.

        restart_frequency is the mean number of seconds that must pass
        after a start before the next restart is allowed.
        """
        self.restart_frequency = restart_frequency
        self.run_start_time = None
        self.run_stop_time = None
        self.restart_time = None

    def set_run_start_time(self, when=None):
        """Record that the process was started at 'when' (default: now)
        and compute the earliest time it may be restarted."""
        if when is None:
            when = time.time()
        self.run_start_time = when
        # Jitter the delay (standard deviation is 5% of the frequency) so
        # that several processes do not get restarted in lockstep.
        sigma = self.restart_frequency * 0.05
        delay = random.normalvariate(self.restart_frequency, sigma)
        self.restart_time = when + delay

    def set_run_stop_time(self, when=None):
        """We don't actually do anything with stop time now, but it
        might be useful for future algorithms."""
        if when is None:
            when = time.time()
        self.run_stop_time = when

    def get_restart_time(self, when=None):
        """Return the time at which the process may be restarted.

        The result is never earlier than 'when' (default: now). If no
        start time has been recorded yet there is nothing to wait for,
        so 'when' itself is returned.
        """
        if when is None:
            when = time.time()
        if self.restart_time is None:
            # max(when, None) would raise TypeError
            return when
        return max(when, self.restart_time)

101 102 103 104 105 106
class ProcessInfo:
    """Information about a process.

    Creating an instance starts the process right away; respawn() starts
    it again with the same arguments. The attributes 'process' (the
    subprocess.Popen object) and 'pid' always describe the most recently
    started incarnation.
    """

    # shared sink for children whose output should be discarded
    dev_null = open("/dev/null", "w")

    def __init__(self, name, args, env=None, dev_null_stdout=False,
                 c_channel_port=None):
        """Remember how to run the process and start it.

        name            -- label used in messages
        args            -- program and arguments, handed to subprocess.Popen
        env             -- extra environment variables for the child; the
                           mapping is copied, never modified
        dev_null_stdout -- if True, send the child's stdout and stderr to
                           /dev/null
        c_channel_port  -- if not None, exported to the child as
                           ISC_MSGQ_PORT (overriding any value in env)

        Whatever subprocess.Popen raises when the program cannot be
        started is passed on to the caller.
        """
        self.name = name
        self.args = args
        # Copy, so that neither the caller's dictionary nor a default
        # shared between instances is ever modified.
        self.env = dict(env) if env else {}
        self.dev_null_stdout = dev_null_stdout
        self.c_channel_port = c_channel_port
        self.restart_schedule = RestartSchedule()
        self._spawn()

    def _build_env(self):
        """Return a fresh environment dictionary for the child."""
        spawn_env = dict(self.env)
        spawn_env['PATH'] = os.environ.get('PATH', os.defpath)
        if 'B10_FROM_SOURCE' in os.environ:
            spawn_env['B10_FROM_SOURCE'] = os.environ['B10_FROM_SOURCE']
        else:
            # installed system: look in the libexec directory first
            spawn_env['PATH'] = "@@LIBEXECDIR@@:" + spawn_env['PATH']
        for variable in ('PYTHON_EXEC', 'PYTHONPATH'):
            if variable in os.environ:
                spawn_env[variable] = os.environ[variable]
        if self.c_channel_port is not None:
            # environment values have to be strings
            spawn_env['ISC_MSGQ_PORT'] = str(self.c_channel_port)
        return spawn_env

    def _spawn(self):
        """Start the process and note the time it was started."""
        if self.dev_null_stdout:
            spawn_stdout = self.dev_null
        else:
            spawn_stdout = None
        self.process = subprocess.Popen(self.args,
                                        stdin=subprocess.PIPE,
                                        stdout=spawn_stdout,
                                        stderr=spawn_stdout,
                                        close_fds=True,
                                        env=self._build_env())
        self.pid = self.process.pid
        self.restart_schedule.set_run_start_time()

    def respawn(self):
        """Start the process again."""
        self._spawn()

142 143
class BoB:
    """Boss of BIND class.

    Starts the child processes, keeps track of them by PID, notes the
    ones that exit, restarts them, and stops everything on shutdown.
    """

    def __init__(self, c_channel_port=9912, auth_port=5300, verbose=False):
        """Initialize the Boss of BIND. This is a singleton (only one
        can run).

        The c_channel_port specifies the TCP/IP port that the msgq
        process listens on. The auth_port is the port handed to the
        b10-auth process. If verbose is True, then the boss reports
        what it is doing.
        """
        self.verbose = verbose
        self.c_channel_port = c_channel_port
        self.auth_port = auth_port
        # connection to msgq, made by startup()
        self.cc_session = None
        # configuration/command session, made by startup()
        self.ccs = None
        # running children, by PID
        self.processes = {}
        # children that exited and have not been restarted yet, by the
        # PID they had
        self.dead_processes = {}
        # True from a successful startup() until shutdown is requested
        self.runnable = False

    def config_handler(self, new_config):
        """Callback for new configuration data; the data is accepted
        but not used yet."""
        if self.verbose:
            print("[XX] handling new config:")
            print(new_config)
        # TODO: actually do something with the new configuration
        return isc.config.ccsession.create_answer(0)

    def command_handler(self, command, args):
        """Callback for commands sent to the boss; returns the answer.

        Known commands are "shutdown", "print_message" and
        "print_settings"; anything else gets an error answer.
        """
        if self.verbose:
            print("[XX] Boss got command:")
            print(command)
        if not isinstance(command, str):
            return isc.config.ccsession.create_answer(1, "bad command")
        if command == "shutdown":
            print("[XX] got shutdown command")
            # the main loop ends once this is False
            self.runnable = False
            return isc.config.ccsession.create_answer(0)
        if command == "print_message":
            if args:
                print(args)
            return isc.config.ccsession.create_answer(0, args)
        if command == "print_settings":
            print("Full Config:")
            full_config = self.ccs.get_full_config()
            for item in full_config:
                print(item + ": " + str(full_config[item]))
            return isc.config.ccsession.create_answer(0)
        return isc.config.ccsession.create_answer(1, "Unknown command")

    def _abort_startup(self, started, message):
        """Kill the children in 'started' and return 'message'."""
        for proc_info in started:
            try:
                proc_info.process.kill()
            except OSError:
                # the child is already gone
                pass
        return message

    def _start_child(self, name, args, announce, started,
                     dev_null_stdout=False):
        """Start one child and register it in self.processes.

        Returns None on success. On failure the children in 'started'
        are killed and a string describing the problem is returned.
        """
        if self.verbose:
            sys.stdout.write(announce)
        # every child is told which port msgq uses
        child_env = {"ISC_MSGQ_PORT": str(self.c_channel_port)}
        try:
            proc_info = ProcessInfo(name, args, child_env, dev_null_stdout)
        except Exception as e:
            return self._abort_startup(started,
                                       "Unable to start " + name + "; " +
                                       str(e))
        self.processes[proc_info.pid] = proc_info
        started.append(proc_info)
        if self.verbose:
            sys.stdout.write("Started %s (PID %d)\n" % (name, proc_info.pid))
        return None

    def startup(self):
        """Start the BoB instance.

        Returns None if successful, otherwise a string describing the
        problem.
        """
        # try to connect to the c-channel daemon,
        # to see if it is already running
        if self.verbose:
            sys.stdout.write("Checking for already running msgq\n")
        try:
            self.cc_session = isc.cc.Session(self.c_channel_port)
            return "msgq already running, cannot start"
        except isc.cc.session.SessionError:
            pass

        # children started so far, killed again if a later step fails
        started = []

        # start the c-channel daemon
        problem = self._start_child("msgq", "msgq",
                                    "Starting msgq using port %d\n" %
                                    self.c_channel_port,
                                    started, True)
        if problem:
            return problem

        # now connect to the c-channel
        cc_connect_start = time.time()
        while self.cc_session is None:
            # if we have been trying for "a while" give up
            if (time.time() - cc_connect_start) > 5:
                return self._abort_startup(
                    started, "Unable to connect to c-channel after 5 seconds")
            # try to connect, and if we can't wait a short while
            try:
                self.cc_session = isc.cc.Session(self.c_channel_port)
            except isc.cc.session.SessionError:
                time.sleep(0.1)
        #self.cc_session.group_subscribe("Boss", "boss")

        # start the configuration manager
        problem = self._start_child("b10-cfgmgr", "b10-cfgmgr",
                                    "Starting b10-cfgmgr\n", started)
        if problem:
            return problem

        # TODO: once this interface is done, replace self.cc_session
        # by this one
        # sleep until b10-cfgmgr is fully up and running, this is a good place
        # to have a (short) timeout on synchronized groupsend/receive
        time.sleep(1)
        if self.verbose:
            print("[XX] starting ccsession")
        self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
                                              self.config_handler,
                                              self.command_handler)
        self.ccs.start()
        if self.verbose:
            print("[XX] ccsession started")

        # start b10-auth
        # XXX: this must be read from the configuration manager in the future
        authargs = ['b10-auth', '-p', str(self.auth_port)]
        if self.verbose:
            authargs += ['-v']
        problem = self._start_child("b10-auth", authargs,
                                    "Starting b10-auth using port %d\n" %
                                    self.auth_port,
                                    started)
        if problem:
            return problem

        # start the b10-xfrin
        problem = self._start_child("b10-xfrin", ['b10-xfrin'],
                                    "Starting b10-xfrin\n", started)
        if problem:
            return problem

        # start the b10-cmdctl
        # XXX: we hardcode port 8080
        problem = self._start_child("b10-cmdctl", ['b10-cmdctl'],
                                    "Starting b10-cmdctl on port 8080\n",
                                    started)
        if problem:
            return problem

        self.runnable = True
        return None

    def stop_all_processes(self):
        """Stop all processes."""
        cmd = { "command": ['shutdown']}
        for group in ('Cmd-Ctrld', "ConfigManager", "Auth"):
            self.cc_session.group_sendmsg(cmd, 'Boss', group)

    def stop_process(self, process):
        """Stop the given process, friendly-like."""
        # XXX nothing yet
        pass

    def _signal_processes(self, signame, send):
        """Call send(popen_object) for every child still in
        self.processes; signame is only used for the verbose message."""
        for proc_info in list(self.processes.values()):
            if self.verbose:
                sys.stdout.write("Sending %s to %s (PID %d).\n" %
                                 (signame, proc_info.name, proc_info.pid))
            try:
                send(proc_info.process)
            except OSError:
                # ignore these (usually ESRCH because the child
                # finally exited)
                pass

    def shutdown(self):
        """Stop the BoB instance.

        Children are first asked to stop over the c-channel, then sent
        SIGTERM, and whatever is left after that is sent SIGKILL.
        """
        if self.verbose:
            sys.stdout.write("Stopping the server.\n")
        # first try using the BIND 10 request to stop
        try:
            self.stop_all_processes()
        except Exception as e:
            # best effort only; the signals below still get sent
            if self.verbose:
                sys.stdout.write("Unable to send shutdown command; %s\n" % e)
        # XXX: some delay probably useful... how much is uncertain
        time.sleep(0.1)
        self.reap_children()
        # next try sending a SIGTERM
        self._signal_processes("SIGTERM", lambda process: process.terminate())
        # XXX: some delay probably useful... how much is uncertain
        time.sleep(0.1)
        self.reap_children()
        # finally, send a SIGKILL (unmaskable termination) to every child
        # that reap_children() did not see exit
        self._signal_processes("SIGKILL", lambda process: process.kill())
        if self.verbose:
            sys.stdout.write("All processes ended, server done.\n")

    def reap_children(self):
        """Check to see if any of our child processes have exited,
        and note this for later handling.

        Exited children move from self.processes to self.dead_processes.
        If the msgq process is among them, self.runnable is set to False.
        """
        while True:
            try:
                (pid, exit_status) = os.waitpid(-1, os.WNOHANG)
            except OSError as o:
                # ECHILD: there are no children at all
                if o.errno == errno.ECHILD:
                    break
                # XXX: should be impossible to get any other error here
                raise
            if pid == 0:
                # there are children, but none of them has exited
                break
            if pid not in self.processes:
                sys.stdout.write("Unknown child pid %d exited.\n" % pid)
                continue
            proc_info = self.processes.pop(pid)
            proc_info.restart_schedule.set_run_stop_time()
            self.dead_processes[proc_info.pid] = proc_info
            if self.verbose:
                sys.stdout.write("Process %s (PID %d) died.\n" %
                                 (proc_info.name, proc_info.pid))
            if proc_info.name == "msgq":
                if self.verbose and self.runnable:
                    sys.stdout.write(
                                 "The msgq process died, shutting down.\n")
                self.runnable = False

    # 'old' command style, commented out for now
    # please move the handling below to command_handler
    #def recv_and_process_cc_msg(self):
        #"""Receive and process the next message on the c-channel,
        #if any."""
        #self.ccs.checkCommand()
        #msg, envelope = self.cc_session.group_recvmsg(False)
        #print(msg)
        #if msg is None:
        #    return
        #if not ((type(msg) is dict) and (type(envelope) is dict)):
        #    if self.verbose:
        #        sys.stdout.write("Non-dictionary message\n")
        #    return
        #if not "command" in msg:
        #    if self.verbose:
        #        if "msg" in envelope:
        #            del envelope['msg']
        #        sys.stdout.write("Unknown message received\n")
        #        sys.stdout.write(pprint.pformat(envelope) + "\n")
        #        sys.stdout.write(pprint.pformat(msg) + "\n")
        #    return

        #cmd = msg['command']
        #if not (type(cmd) is list):
        #    if self.verbose:
        #        sys.stdout.write("Non-list command\n")
        #    return
        #
        # done checking and extracting... time to execute the command
        #if cmd[0] == "shutdown":
        #    if self.verbose:
        #        sys.stdout.write("shutdown command received\n")
        #    self.runnable = False
        #    # XXX: reply here?
        #elif cmd[0] == "getProcessList":
        #    if self.verbose:
        #        sys.stdout.write("getProcessList command received\n")
        #    live_processes = [ ]
        #    for proc_info in processes:
        #        live_processes.append({ "name": proc_info.name, 
        #                                "args": proc_info.args, 
        #                                "pid": proc_info.pid, })
        #    dead_processes = [ ]
        #    for proc_info in dead_processes:
        #        dead_processes.append({ "name": proc_info.name, 
        #                                "args": proc_info.args, })
        #    cc.group_reply(envelope, { "response": cmd,
        #                               "sent": msg["sent"],
        #                               "live_processes": live_processes,
        #                               "dead_processes": dead_processes, })
        #else:
        #    if self.verbose:
        #        sys.stdout.write("Unknown command %s\n" % str(cmd))

    def restart_processes(self):
        """Restart any dead processes.

        Returns the earliest time at which a process that is still dead
        may be restarted, or None if there is nothing left to restart
        (or we are shutting down).
        """
        next_restart = None
        # if we're shutting down, then don't restart
        if not self.runnable:
            return next_restart
        # otherwise look through each dead process and try to restart
        still_dead = {}
        now = time.time()
        for proc_info in self.dead_processes.values():
            restart_time = proc_info.restart_schedule.get_restart_time(now)
            if restart_time <= now:
                if self.verbose:
                    sys.stdout.write("Resurrecting dead %s process...\n" %
                                     proc_info.name)
                try:
                    proc_info.respawn()
                except Exception as e:
                    if self.verbose:
                        sys.stdout.write("Unable to resurrect %s; %s\n" %
                                         (proc_info.name, e))
                    # Count the failed attempt as a start, so the next
                    # attempt is delayed by the restart schedule and the
                    # caller is told when it is due.
                    proc_info.restart_schedule.set_run_start_time(now)
                    restart_time = \
                        proc_info.restart_schedule.get_restart_time(now)
                else:
                    self.processes[proc_info.pid] = proc_info
                    if self.verbose:
                        sys.stdout.write("Resurrected %s (PID %d)\n" %
                                         (proc_info.name, proc_info.pid))
                    continue
            # not restarted (yet): remember it and when it is due
            still_dead[proc_info.pid] = proc_info
            if (next_restart is None) or (next_restart > restart_time):
                next_restart = restart_time
        # remember any processes that refuse to be resurrected
        self.dead_processes = still_dead
        # return the time when the next process is ready to be restarted
        return next_restart
487

488 489 490 491
# Global variables, needed for signal handlers: a handler only receives
# the signal number and stack frame, so fatal_signal() reads these
# instead. Both are assigned in main().
options = None
boss_of_bind = None

Shane Kerr's avatar
Shane Kerr committed
492 493 494 495 496 497
def reaper(signal_number, stack_frame):
    """Handler for SIGCHLD (a child process has died).

    Deliberately does nothing: the Python signal machinery has been
    set up to write down a pipe when a signal arrives, and that is
    what wakes up our select() call.
    """
    return None
498
                   
Shane Kerr's avatar
Shane Kerr committed
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
def get_signame(signal_number):
    """Look up the symbolic name (such as "SIGTERM") of a signal number.

    The names of the signal module are searched in dir() order and the
    first match wins. Names where "SIG" is not followed by a letter or
    digit (SIG_DFL, SIG_IGN, ...) are skipped. If nothing matches, the
    string "Unknown signal N" is returned.
    """
    matches = (name for name in dir(signal)
               if name.startswith("SIG")
               and name[3].isalnum()
               and getattr(signal, name) == signal_number)
    found = next(matches, None)
    if found is not None:
        return found
    return "Unknown signal %d" % signal_number

# XXX: perhaps register atexit() function and invoke that instead
def fatal_signal(signal_number, stack_frame):
    """Handler for the signals that tell us to exit (SIGINT or SIGTERM).

    Puts SIGCHLD back to its default handling and clears the boss's
    'runnable' flag; nothing is shut down from inside the handler.
    """
    # Only read here (setting an attribute does not rebind the name),
    # so no 'global' statements are needed.
    if options.verbose:
        signame = get_signame(signal_number)
        sys.stdout.write("Received %s.\n" % signame)
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    boss_of_bind.runnable = False

def check_port(option, opt_str, value, parser):
    """Function to ensure that the port we are passed is actually
    a valid port number. Used by OptionParser() on startup.

    This is an optparse callback: 'value' is the option argument as a
    string, and it is stored (still as a string) in parser.values under
    msgq_port or auth_port, depending on which option was given.

    Raises OptionValueError if the value is not a decimal number from
    0 to 65535 without leading zeros, or if opt_str is not one of the
    options handled here.
    """
    # A raw string, so that the \d reaches the re module untouched.
    port_pattern = (r'^(6553[0-5]|655[0-2]\d|65[0-4]\d\d|6[0-4]\d{3}'
                    r'|[1-5]\d{4}|[1-9]\d{0,3}|0)$')
    if not re.match(port_pattern, value):
        raise OptionValueError("%s requires a port number (0-65535)" % opt_str)
    if opt_str in ('-m', '--msgq-port'):
        parser.values.msgq_port = value
    elif opt_str in ('-p', '--port'):
        parser.values.auth_port = value
    else:
        raise OptionValueError("Unknown option " + opt_str)
Shane Kerr's avatar
Shane Kerr committed
528 529 530 531
  
def main():
    """Run the boss: parse the command line, start the system, then
    loop handling child exits and commands until told to stop.

    Exits with status 1 if the system cannot be started.
    """
    global options
    global boss_of_bind
    # only needed here, for setting up the wakeup pipe
    import fcntl

    # Parse any command-line options.
    parser = OptionParser(version=__version__)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="display more about what is going on")
    parser.add_option("-p", "--port", dest="auth_port", type="string",
                      action="callback", callback=check_port, default="5300",
                      help="port the b10-auth daemon will use (default 5300)")
    parser.add_option("-m", "--msgq-port", dest="msgq_port", type="string",
                      action="callback", callback=check_port, default="9912",
                      help="port the msgq daemon will use (default 9912)")
    (options, args) = parser.parse_args()

    # Announce startup.
    if options.verbose:
        sys.stdout.write("BIND 10 %s\n" % __version__)

    # TODO: set process name, perhaps by:
    #       http://code.google.com/p/procname/
    #       http://github.com/lericson/procname/

    # Create wakeup pipe for signal handlers. The descriptor handed to
    # set_wakeup_fd() has to be non-blocking (newer Python versions
    # refuse a blocking one).
    wakeup_pipe = os.pipe()
    write_flags = fcntl.fcntl(wakeup_pipe[1], fcntl.F_GETFL)
    fcntl.fcntl(wakeup_pipe[1], fcntl.F_SETFL, write_flags | os.O_NONBLOCK)
    signal.set_wakeup_fd(wakeup_pipe[1])

    # Set signal handlers for catching child termination, as well
    # as our own demise.
    signal.signal(signal.SIGCHLD, reaper)
    signal.siginterrupt(signal.SIGCHLD, False)
    signal.signal(signal.SIGINT, fatal_signal)
    signal.signal(signal.SIGTERM, fatal_signal)

    # Go bob!
    boss_of_bind = BoB(int(options.msgq_port), int(options.auth_port),
                       options.verbose)
    startup_result = boss_of_bind.startup()
    if startup_result:
        sys.stderr.write("Error on startup: %s\n" % startup_result)
        sys.exit(1)

    # In our main loop, we check for dead processes or messages
    # on the c-channel.
    wakeup_fd = wakeup_pipe[0]
    ccs_fd = boss_of_bind.ccs.get_socket().fileno()
    while boss_of_bind.runnable:
        # clean up any processes that exited
        boss_of_bind.reap_children()
        next_restart = boss_of_bind.restart_processes()
        if next_restart is None:
            # nothing waiting to be restarted: sleep until something happens
            wait_time = None
        else:
            wait_time = max(next_restart - time.time(), 0)

        # select() can raise EINTR when a signal arrives,
        # even if they are resumable, so we have to catch
        # the exception
        try:
            (rlist, wlist, xlist) = select.select([wakeup_fd, ccs_fd], [], [],
                                                  wait_time)
        except select.error as err:
            if err.args[0] == errno.EINTR:
                (rlist, wlist, xlist) = ([], [], [])
            else:
                sys.stderr.write("Error with select(); %s\n" % err)
                break

        for fd in rlist + xlist:
            if fd == ccs_fd:
                boss_of_bind.ccs.check_command()
            elif fd == wakeup_fd:
                # drain the bytes written when signals arrived; the
                # children are reaped at the top of the loop
                os.read(wakeup_fd, 32)

    # shutdown
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    boss_of_bind.shutdown()
Shane Kerr's avatar
Shane Kerr committed
606 607 608

if __name__ == "__main__":
    main()