# bind10.py
"""\
This file implements the Boss of Bind (BoB, or bob) program.

Its purpose is to start up the BIND 10 system, and then manage the
processes, by starting, stopping, and restarting processes that exit.

To start the system, it first runs the c-channel program (msgq), then
connects to that. It then runs the configuration manager, and reads
its own configuration. Then it proceeds to starting other modules.

The Python subprocess module is used for starting processes, but
because this is not efficient for managing groups of processes,
SIGCHLD signals are caught and processed using the signal module.

Most of the logic is contained in the BoB class. However, since Python
requires that signal processing happen in the main thread, we do
signal handling outside of that class, in the code running for
__main__.
"""

import subprocess
import signal
import os
import sys
import re
import errno
import time
from optparse import OptionParser, OptionValueError

30
import ISC.CC
31
32
33
34

# This is the version that gets displayed to the user.
__version__ = "v20091028 (Paving the DNS Parking Lot)"

# Nothing at all to do with the 1990-12-10 article here:
# http://www.subgenius.com/subg-digest/v2/0056.html

class BoB:
    """Boss of BIND class.

    Starts the msgq, bind-cfgd and parkinglot child processes, keeps
    track of them by PID, and stops them again on shutdown.
    """

    def __init__(self, c_channel_port=9912, verbose=False):
        """Initialize the Boss of BIND. This is a singleton (only one
        can run).

        The c_channel_port specifies the TCP/IP port that the msgq
        process listens on. If verbose is True, then the boss reports
        what it is doing.
        """
        self.verbose = verbose
        self.c_channel_port = c_channel_port
        # Popen object for msgq; only set once startup() has succeeded.
        self.cc_process = None
        # Session connected to msgq; set by startup().
        self.cc_session = None
        # Children that have been started, keyed by PID.
        self.processes = {}
        # Children that reap() has been told about, keyed by PID.
        self.dead_processes = {}
        self.component_processes = {}

    def _spawn(self, program, stdin, dev_null, env):
        """Run program with stdout and stderr sent to dev_null.

        Returns the subprocess.Popen object. Raises OSError if the
        program cannot be executed.
        """
        return subprocess.Popen(program,
                                stdin=stdin,
                                stdout=dev_null,
                                stderr=dev_null,
                                close_fds=True,
                                env=env)

    def _register(self, program, process):
        """Remember a freshly started child and announce it if verbose."""
        self.processes[process.pid] = process
        if self.verbose:
            sys.stdout.write("Started %s with PID %d\n" % (program, process.pid))

    def startup(self):
        """Start the BoB instance.

        Returns None if successful, otherwise a string describing the
        problem.
        """
        # Every child is handed this descriptor when it is spawned, so
        # the boss can close its own copy as soon as startup is over.
        with open("/dev/null", "w") as dev_null:
            return self._start_children(dev_null)

    def _start_children(self, dev_null):
        """Start msgq, connect to it, then start bind-cfgd and parkinglot.

        dev_null is an open file used for the children's output (and
        for the input of everything except msgq).

        Returns None if successful, otherwise a string describing the
        problem.
        """
        # start the c-channel daemon
        if self.verbose:
            sys.stdout.write("Starting msgq using port %d\n" % self.c_channel_port)
        c_channel_env = {"ISC_MSGQ_PORT": str(self.c_channel_port)}
        # Only OSError is caught: a bare except here would also swallow
        # the SystemExit raised by the signal handlers.
        try:
            c_channel = self._spawn("msgq", subprocess.PIPE, dev_null,
                                    c_channel_env)
        except OSError:
            return "Unable to start msgq"
        self._register("msgq", c_channel)

        # now connect to the c-channel
        cc_connect_start = time.time()
        while self.cc_session is None:
            # if we have been trying for "a while" give up
            if (time.time() - cc_connect_start) > 5:
                c_channel.kill()
                return "Unable to connect to c-channel after 5 seconds"
            # try to connect, and if we can't wait a short while
            try:
                self.cc_session = ISC.CC.Session(self.c_channel_port)
            except ISC.CC.session.SessionError:
                time.sleep(0.1)
        self.cc_session.group_subscribe("Boss")

        # start the configuration manager
        if self.verbose:
            sys.stdout.write("Starting bind-cfgd\n")
        try:
            bind_cfgd = self._spawn("bind-cfgd", dev_null, dev_null, {})
        except OSError:
            c_channel.kill()
            return "Unable to start bind-cfgd"
        self._register("bind-cfgd", bind_cfgd)

        # start the parking lot
        # XXX: this must be read from the configuration manager in the future
        if self.verbose:
            sys.stdout.write("Starting parkinglot\n")
        try:
            parkinglot = self._spawn("parkinglot", dev_null, dev_null, {})
        except OSError:
            c_channel.kill()
            bind_cfgd.kill()
            return "Unable to start parkinglot"
        self._register("parkinglot", parkinglot)

        # remember our super-important process
        self.cc_process = c_channel
        return None

    def stop_all_processes(self):
        """Stop all processes."""
        self.cc_session.group_sendmsg({ "shutdown": True }, "Boss")

    def stop_process(self, process):
        """Stop the given process, friendly-like."""
        # XXX nothing yet
        pass

    def shutdown(self):
        """Stop the BoB instance.

        Asks the children to stop over the c-channel, then sends
        SIGTERM to every known child, and finally SIGKILL to those
        that are still running after a short delay.
        """
        if self.verbose:
            sys.stdout.write("Stopping the server.\n")
        # first try using the BIND 10 request to stop
        if self.cc_session:
            try:
                self.stop_all_processes()
            except Exception:
                # Best effort only; the signals below are the fallback.
                pass
        time.sleep(0.1)  # XXX: some delay probably useful... how much is uncertain
        # next try sending a SIGTERM
        # (the dict is keyed by PID, so the Popen objects are its values)
        processes_to_stop = list(self.processes.values())
        for process in processes_to_stop:
            if self.verbose:
                sys.stdout.write("Sending SIGTERM to process %d.\n" % process.pid)
            try:
                process.terminate()
            except OSError:
                # ignore these (usually ESRCH because the child
                # finally exited)
                pass
        time.sleep(0.1)  # XXX: some delay probably useful... how much is uncertain
        # poll() returns None while a child is still running, and copes
        # with children that have already been waited for.
        unstopped_processes = [process for process in processes_to_stop
                               if process.poll() is None]
        # finally, send a SIGKILL (unmaskable termination)
        for process in unstopped_processes:
            if self.verbose:
                sys.stdout.write("Sending SIGKILL to process %d.\n" % process.pid)
            try:
                process.kill()
            except OSError:
                # ignore these (usually ESRCH because the child
                # finally exited)
                pass
        if self.verbose:
            sys.stdout.write("All processes ended, server done.\n")

    def reap(self, pid, exit_status):
        """The process specified by pid has exited with the value
        exit_status, so perform any action necessary (cleanup,
        restart, and so on).

        Returns False if the process that died was msgq, meaning the
        caller should shut down; otherwise returns True.
        """
        process = self.processes.pop(pid, None)
        if process is None:
            # Not a child we are tracking, so there is nothing to clean up.
            return True
        self.dead_processes[process.pid] = process
        if self.verbose:
            sys.stdout.write("Process %d died.\n" % pid)
        if self.cc_process and (pid == self.cc_process.pid):
            if self.verbose:
                sys.stdout.write("The msgq process died, shutting down.\n")
            return False
        return True
201
202
203
204
205
206
207
208
209
210
211
212

if __name__ == "__main__":
    def reaper(signal_number, stack_frame):
        """Handle SIGCHLD by collecting every child that has exited.

        Each exited child is passed to boss_of_bind.reap(). If reap()
        returns False, the whole server is shut down and the program
        exits with status 0.
        """
        global boss_of_bind
        finished = False
        while not finished:
            try:
                child_pid, status = os.waitpid(-1, os.WNOHANG)
            except OSError as err:
                # ECHILD just means there are no children left to wait for
                if err.errno != errno.ECHILD:
                    raise
                finished = True
            else:
                if child_pid == 0:
                    # children exist, but none of them has exited
                    finished = True
                elif not boss_of_bind.reap(child_pid, status):
                    # Restore the default SIGCHLD action so this handler
                    # does not run again while the children are stopped.
                    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
                    boss_of_bind.shutdown()
                    sys.exit(0)


    def get_signame(signal_number):
        """Look up the symbolic name (e.g. "SIGTERM") of a signal number.

        Only names of the form SIGxxx are considered; names such as
        SIG_DFL are skipped. If no name matches, a string of the form
        "Unknown signal N" is returned.
        """
        matching_names = (
            name for name in dir(signal)
            if name.startswith("SIG")
            and name[3].isalnum()
            and getattr(signal, name) == signal_number
        )
        first_match = next(matching_names, None)
        if first_match is None:
            return "Unknown signal %d" % signal_number
        return first_match

    # XXX: perhaps register atexit() function and invoke that instead
    def fatal_signal(signal_number, stack_frame):
        """Handle SIGINT or SIGTERM: stop the server and exit.

        When running verbosely the received signal is reported by
        name. The program always exits with status 0.
        """
        global boss_of_bind
        global options
        if options.verbose:
            signame = get_signame(signal_number)
            sys.stdout.write("Received %s.\n" % signame)
        # Restore the default SIGCHLD action before stopping the
        # children, replacing whatever handler was installed for it.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        if boss_of_bind:
            boss_of_bind.shutdown()
        sys.exit(0)

    def check_port(option, opt_str, value, parser):
        """Function to ensure that the port we are passed is actually
        a valid port number. Used by OptionParser() on startup.

        This is an optparse callback: opt_str is the option as it was
        written on the command line and value is its string argument.
        On success the string is stored as parser.values.msgq_port.

        Raises OptionValueError if value is not a decimal number in
        the range 0-65535 written without leading zeros.
        """
        # Raw string, so that \d reaches the regex engine rather than
        # being treated as an (invalid) string escape sequence.
        port_pattern = (r'^(6553[0-5]|655[0-2]\d|65[0-4]\d\d|6[0-4]\d{3}'
                        r'|[1-5]\d{4}|[1-9]\d{0,3}|0)$')
        if not re.match(port_pattern, value):
            raise OptionValueError("%s requires a port number (0-65535)" % opt_str)
        parser.values.msgq_port = value

    # Parse any command-line options.
    parser = OptionParser(version=__version__)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="display more about what is going on")
    parser.add_option("-m", "--msgq-port", dest="msgq_port", type="string",
                      action="callback", callback=check_port, default="9912",
                      help="port the msgq daemon will use")
    (options, args) = parser.parse_args()

    # Announce startup.
    if options.verbose:
        sys.stdout.write("BIND 10 %s\n" % __version__)

    # TODO: set process name, perhaps by:
    #       http://code.google.com/p/procname/
    #       http://github.com/lericson/procname/

    # The signal handlers read boss_of_bind as a global. Give it a value
    # before they are installed, so that a signal arriving before the
    # boss exists does not fail with a NameError in fatal_signal().
    boss_of_bind = None

    # Set signal handlers for catching child termination, as well
    # as our own demise.
    signal.signal(signal.SIGCHLD, reaper)
    # Do not let SIGCHLD interrupt system calls that are in progress.
    signal.siginterrupt(signal.SIGCHLD, False)
    signal.signal(signal.SIGINT, fatal_signal)
    signal.signal(signal.SIGTERM, fatal_signal)

    # Go bob!
    boss_of_bind = BoB(int(options.msgq_port), options.verbose)
    startup_result = boss_of_bind.startup()
    if startup_result:
        sys.stderr.write("Error on startup: %s\n" % startup_result)
        sys.exit(1)

    # Everything else happens in the signal handlers; just stay alive.
    while True:
        time.sleep(1)