bind10.py 10.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""\
This file implements the Boss of Bind (BoB, or bob) program.

Its purpose is to start up the BIND 10 system, and then manage the
processes, by starting, stopping, and restarting processes that exit.

To start the system, it first runs the c-channel program (msgq), then
connects to that. It then runs the configuration manager, and reads
its own configuration. Then it proceeds to starting other modules.

The Python subprocess module is used for starting processes, but
because this is not efficient for managing groups of processes,
SIGCHLD signals are caught and processed using the signal module.

Most of the logic is contained in the BoB class. However, since Python
requires that signal processing happen in the main thread, we do
signal handling outside of that class, in the code running for
__main__.
"""

import subprocess
import signal
import os
import sys
import re
import errno
import time
from optparse import OptionParser, OptionValueError

30
import ISC.CC
31
32
33
34

# This is the version that gets displayed to the user.
__version__ = "v20091028 (Paving the DNS Parking Lot)"

35
36
37
# Nothing at all to do with the 1990-12-10 article here:
# http://www.subgenius.com/subg-digest/v2/0056.html

38
39
class BoB:
    """Boss of BIND class.

    Starts the msgq, bind-cfgd and parkinglot child processes, keeps
    track of them by PID, and stops them again on shutdown.
    """

    def __init__(self, c_channel_port=9912, verbose=False):
        """Initialize the Boss of BIND. This is a singleton (only one
        can run).

        The c_channel_port specifies the TCP/IP port that the msgq
        process listens on. If verbose is True, then the boss reports
        what it is doing.
        """
        self.verbose = verbose
        self.c_channel_port = c_channel_port
        # The msgq child; only set once startup() has fully succeeded.
        self.cc_process = None
        # Our session on the c-channel, once connected.
        self.cc_session = None
        # Children we started, keyed by PID.
        self.processes = {}
        # Children that have exited (moved here by reap()), keyed by PID.
        self.dead_processes = {}
        # Not used by any method of this class yet.
        self.component_processes = {}

    def startup(self):
        """Start the BoB instance.

        Starts msgq, connects to it, then starts bind-cfgd and
        parkinglot.

        Returns None if successful, otherwise a string describing the
        problem.
        """
        # The children's standard streams are pointed at the null
        # device. It is opened read/write because it serves as stdin
        # for some children and as stdout/stderr for all of them.
        dev_null = os.open(os.devnull, os.O_RDWR)
        try:
            return self._start_processes(dev_null)
        finally:
            # Popen has already handed the descriptor to the children
            # that needed it, so our copy can go.
            os.close(dev_null)

    def _start_processes(self, dev_null):
        """Start every child in order, using the descriptor dev_null
        for their redirected streams.

        Returns None if successful, otherwise a string describing the
        problem.
        """
        # start the c-channel daemon
        if self.verbose:
            sys.stdout.write("Starting msgq using port %d\n" % self.c_channel_port)
        c_channel_env = {"ISC_MSGQ_PORT": str(self.c_channel_port)}
        c_channel = self._spawn("msgq", "msgq", subprocess.PIPE, dev_null,
                                c_channel_env)
        if c_channel is None:
            return "Unable to start msgq"

        # now connect to the c-channel
        cc_connect_start = time.time()
        while self.cc_session is None:
            # if we have been trying for "a while" give up
            if (time.time() - cc_connect_start) > 5:
                self._kill_quietly(c_channel)
                return "Unable to connect to c-channel after 5 seconds"
            # try to connect, and if we can't wait a short while
            try:
                self.cc_session = ISC.CC.Session(self.c_channel_port)
            except ISC.CC.session.SessionError:
                time.sleep(0.1)
        self.cc_session.group_subscribe("Boss")

        # start the configuration manager
        if self.verbose:
            sys.stdout.write("Starting bind-cfgd\n")
        bind_cfgd = self._spawn("bind-cfgd", "bind-cfgd", dev_null, dev_null, {})
        if bind_cfgd is None:
            self._kill_quietly(c_channel)
            return "Unable to start bind-cfgd"

        # start the parking lot
        # XXX: this must be read from the configuration manager in the future
        # XXX: we hardcode port 5300
        if self.verbose:
            sys.stdout.write("Starting parkinglot\n")
        parkinglot = self._spawn("parkinglot", ["parkinglot", "-p", "5300"],
                                 dev_null, dev_null, {})
        if parkinglot is None:
            self._kill_quietly(c_channel)
            self._kill_quietly(bind_cfgd)
            return "Unable to start parkinglot"

        # remember our super-important process
        self.cc_process = c_channel
        return None

    def _spawn(self, name, args, stdin, dev_null, env):
        """Start one child process and record it in self.processes.

        name is used only for the verbose message; args, stdin and env
        are passed to subprocess.Popen, and stdout/stderr are sent to
        dev_null.

        Returns the Popen object, or None if the child could not be
        started.
        """
        try:
            process = subprocess.Popen(args,
                                       stdin=stdin,
                                       stdout=dev_null,
                                       stderr=dev_null,
                                       close_fds=True,
                                       env=env)
        except (OSError, ValueError):
            return None
        self.processes[process.pid] = process
        if self.verbose:
            sys.stdout.write("Started %s with PID %d\n" % (name, process.pid))
        return process

    def _kill_quietly(self, process):
        """Send SIGKILL to process, ignoring a child that is already gone."""
        try:
            process.kill()
        except OSError:
            pass

    def stop_all_processes(self):
        """Stop all processes."""
        self.cc_session.group_sendmsg({ "shutdown": True }, "Boss")

    def stop_process(self, process):
        """Stop the given process, friendly-like."""
        # XXX nothing yet
        pass

    def shutdown(self):
        """Stop the BoB instance.

        Escalates in three steps: a shutdown message over the
        c-channel, then SIGTERM, then SIGKILL for anything that is
        still running.
        """
        if self.verbose:
            sys.stdout.write("Stopping the server.\n")
        # first try using the BIND 10 request to stop
        if self.cc_session:
            try:
                self.stop_all_processes()
            except Exception:
                # Best effort only: the signals below are the fallback.
                pass
        time.sleep(0.1)  # XXX: some delay probably useful... how much is uncertain
        # next try sending a SIGTERM
        processes_to_stop = list(self.processes.values())
        for process in processes_to_stop:
            if self.verbose:
                sys.stdout.write("Sending SIGTERM to process %d.\n" % process.pid)
            try:
                process.terminate()
            except OSError:
                # ignore these (usually ESRCH because the child
                # finally exited)
                pass
        time.sleep(0.1)  # XXX: some delay probably useful... how much is uncertain
        # poll() reaps a child that has exited and copes with one that
        # was already reaped elsewhere, where a direct os.waitpid()
        # would raise ECHILD.
        unstopped_processes = [process for process in processes_to_stop
                               if process.poll() is None]
        # finally, send a SIGKILL (unmaskable termination)
        for process in unstopped_processes:
            if self.verbose:
                sys.stdout.write("Sending SIGKILL to process %d.\n" % process.pid)
            try:
                process.kill()
            except OSError:
                # ignore these (usually ESRCH because the child
                # finally exited)
                pass
        if self.verbose:
            sys.stdout.write("All processes ended, server done.\n")

    def reap(self, pid, exit_status):
        """The process specified by pid has exited with the value
        exit_status, so perform any action necessary (cleanup,
        restart, and so on).

        Returns True if everything is okay, or False if a fatal error
        has been detected and the program should exit.
        """
        # A child can exit before startup() has recorded it, so an
        # unknown PID must not be an error here.
        process = self.processes.pop(pid, None)
        if process is not None:
            self.dead_processes[process.pid] = process
        if self.verbose:
            sys.stdout.write("Process %d died.\n" % pid)
        if self.cc_process and (pid == self.cc_process.pid):
            # Losing msgq is the one death treated as fatal.
            if self.verbose:
                sys.stdout.write("The msgq process died, shutting down.\n")
            return False
        return True
205
206
207
208
209
210
211
212
213
214
215
216

if __name__ == "__main__":
    def reaper(signal_number, stack_frame):
        """SIGCHLD handler: collect every child that has exited.

        Each exited child is handed to boss_of_bind.reap(). If reap()
        reports a fatal condition, SIGCHLD is reset to its default
        action, the boss is shut down and the program exits with
        status 0.
        """
        global boss_of_bind
        while True:
            try:
                child_pid, child_status = os.waitpid(-1, os.WNOHANG)
            except OSError as err:
                if err.errno != errno.ECHILD:
                    raise
                # No child processes exist at all.
                return
            if child_pid == 0:
                # Nothing further to collect right now.
                return
            if boss_of_bind.reap(child_pid, child_status):
                continue
            signal.signal(signal.SIGCHLD, signal.SIG_DFL)
            boss_of_bind.shutdown()
            sys.exit(0)
                   
                
223
224
225
226
227
228
229
230
231

    def get_signame(signal_number):
        """Return the symbolic name for a signal.

        Looks through the signal module for a SIGxxx attribute (names
        such as SIG_DFL, where the fourth character is not
        alphanumeric, are skipped) equal to signal_number. If there is
        none, returns "Unknown signal N" instead.
        """
        matching_names = (name for name in dir(signal)
                          if name.startswith("SIG")
                          and name[3].isalnum()
                          and getattr(signal, name) == signal_number)
        found = next(matching_names, None)
        if found is None:
            return "Unknown signal %d" % signal_number
        return found

232
    # XXX: perhaps register atexit() function and invoke that instead
233
234
235
236
237
238
239
240
241
242
243
244
    def fatal_signal(signal_number, stack_frame):
        """Handler for SIGINT and SIGTERM: stop everything and exit.

        Reports the signal when running verbosely, resets SIGCHLD to
        its default action, shuts the boss down if there is one, and
        exits with status 0.
        """
        global boss_of_bind, options
        if options.verbose:
            signame = get_signame(signal_number)
            sys.stdout.write("Received %s.\n" % signame)
        # Stop routing child deaths to reaper() while we shut down.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        if boss_of_bind:
            boss_of_bind.shutdown()
        sys.exit(0)

    def check_port(option, opt_str, value, parser):
        """Function to ensure that the port we are passed is actually
        a valid port number. Used by OptionParser() on startup.

        On success the value is stored, still as a string, in
        parser.values.msgq_port. Otherwise OptionValueError is raised,
        naming the option (opt_str) that was given.
        """
        # Raw string: the pattern relies on \d, which is not a valid
        # escape sequence in an ordinary string literal.
        port_pattern = r'^(6553[0-5]|655[0-2]\d|65[0-4]\d\d|6[0-4]\d{3}|[1-5]\d{4}|[1-9]\d{0,3}|0)$'
        if not re.match(port_pattern, value):
            raise OptionValueError("%s requires a port number (0-65535)" % opt_str)
        parser.values.msgq_port = value

251
    # Parse any command-line options.
    parser = OptionParser(version=__version__)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="display more about what is going on")
    parser.add_option("-m", "--msgq-port", dest="msgq_port", type="string",
                      action="callback", callback=check_port, default="9912",
                      help="port the msgq daemon will use")
    (options, args) = parser.parse_args()

    # Announce startup.
    if options.verbose:
        sys.stdout.write("BIND 10 %s\n" % __version__)

    # TODO: set process name, perhaps by:
    #       http://code.google.com/p/procname/
    #       http://github.com/lericson/procname/

    # The signal handlers refer to boss_of_bind, so the name has to
    # exist before any of them can possibly run.
    boss_of_bind = None

    # Set signal handlers for catching child termination, as well
    # as our own demise.
    signal.signal(signal.SIGCHLD, reaper)
    signal.siginterrupt(signal.SIGCHLD, False)
    signal.signal(signal.SIGINT, fatal_signal)
    signal.signal(signal.SIGTERM, fatal_signal)

    # Go bob!
    boss_of_bind = BoB(int(options.msgq_port), options.verbose)
    startup_result = boss_of_bind.startup()
    if startup_result:
        sys.stderr.write("Error on startup: %s\n" % startup_result)
        sys.exit(1)

    # Everything else happens in the signal handlers; just idle.
    while True:
        time.sleep(1)