# Copyright (C) 2010  Internet Systems Consortium.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
# DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
# FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# $Id$

import os
import re
import string
import sys
import time

#########################################################################
# define exceptions
#########################################################################
class MasterFileError(Exception):
    """Raised when a zone master file cannot be opened or parsed."""

#########################################################################
# pop: remove the first word from a line
# input: a line
# returns: first word, rest of the line
#########################################################################
def pop(line):
    """Split the first whitespace-delimited word off *line*.

    Returns a (first, rest) tuple.  'rest' is the remaining words joined
    by single spaces; either element is '' when there is nothing to return.
    """
    words = line.split()
    if not words:
        return '', ''
    # ' '.join([]) is '', so a single-word line yields an empty remainder
    return words[0], ' '.join(words[1:])

#########################################################################
# cleanup: removes excess content from zone file data, including comments
# and extra whitespace
# input:
#   line of text
# returns:
#   the same line, with comments removed, leading and trailing
#   whitespace removed, and all other whitespace compressed to
#   single spaces
#########################################################################
# Matches optional whitespace followed by a ';' comment running to the end
# of the line.  Only safe to apply when the line holds no quoting characters.
decomment = re.compile(r'\s*(?:;.*)+')

def _comment_start(s):
    """Return the index of the first ';' that starts a comment, or -1.

    A ';' inside a double-quoted string, or one preceded by a backslash,
    is data rather than a comment delimiter and is skipped.
    """
    in_quotes = False
    escaped = False
    for index, ch in enumerate(s):
        if escaped:
            # the character after a backslash is always taken literally
            escaped = False
        elif ch == '\\':
            escaped = True
        elif ch == '"':
            in_quotes = not in_quotes
        elif ch == ';' and not in_quotes:
            return index
    return -1

def cleanup(s):
    """Strip comments and normalize whitespace in one line of zone data.

    Input:   a line of text
    Returns: the same line with any trailing ';' comment removed, leading
             and trailing whitespace removed, and every other run of
             whitespace (tabs included) compressed to a single space.

    A ';' that is inside a double-quoted string or escaped with a
    backslash is preserved.
    """
    s = s.strip()
    if '"' in s or '\\' in s:
        # quoting present: locate the real comment delimiter by scanning
        cut = _comment_start(s)
        if cut >= 0:
            s = s[:cut]
    else:
        s = decomment.sub('', s)
    # split()/join collapses all whitespace, so no expandtabs() is needed
    return ' '.join(s.split())

#########################################################################
# istype: check whether a string is a known RR type.
# returns: boolean
#########################################################################
# Lower-case mnemonics of every RR type this parser recognizes.
rrtypes = set(
    'a aaaa afsdb apl cert cname dhcid '
    'dlv dname dnskey ds gpos hinfo hip '
    'ipseckey isdn key kx loc mb md '
    'mf mg minfo mr mx naptr ns nsap '
    'nsap-ptr nsec nsec3 nsec3param null '
    'nxt opt ptr px rp rrsig rt sig '
    'soa spf srv sshfp tkey tsig txt '
    'x25 wks'.split()
)

def istype(s):
    """Return True if *s* names a known RR type (case-insensitive)."""
    return s.lower() in rrtypes

#########################################################################
# isclass: check whether a string is a known RR class.  (only 'IN' is
# supported, but the others must still be recognizable.)
# returns: boolean
#########################################################################
# Lower-case mnemonics of the RR classes that must be recognizable.
rrclasses = set('in ch chaos hs hesiod'.split())

def isclass(s):
    """Return True if *s* names a known RR class (case-insensitive)."""
    return s.lower() in rrclasses

#########################################################################
# isname: check whether a string is a valid DNS name.
# returns: boolean
#########################################################################
# One or more dot-separated labels made of word characters, '-', '$', '/'
# (and '*' in the first label only), with an optional trailing dot.
name_regex = re.compile(r'[-\w\$\d\/*]+(?:\.[-\w\$\d\/]+)*\.?')

def isname(s):
    """Return True if *s* looks like a DNS name.

    The root name '.' is accepted explicitly.

    NOTE(review): the pattern is anchored only at the start (re.match), so
    a string that merely *begins* with a valid name is also accepted.  That
    matches the historical behavior; confirm before tightening it, since
    names containing backslash escapes currently rely on it.
    """
    return s == '.' or name_regex.match(s) is not None

#########################################################################
# isttl: check whether a string is a valid TTL specifier.
# returns: boolean
#########################################################################
# Zero or more "<digits><unit>" groups followed by a final "<digits>" with
# an optional unit.  This accepts exactly the same strings as the simpler
# '([0-9]+[wdhms]?)+$', but every digit run before the last must end in a
# unit letter, so the match is unambiguous and cannot backtrack
# exponentially on a long run of digits followed by an invalid character.
ttl_regex = re.compile('(?:[0-9]+[wdhms])*[0-9]+[wdhms]?$', re.I)

def isttl(s):
    """Return True if *s* is a valid TTL specifier.

    A TTL is a number of seconds, or a sequence of numbers each optionally
    followed by one of the unit letters w, d, h, m, s (case-insensitive),
    e.g. '3600', '1h30m', '2W'.
    """
    return ttl_regex.match(s) is not None

#########################################################################
# parse_ttl: convert a TTL field into an integer TTL value
# (multiplying as needed for minutes, hours, etc.)
# input:
#   string
# returns:
#   the TTL in seconds, as a decimal string
# throws:
#   MasterFileError
#########################################################################
def parse_ttl(s):
    """Convert a TTL field into a number of seconds.

    Input:   a TTL specifier such as '3600' or '1h30m'
    Returns: the total number of seconds as a decimal *string* (callers
             concatenate it and test it for truthiness, so '0' must stay
             distinguishable from "no TTL")
    Throws:  MasterFileError if *s* is not a valid TTL specifier
    """
    if not isttl(s):
        raise MasterFileError('Invalid TTL: ' + s)

    # seconds per unit letter; a bare number or 's' means seconds
    multipliers = {'w': 604800, 'd': 86400, 'h': 3600, 'm': 60, 's': 1}

    total = 0
    for ttl_expr in re.findall(r'\d+[wdhms]?', s, re.I):
        if ttl_expr.isdigit():
            total += int(ttl_expr)
        else:
            suffix = ttl_expr[-1].lower()
            # any unlisted suffix is treated as seconds
            total += int(ttl_expr[:-1]) * multipliers.get(suffix, 1)
    return str(total)

#########################################################################
# records: generator function to return complete RRs from the zone file,
# combining lines when necessary because of parentheses
# input:
#   an open zone master file (or any other iterable of lines)
# yields:
#   (complete RR, number of characters read to produce it) tuples
#########################################################################
def records(input):
    """Yield complete resource records from an iterable of zone-file lines.

    Lines are cleaned with cleanup() and split into words.  A '(' opens a
    multi-line group and the matching ')' closes it; the parentheses
    themselves are dropped and all words in between are joined into one
    record.

    Input:  an iterable of lines (e.g. an open file)
    Yields: (record, size) tuples, where record is the words of one RR
            joined by single spaces and size is the total length of the
            input lines consumed since the previous record (blank and
            comment-only lines included).

    NOTE(review): a group still open at end of input is dropped silently,
    as before.  Parentheses inside quoted strings are not special-cased.
    """
    record = []
    paren = 0       # 1 while inside an open '(' ... ')' group
    size = 0
    for line in input:
        size += len(line)
        for word in cleanup(line).split():
            # A single word may hold several parentheses, e.g. "(foo)",
            # so keep consuming it until nothing is left.
            pending = word
            while pending:
                if paren == 0:
                    text, found, pending = pending.partition('(')
                    if found:
                        paren = 1
                else:
                    text, found, pending = pending.partition(')')
                    if found:
                        paren = 0
                if text:
                    record.append(text)

        # keep accumulating while a group is open or nothing was collected
        if paren == 1 or not record:
            continue

        ret = ' '.join(record)
        record = []
        oldsize = size
        size = 0
        yield ret, oldsize

#########################################################################
# define the MasterFile class for reading zone master files
#########################################################################
class MasterFile:
    """Reader for a zone master file.

    zonedata() yields the parsed resource records, zonename() returns the
    owner name of the SOA record, and reset() rewinds the file.

    The TTL state, the record counter and the include-nesting state are
    stored on the class, not the instance, so they are shared by every
    MasterFile object, including those created for $INCLUDEd files.
    """
    __rrclass = 'IN'            # class assumed when an RR omits it
    __maxttl = 0x7fffffff       # largest TTL accepted from $TTL
    __ttl = ''                  # default TTL ($TTL, or taken from the SOA)
    __lastttl = ''              # most recent TTL given explicitly on an RR
    __zonefile = ''
    __name = ''
    __file_level = 0            # current $INCLUDE nesting depth
    __file_type = ""            # "included " while reading an included file
    __init_time = time.time()   # evaluated once, when the class is defined
    __records_num = 0           # number of RRs yielded so far

    #########################################################################
    # constructor
    # input:
    #   filename of the zone master file, the initial origin (may be
    #   empty), and whether to print progress to stdout
    # throws:
    #   MasterFileError if the file cannot be opened
    #########################################################################
    def __init__(self, filename, initial_origin = '', verbose = False):
        self.__initial_origin = initial_origin
        self.__origin = initial_origin
        self.__datafile = filename

        # open the file exactly once; a second open() would leak a handle
        try:
            self.__zonefile = open(filename, 'r')
        except (IOError, OSError):
            raise MasterFileError("Could not open " + filename)
        self.__filesize = os.fstat(self.__zonefile.fileno()).st_size

        self.__cur = 0
        self.__numback = 0
        self.__verbose = verbose

    #########################################################################
    # print a one-line progress report to stdout, overwriting the previous
    # one: records loaded, elapsed time, and percentage of the file read
    #########################################################################
    def __status(self):
        interval = time.time() - MasterFile.__init_time
        if self.__filesize == 0:
            percent = 100
        else:
            percent = (self.__cur * 100)/self.__filesize

        sys.stdout.write("\r" + (80 * " "))
        sys.stdout.write("\r%d RR(s) loaded in %.2f second(s) (%.2f%% of %s%s)"\
                % (MasterFile.__records_num, interval, percent, MasterFile.__file_type, self.__datafile))

    #########################################################################
    # close the zone file when the object is destroyed
    #########################################################################
    def __del__(self):
        # the class-level default is '', so this is safe even when
        # __init__ failed before the file was opened
        if self.__zonefile:
            self.__zonefile.close()

    #########################################################################
    # make a domain name absolute
    # input:
    #   a name, and the record it came from (used in error messages)
    # returns:
    #   the name unchanged if it ends with '.', otherwise the name with
    #   the current origin appended
    # throws:
    #   MasterFileError if the name is empty, or is relative and no origin
    #   is set
    #########################################################################
    def __statedname(self, name, record):
        if not name:
            # e.g. the first RR in the file omitted its owner name
            raise MasterFileError("Cannot parse RR, no name: " + record)
        if name[-1] != '.':
            if not self.__origin:
                raise MasterFileError("Cannot parse RR, No $ORIGIN: " + record)
            elif self.__origin == '.':
                name += '.'
            else:
                name += '.' + self.__origin
        return name

    #########################################################################
    # handle $ORIGIN, $TTL and $GENERATE directives
    # (currently only $ORIGIN and $TTL are implemented)
    # input:
    #   a line from a zone file
    # returns:
    #   a boolean indicating whether a directive was found
    # throws:
    #   MasterFileError
    #########################################################################
    def __directive(self, s):
        first, more = pop(s)
        second, more = pop(more)
        # compare the whole keyword so that e.g. "$originx" is not
        # mistaken for $ORIGIN
        keyword = first.lower()
        if keyword == '$origin':
            if not second or not isname(second):
                raise MasterFileError('Invalid $ORIGIN')
            if more:
                raise MasterFileError('Invalid $ORIGIN')
            if second[-1] == '.':
                self.__origin = second
            elif not self.__origin:
                raise MasterFileError("$ORIGIN is not absolute in record: %s" % s)
            elif self.__origin != '.':
                self.__origin = second + '.' + self.__origin
            else:
                self.__origin = second + '.'
            return True
        elif keyword == '$ttl':
            if not second or not isttl(second):
                raise MasterFileError('Invalid TTL: "' + second + '"')
            if more:
                raise MasterFileError('Invalid $TTL statement')
            ttl = parse_ttl(second)
            # validate before storing so a rejected value is never kept
            if int(ttl) > self.__maxttl:
                raise MasterFileError('TTL too high: ' + second)
            MasterFile.__ttl = ttl
            return True
        elif keyword == '$generate':
            raise MasterFileError('$GENERATE not yet implemented')
        else:
            return False

    #########################################################################
    # handle $INCLUDE directives
    # input:
    #   a line from a zone file
    # returns:
    #   (filename, origin) for the file to include, or ("", "") if the
    #   line is not an $INCLUDE directive
    # throws:
    #   MasterFileError
    #########################################################################
    __include_syntax1 = re.compile(r'\s+(\S+)(?:\s+(\S+))?$', re.I)
    __include_syntax2 = re.compile(r'\s+"([^"]+)"(?:\s+(\S+))?$', re.I)
    __include_syntax3 = re.compile(r"\s+'([^']+)'(?:\s+(\S+))?$", re.I)
    def __include(self, s):
        if not s.lower().startswith('$include'):
            return "", ""
        s = s[len('$include'):]
        # Try the quoted forms first: the unquoted pattern also matches a
        # quoted filename without spaces and would keep the quote
        # characters as part of the filename.
        m = self.__include_syntax2.match(s)
        if not m:
            m = self.__include_syntax3.match(s)
        if not m:
            m = self.__include_syntax1.match(s)
        if not m:
            raise MasterFileError('Invalid $include format')
        file = m.group(1)
        if m.group(2):
            if not isname(m.group(2)):
                raise MasterFileError('Invalid $include format (invalid origin)')
            origin = self.__statedname(m.group(2), s)
        else:
            origin = self.__origin
        return file, origin

    #########################################################################
    # try parsing an RR on the assumption that name, ttl and class are all
    # specified in fields 1-3 (ttl and class in either order), with the
    # type in field 4
    # input:
    #   a record to parse, and the most recent name found in prior records
    # returns:
    #   empty string if parse failed, else name, ttl, class, type, rdata
    #########################################################################
    def __four(self, record, curname):
        ret = ''
        fields = record.split()
        if len(fields) <= 4:
            return ret
        if istype(fields[3]):
            if isclass(fields[2]) and isttl(fields[1]) and isname(fields[0]):
                name, ttl, rrclass, rrtype = fields[0:4]
                ttl = parse_ttl(ttl)
                MasterFile.__lastttl = ttl or MasterFile.__lastttl
                rdata = ' '.join(fields[4:])
                ret = name, ttl, rrclass, rrtype, rdata
            elif isclass(fields[1]) and isttl(fields[2]) and isname(fields[0]):
                name, rrclass, ttl, rrtype = fields[0:4]
                ttl = parse_ttl(ttl)
                MasterFile.__lastttl = ttl or MasterFile.__lastttl
                rdata = ' '.join(fields[4:])
                ret = name, ttl, rrclass, rrtype, rdata
        return ret

    #########################################################################
    # return the TTL to use for an RR that does not state one: the default
    # TTL if set, otherwise the most recent explicit TTL
    #########################################################################
    def __getttl(self):
        return MasterFile.__ttl or MasterFile.__lastttl

    #########################################################################
    # try parsing an RR on the assumption that the type is specified
    # in field 3, and one of name, ttl, or class has been omitted
    # input:
    #   a record to parse, and the most recent name found in prior records
    # returns:
    #   empty string if parse failed, else name, ttl, class, type, rdata
    #########################################################################
    def __three(self, record, curname):
        ret = ''
        fields = record.split()
        if len(fields) <= 3:
            return ret
        if istype(fields[2]) and not istype(fields[1]):
            if isclass(fields[1]) and not isttl(fields[0]) and isname(fields[0]):
                # name class type
                rrclass = fields[1]
                ttl = self.__getttl()
                name = fields[0]
            elif not isclass(fields[1]) and isttl(fields[1]) and not isclass(fields[0]) and isname(fields[0]):
                # name ttl type
                rrclass = self.__rrclass
                ttl = parse_ttl(fields[1])
                MasterFile.__lastttl = ttl or MasterFile.__lastttl
                name = fields[0]
            elif curname and isclass(fields[1]) and isttl(fields[0]):
                # ttl class type
                rrclass = fields[1]
                ttl = parse_ttl(fields[0])
                MasterFile.__lastttl = ttl or MasterFile.__lastttl
                name = curname
            elif curname and isttl(fields[1]) and isclass(fields[0]):
                # class ttl type
                rrclass = fields[0]
                ttl = parse_ttl(fields[1])
                MasterFile.__lastttl = ttl or MasterFile.__lastttl
                name = curname
            else:
                return ret
            rrtype = fields[2]
            rdata = ' '.join(fields[3:])
            ret = name, ttl, rrclass, rrtype, rdata
        return ret

    #########################################################################
    # try parsing an RR on the assumption that the type is specified in
    # field 2, and field 1 is either name, ttl or class
    # input:
    #   a record to parse, and the most recent name found in prior records
    # returns:
    #   empty string if parse failed, else name, ttl, class, type, rdata
    # throws:
    #   MasterFileError
    #########################################################################
    def __two(self, record, curname):
        ret = ''
        fields = record.split()
        if len(fields) <= 2:
            return ret
        if istype(fields[1]):
            rrclass = self.__rrclass
            rrtype = fields[1]
            if fields[0].lower() == 'rrsig':
                # "RRSIG <type covered> ...": field 1 is the type and
                # field 2 is already part of the rdata
                name = curname
                ttl = self.__getttl()
                rrtype = fields[0]
                rdata = ' '.join(fields[1:])
            elif isttl(fields[0]):
                ttl = parse_ttl(fields[0])
                # remember the explicit TTL, as __four and __three do
                MasterFile.__lastttl = ttl or MasterFile.__lastttl
                name = curname
                rdata = ' '.join(fields[2:])
            elif isclass(fields[0]):
                ttl = self.__getttl()
                name = curname
                rdata = ' '.join(fields[2:])
            elif isname(fields[0]):
                name = fields[0]
                ttl = self.__getttl()
                rdata = ' '.join(fields[2:])
            else:
                raise MasterFileError("Cannot parse RR: " + record)

            ret = name, ttl, rrclass, rrtype, rdata
        return ret

    #########################################################################
    # closeverbose: print a final progress report
    #########################################################################
    def closeverbose(self):
        self.__status()

    #########################################################################
    # zonedata: generator function to parse a zone master file and return
    # each RR as a (name, ttl, class, type, rdata) tuple
    # throws:
    #   MasterFileError
    #########################################################################
    def zonedata(self):
        name = ''
        # 0.0 guarantees a progress report on the very first record
        last_status = 0.0

        for record, size in records(self.__zonefile):
            if self.__verbose:
                now = time.time()
                if now - last_status >= 1.0:
                    self.__status()
                    last_status = now

            self.__cur += size
            if self.__directive(record):
                continue

            incl, suborigin = self.__include(record)
            if incl:
                if self.__verbose:
                    sys.stdout.write("\r" + (80 * " "))
                    sys.stdout.write("\rIncluding \"%s\" from \"%s\"\n" % (incl, self.__datafile))
                MasterFile.__file_level += 1
                MasterFile.__file_type = "included "
                sub = MasterFile(incl, suborigin, self.__verbose)

                for rrname, ttl, rrclass, rrtype, rdata in sub.zonedata():
                    yield (rrname, ttl, rrclass, rrtype, rdata)
                if self.__verbose:
                    sub.closeverbose()
                MasterFile.__file_level -= 1
                if MasterFile.__file_level == 0:
                    MasterFile.__file_type = ""
                del sub
                continue

            # replace @ with origin
            rl = record.split()
            if rl[0] == '@':
                rl[0] = self.__origin
                if not self.__origin:
                    raise MasterFileError("Cannot parse RR, No $ORIGIN: " + record)
                record = ' '.join(rl)

            # try the most fully specified layout first, then fall back
            result = self.__four(record, name)

            if not result:
                result = self.__three(record, name)

            if not result:
                result = self.__two(record, name)

            if not result:
                # only a type and rdata: reuse the previous name, the
                # default TTL and the default class
                first, rdata = pop(record)
                if istype(first):
                    result = name, self.__getttl(), self.__rrclass, first, rdata

            if not result:
                raise MasterFileError("Cannot parse RR: " + record)

            name, ttl, rrclass, rrtype, rdata = result
            name = self.__statedname(name, record)

            if rrclass.lower() != 'in':
                raise MasterFileError("CH and HS zones not supported")

            # add origin to rdata containing names, if necessary
            if rrtype.lower() in ('cname', 'dname', 'ns', 'ptr'):
                if not isname(rdata):
                    raise MasterFileError("Invalid " + rrtype + ": " + rdata)
                rdata = self.__statedname(rdata, record)

            if rrtype.lower() == 'soa':
                soa = rdata.split()
                if len(soa) < 2 or not isname(soa[0]) or not isname(soa[1]):
                    raise MasterFileError("Invalid " + rrtype + ": " + rdata)
                soa[0] = self.__statedname(soa[0], record)
                soa[1] = self.__statedname(soa[1], record)
                if not MasterFile.__ttl and not ttl:
                    # no TTL known yet: fall back to the last SOA field
                    MasterFile.__ttl = parse_ttl(soa[-1])
                    ttl = MasterFile.__ttl

                # fields after the serial are time values; store them
                # as plain seconds
                for index in range(3, len(soa)):
                    if isttl(soa[index]):
                        soa[index] = parse_ttl(soa[index])
                    else :
                        raise MasterFileError("No TTL specified; in soa record!")
                rdata = ' '.join(soa)

            if not ttl:
                raise MasterFileError("No TTL specified; zone rejected")

            if rrtype.lower() == 'mx':
                mx = rdata.split()
                if len(mx) != 2 or not isname(mx[1]):
                    raise MasterFileError("Invalid " + rrtype + ": " + rdata)
                # use the shared helper so a '.' origin does not produce
                # "host.." and a missing origin is reported
                mx[1] = self.__statedname(mx[1], record)
                rdata = ' '.join(mx)
            MasterFile.__records_num += 1
            yield (name, ttl, rrclass, rrtype, rdata)

    #########################################################################
    # zonename: scans zone data for an SOA record, returns its name, restores
    # the zone file to its prior state
    # throws:
    #   MasterFileError if no SOA record is found or the zone cannot be
    #   parsed
    #########################################################################
    def zonename(self):
        if self.__name:
            return self.__name
        old_origin = self.__origin
        cur_value = self.__cur
        old_location = self.__zonefile.tell()
        old_verbose = self.__verbose
        old_records_num = MasterFile.__records_num

        self.__origin = self.__initial_origin
        self.__verbose = False
        self.__zonefile.seek(0)

        soa_name = None
        try:
            for name, ttl, rrclass, rrtype, rdata in self.zonedata():
                if rrtype.lower() == 'soa':
                    soa_name = name
                    break
        finally:
            # restore the reader state even if parsing raised
            self.__zonefile.seek(old_location)
            self.__origin = old_origin
            self.__cur = cur_value
            self.__verbose = old_verbose
            # records seen during this scan must not be counted twice
            MasterFile.__records_num = old_records_num

        if soa_name is None:
            raise MasterFileError("No SOA found")
        self.__name = soa_name
        return soa_name

    #########################################################################
    # reset: reset the state of the master file
    #########################################################################
    def reset(self):
        self.__zonefile.seek(0)
        self.__origin = self.__initial_origin
        # progress is measured from the start of the file again
        self.__cur = 0
        MasterFile.__ttl = ''
        MasterFile.__lastttl = ''

#########################################################################
# main: used for testing; parse a zone file and print out each record
# broken up into separate name, ttl, class, type, and rdata fields
#########################################################################
def main():
    """Parse a zone file and print every record, one field per line.

    The file name is taken from the first command-line argument and
    defaults to 'testfile'.  The zone is read with '.' as its origin.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        # no argument given
        filename = 'testfile'
    master = MasterFile(filename, '.')
    print ('zone name: ' + master.zonename())
    print ('---------------------')
    for name, ttl, rrclass, rrtype, rdata in master.zonedata():
        print ('name: ' + name)
        print ('ttl: ' + ttl)
        print ('rrclass: ' + rrclass)
        print ('rrtype: ' + rrtype)
        print ('rdata: ' + rdata)
        print ('---------------------')
    del master

if __name__ == "__main__":
    main()