master_lexer.h 29.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.

#ifndef MASTER_LEXER_H
#define MASTER_LEXER_H 1

#include <exceptions/exceptions.h>

20
#include <istream>
21
22
23
24
25
26
#include <string>

#include <stdint.h>

namespace isc {
namespace dns {
27
28
29
// Forward declaration only.  The State class is named as a friend of
// MasterLexer further down in this header; its definition lives elsewhere.
namespace master_lexer_internal {
class State;
}
30

31
32
33
34
35
36
37
38
39
40
41
42
43
44
/// \brief Tokens for \c MasterLexer
///
/// This is a simple value-class encapsulating a type of a lexer token and
/// (if it has a value) its value.  Essentially, the class provides
/// constructors corresponding to different types of tokens, and corresponding
/// getter methods.  The type and value are fixed at the time of construction
/// and will never be modified throughout the lifetime of the object.
/// The getter methods are still provided to maximize the safety; an
/// application cannot refer to a value that is invalid for the type of token.
///
/// This class is intentionally implemented as copyable and assignable
/// (using the default version of copy constructor and assignment operator),
/// but it's mainly for internal implementation convenience.  Applications will
/// simply refer to Token object as a reference via the \c MasterLexer class.
45
class MasterToken {
46
public:
47
    /// \brief Enumeration for token types
48
49
50
51
52
    ///
    /// \note At the time of initial implementation, all numeric tokens
    /// that would be extracted from \c MasterLexer should be represented
    /// as an unsigned 32-bit integer.  If we see the need for larger integers
    /// or negative numbers, we can then extend the token types.
53
    enum Type {
54
55
        END_OF_LINE, ///< End of line detected
        END_OF_FILE, ///< End of file detected
56
        INITIAL_WS,  ///< White spaces at the beginning of a line after an
57
58
                     ///< end of line or at the beginning of file (if asked
                     //   for detecting it)
59
60
61
62
63
64
65
        NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
                                       /// no-value (type only) types.
                                       /// Mainly for internal use.
        STRING, ///< A single string
        QSTRING, ///< A single string quoted by double-quotes (").
        NUMBER,  ///< A decimal number (unsigned 32-bit)
        ERROR    ///< Error detected in getting a token
66
    };
67

68
    /// \brief Enumeration for lexer error codes
69
    enum ErrorCode {
70
71
72
        NOT_STARTED, ///< The lexer is just initialized and has no token
        UNBALANCED_PAREN,       ///< Unbalanced parentheses detected
        UNEXPECTED_END, ///< The lexer reaches the end of line or file
73
                        /// unexpectedly
74
        UNBALANCED_QUOTES,      ///< Unbalanced quotations detected
75
76
        NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
                           /// error and should never get out of the lexer.
Jelte Jansen's avatar
Jelte Jansen committed
77
        NUMBER_OUT_OF_RANGE, ///< Number was out of range
78
        BAD_NUMBER,    ///< Number is expected but not recognized
79
80
        MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
                       /// (excluding this one). Mainly for internal use.
81
82
    };

83
84
85
86
87
88
89
90
    /// \brief A simple representation of a range of a string.
    ///
    /// This is a straightforward pair of the start pointer of a string
    /// and its length.  The \c STRING and \c QSTRING types of tokens
    /// will be primarily represented in this form.
    ///
    /// Any character can be stored in the valid range of the region.
    /// In particular, there can be a nul character (\0) in the middle of
91
    /// the region.  So the usual string manipulation API may not work
92
    /// as expected.
93
94
95
96
97
98
99
    ///
    /// The `MasterLexer` implementation ensures that there are at least
    /// len + 1 bytes of valid memory region starting from beg, and that
    /// beg[len] is \0.  This means the application can use the bytes as a
    /// validly nul-terminated C string if there is no intermediate nul
    /// character.  Note also that due to this property beg is always non
    /// NULL; for an empty string len will be set to 0 and beg[0] is \0.
100
    struct StringRegion {
101
102
        const char* beg;        ///< The start address of the string
        size_t len;             ///< The length of the string in bytes
103
104
    };

105
106
107
108
109
    /// \brief Constructor for non-value type of token.
    ///
    /// \throw InvalidParameter A value type token is specified.
    /// \param type The type of the token.  It must indicate a non-value
    /// type (not larger than \c NOVALUE_TYPE_MAX).
110
    explicit MasterToken(Type type) : type_(type) {
111
        if (type > NOVALUE_TYPE_MAX) {
112
113
            isc_throw(InvalidParameter, "Token per-type constructor "
                      "called with invalid type: " << type);
114
        }
115
    }
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

    /// \brief Constructor for string and quoted-string types of token.
    ///
    /// The optional \c quoted parameter specifies whether it's a quoted or
    /// non quoted string.
    ///
    /// The string is specified as a pair of a pointer to the start address
    /// and its length.  Any character can be contained in any position of
    /// the valid range (see \c StringRegion).
    ///
    /// When it's a quoted string, the quotation marks must be excluded
    /// from the specified range.
    ///
    /// \param str_beg The start address of the string
    /// \param str_len The size of the string in bytes
    /// \param quoted true if it's a quoted string; false otherwise.
132
    MasterToken(const char* str_beg, size_t str_len, bool quoted = false) :
133
134
135
136
137
        type_(quoted ? QSTRING : STRING)
    {
        val_.str_region_.beg = str_beg;
        val_.str_region_.len = str_len;
    }
138
139
140
141
142

    /// \brief Constructor for number type of token.
    ///
    /// \brief number An unsigned 32-bit integer corresponding to the token
    /// value.
143
    explicit MasterToken(uint32_t number) : type_(NUMBER) {
144
145
        val_.number_ = number;
    }
146
147
148
149
150

    /// \brief Constructor for error type of token.
    ///
    /// \throw InvalidParameter Invalid error code value is specified.
    /// \brief error_code A pre-defined constant of \c ErrorCode.
151
    explicit MasterToken(ErrorCode error_code) : type_(ERROR) {
152
        if (!(error_code < MAX_ERROR_CODE)) {
153
154
155
156
157
            isc_throw(InvalidParameter, "Invalid master lexer error code: "
                      << error_code);
        }
        val_.error_code_ = error_code;
    }
158

159
160
161
    /// \brief Return the token type.
    ///
    /// \throw none
162
    Type getType() const { return (type_); }
163
164
165
166
167
168
169

    /// \brief Return the value of a string-variant token.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \return A reference to \c StringRegion corresponding to the string
    ///         token value.
    const StringRegion& getStringRegion() const {
170
171
        if (type_ != STRING && type_ != QSTRING) {
            isc_throw(InvalidOperation,
172
                      "Token::getStringRegion() for non string-variant type");
173
        }
174
        return (val_.str_region_);
175
    }
176
177
178
179
180
181
182
183
184
185
186
187
188

    /// \brief Return the value of a string-variant token as a string object.
    ///
    /// Note that the underlying string may contain a nul (\0) character
    /// in the middle.  The returned string object will contain all characters
    /// of the valid range of the underlying string.  So some string
    /// operations such as c_str() may not work as expected.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    ///                       string object.
    /// \return A std::string object corresponding to the string token value.
    std::string getString() const {
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
        std::string ret;
        getString(ret);
        return (ret);
    }

    /// \brief Fill in a string with the value of a string-variant token.
    ///
    /// This is similar to the other version of \c getString(), but
    /// the caller is supposed to pass a placeholder string object.
    /// This will be more efficient if the caller uses the same
    /// \c MasterLexer repeatedly and needs to get string token in the
    /// form of a string object many times as this version could reuse
    /// the existing internal storage of the passed string.
    ///
    /// Any existing content of the passed string will be removed.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    ///                       string object.
    ///
    /// \param ret A string object to be filled with the token string.
    void getString(std::string& ret) const {
211
212
        if (type_ != STRING && type_ != QSTRING) {
            isc_throw(InvalidOperation,
213
                      "Token::getString() for non string-variant type");
214
        }
215
216
        ret.assign(val_.str_region_.beg,
                   val_.str_region_.beg + val_.str_region_.len);
217
    }
218
219
220
221
222

    /// \brief Return the value of a string-variant token as a string object.
    ///
    /// \throw InvalidOperation Called on a non number type of token.
    /// \return The integer corresponding to the number token value.
223
224
225
226
    uint32_t getNumber() const {
        if (type_ != NUMBER) {
            isc_throw(InvalidOperation,
                      "Token::getNumber() for non number type");
227
        }
228
229
        return (val_.number_);
    }
230
231
232
233
234

    /// \brief Return the error code of a error type token.
    ///
    /// \throw InvalidOperation Called on a non error type of token.
    /// \return The error code of the token.
235
236
237
238
239
240
241
    ErrorCode getErrorCode() const {
        if (type_ != ERROR) {
            isc_throw(InvalidOperation,
                      "Token::getErrorCode() for non error type");
        }
        return (val_.error_code_);
    };
242
243
244
245
246
247
248
249
250
251

    /// \brief Return a textual description of the error of a error type token.
    ///
    /// The returned string would be useful to produce a log message when
    /// a zone file parser encounters an error.
    ///
    /// \throw InvalidOperation Called on a non error type of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    ///                       string object.
    /// \return A string object that describes the meaning of the error.
252
    std::string getErrorText() const;
253

254
private:
255
256
257
258
259
260
    Type type_;    // this is not const so the class can be assignable

    // We use a union to represent different types of token values via the
    // unified Token class.  The class integrity should ensure valid operation
    // on the union; getter methods should only refer to the member set at
    // the construction.
261
262
263
    union {
        StringRegion str_region_;
        uint32_t number_;
264
        ErrorCode error_code_;
265
    } val_;
266
267
};

268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
/// \brief Tokenizer for parsing DNS master files.
///
/// The \c MasterLexer class provides tokenize interfaces for parsing DNS
/// master files.  It understands some special rules of master files as
/// defined in RFC 1035, such as comments, character escaping, and multi-line
/// data, and provides the user application with the actual data in a
/// more convenient form such as a std::string object.
///
/// In order to support the $INCLUDE notation, this class is designed to be
/// able to operate on multiple files or input streams in the nested way.
/// The \c pushSource() and \c popSource() methods correspond to the push
/// and pop operations.
///
/// While this class is public, it is less likely to be used by normal
/// applications; it's mainly expected to be used within this library,
/// specifically by the \c MasterLoader class and \c Rdata implementation
/// classes.
///
/// \note The error handling policy of this class is slightly different from
/// that of other classes of this library.  We generally throw an exception
/// for an invalid input, whether it's more likely to be a program error or
/// a "user error", which means an invalid input that comes from outside of
/// the library.  But, this class returns an error code for some certain
/// types of user errors instead of throwing an exception.  Such cases include
/// a syntax error identified by the lexer or a misspelled file name that
/// causes a system error at the time of open.  This is based on the assumption
/// that the main user of this class is a parser of master files, where
/// we want to give an option to ignore some non fatal errors and continue
/// the parsing.  This will be useful if it just performs overall error
/// checks on a master file.  When the (immediate) caller needs to do explicit
/// error handling, exceptions are not that a useful tool for error reporting
/// because we cannot separate the normal and error cases anyway, which would
/// be one major advantage when we use exceptions.  And, exceptions are
/// generally more expensive, either when it happens or just by being able
/// to handle with \c try and \c catch (depending on the underlying
/// implementation of the exception handling).  For these reasons, some of
/// this class does not throw for an error that would be reported as an
/// exception in other classes.
class MasterLexer {
    friend class master_lexer_internal::State;
public:
    /// \brief Exception thrown when we fail to read from the input
    /// stream or file.
311
312
    class ReadError : public Unexpected {
    public:
313
314
315
316
317
        ReadError(const char* file, size_t line, const char* what) :
            Unexpected(file, line, what)
        {}
    };

JINMEI Tatuya's avatar
JINMEI Tatuya committed
318
319
320
321
322
323
324
    /// \brief Exception thrown from a wrapper version of
    /// \c MasterLexer::getNextToken() for non fatal errors.
    ///
    /// See the method description for more details.
    ///
    /// The \c token_ member variable (read-only) is set to a \c MasterToken
    /// object of type ERROR indicating the reason for the error.
325
326
327
328
329
330
331
332
333
    class LexerError : public Exception {
    public:
        LexerError(const char* file, size_t line, MasterToken error_token) :
            Exception(file, line, error_token.getErrorText().c_str()),
            token_(error_token)
        {}
        const MasterToken token_;
    };

334
335
336
337
338
339
340
341
342
343
    /// \brief Special value for input source size meaning "unknown".
    ///
    /// This constant value will be used as a return value of
    /// \c getTotalSourceSize() when the size of one of the pushed sources
    /// is unknown.  Note that this value itself is a valid integer in the
    /// range of the type, so there's still a small possibility of
    /// ambiguity.  In practice, however, the value should be sufficiently
    /// large that should eliminate the possibility.
    static const size_t SOURCE_SIZE_UNKNOWN;

344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
    /// \brief Options for getNextToken.
    ///
    /// A compound option, indicating multiple options are set, can be
    /// specified using the logical OR operator (operator|()).
    enum Options {
        NONE = 0,               ///< No option
        INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
                        ///< end-of-line
        QSTRING = 2,    ///< recognize quoted string
        NUMBER = 4   ///< recognize numeric text as integer
    };

    /// \brief The constructor.
    ///
    /// \throw std::bad_alloc Internal resource allocation fails (rare case).
    MasterLexer();

    /// \brief The destructor.
    ///
    /// It internally closes any remaining input sources.
    ~MasterLexer();

    /// \brief Open a file and make it the current input source of MasterLexer.
    ///
    /// The opened file can be explicitly closed by the \c popSource() method;
    /// if \c popSource() is not called within the lifetime of the
    /// \c MasterLexer, it will be closed in the destructor.
    ///
    /// In the case possible system errors in opening the file (most likely
    /// because of specifying a non-existent or unreadable file), it returns
    /// false, and if the optional \c error parameter is non NULL, it will be
    /// set to a description of the error (any existing content of the string
    /// will be discarded).  If opening the file succeeds, the given
    /// \c error parameter will be intact.
    ///
    /// Note that this method has two styles of error reporting: one by
    /// returning \c false (and setting \c error optionally) and the other
    /// by throwing an exception.  See the note for the class description
    /// about the distinction.
    ///
    /// \throw InvalidParameter filename is NULL
    /// \param filename A non NULL string specifying a master file
    /// \param error If non null, a placeholder to set error description in
    /// case of failure.
    ///
    /// \return true if pushing the file succeeds; false otherwise.
    bool pushSource(const char* filename, std::string* error = NULL);

    /// \brief Make the given stream the current input source of MasterLexer.
    ///
    /// The caller still holds the ownership of the passed stream; it's the
    /// caller's responsibility to keep it valid as long as it's used in
    /// \c MasterLexer or to release any resource for the stream after that.
    /// The caller can explicitly tell \c MasterLexer to stop using the
    /// stream by calling the \c popSource() method.
    ///
400
401
402
403
    /// The data in \c input must be complete at the time of this call.
    /// The behavior of the lexer is undefined if the caller builds or adds
    /// data in \c input after pushing it.
    ///
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
    /// \param input An input stream object that produces textual
    /// representation of DNS RRs.
    void pushSource(std::istream& input);

    /// \brief Stop using the most recently opened input source (file or
    /// stream).
    ///
    /// If it's a file, the previously opened file will be closed internally.
    /// If it's a stream, \c MasterLexer will simply stop using
    /// the stream; the caller can assume it will be never used in
    /// \c MasterLexer thereafter.
    ///
    /// This method must not be called when there is no source pushed for
    /// \c MasterLexer.  This method is otherwise exception free.
    ///
    /// \throw isc::InvalidOperation Called with no pushed source.
    void popSource();

422
423
424
425
426
    /// \brief Get number of sources inside the lexer.
    ///
    /// This method never throws.
    size_t getSourceCount() const;

427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
    /// \brief Return the name of the current input source name.
    ///
    /// If it's a file, it will be the C string given at the corresponding
    /// \c pushSource() call, that is, its filename.  If it's a stream, it will
    /// be formatted as \c "stream-%p" where \c %p is hex representation
    /// of the address of the stream object.
    ///
    /// If there is no opened source at the time of the call, this method
    /// returns an empty string.
    ///
    /// \throw std::bad_alloc Resource allocation failed for string
    /// construction (rare case)
    ///
    /// \return A string representation of the current source (see the
    /// description)
    std::string getSourceName() const;

    /// \brief Return the input source line number.
    ///
    /// If there is an opened source, the return value will be a non-0
    /// integer indicating the line number of the current source where
    /// the \c MasterLexer is currently working.  The expected usage of
    /// this value is to print a helpful error message when parsing fails
    /// by specifically identifying the position of the error.
    ///
    /// If there is no opened source at the time of the call, this method
    /// returns 0.
    ///
    /// \throw None
    ///
    /// \return The current line number of the source (see the description)
    size_t getSourceLine() const;

460
461
462
463
464
465
466
467
468
469
470
    /// \brief Return the total size of pushed sources.
    ///
    /// This method returns the sum of the size of sources that have been
    /// pushed to the lexer by the time of the call.  It would give the
    /// caller of some hint about the amount of data the lexer is working on.
    ///
    /// The size of a normal file is equal to the file size at the time of
    /// the source is pushed.  The size of other type of input stream is
    /// the size of the data available in the stream at the time of the
    /// source is pushed.
    ///
471
472
473
474
475
476
    /// In some special cases, it's possible that the size of the file or
    /// stream is unknown.  It happens, for example, if the standard input
    /// is associated with a pipe from the output of another process and it's
    /// specified as an input source.  If the size of some of the pushed
    /// pushed source is unknown, this method returns SOURCE_SIZE_UNKNOWN.
    ///
477
478
479
480
481
    /// If there is no source pushed in the lexer, it returns 0.
    ///
    /// \throw None
    size_t getTotalSourceSize() const;

482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
    /// \brief Return the position of lexer in the currently pushed sources.
    ///
    /// This method returns the position in terms of the number of recognized
    /// characters from all sources.  Roughly speaking, the position in a
    /// single source is the offset from the beginning of the file or stream
    /// to the current "read cursor" of the lexer, and the return value of
    /// this method is the sum of the position in all the pushed sources.
    ///
    /// If the lexer reaches the end for each of all the pushed sources,
    /// the return value should be equal to that of \c getTotalSourceSize().
    ///
    /// If there is no source pushed in the lexer, it returns 0.
    ///
    /// The return values of this method and \c getTotalSourceSize() would
    /// give the caller an idea of the progress of the lexer at the time of
    /// the call.  Note, however, that since it's not predictable whether
    /// more sources will be pushed after the call, the progress determined
    /// this way may not make much sense; it can only give an informational
    /// hint of the progress.
    ///
502
    /// Note also that if a source is popped, this method will normally return
503
504
505
506
507
508
509
    /// a smaller number by definition.  Likewise, the conceptual "read
    /// cursor" would move backward after a call to \c ungetToken(), in which
    /// case this method will return a smaller value, too.
    ///
    /// \throw None
    size_t getPosition() const;

510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
    /// \brief Parse and return another token from the input.
    ///
    /// It reads a bit of the last opened source and produces another token
    /// found in it.
    ///
    /// This method does not provide the strong exception guarantee. Generally,
    /// if it throws, the object should not be used any more and should be
    /// discarded. It was decided all the exceptions thrown from here are
    /// serious enough that aborting the loading process is the only reasonable
    /// recovery anyway, so the strong exception guarantee is not needed.
    ///
    /// \param options The options can be used to modify the tokenization.
    ///     The method can be made reporting things which are usually ignored
    ///     by this parameter. Multiple options can be passed at once by
    ///     bitwise or (eg. option1 | option 2). See description of available
    ///     options.
    /// \return Next token found in the input. Note that the token refers to
    ///     some internal data in the lexer. It is valid only until
    ///     getNextToken or ungetToken is called. Also, the token becomes
    ///     invalid when the lexer is destroyed.
    /// \throw isc::InvalidOperation in case the source is not available. This
    ///     may mean the pushSource() has not been called yet, or that the
    ///     current source has been read past the end.
    /// \throw ReadError in case there's problem reading from the underlying
    ///     source (eg. I/O error in the file on the disk).
    /// \throw std::bad_alloc in case allocation of some internal resources
    ///     or the token fail.
    const MasterToken& getNextToken(Options options = NONE);

JINMEI Tatuya's avatar
JINMEI Tatuya committed
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
    /// \brief Parse the input for the expected type of token.
    ///
    /// This method is a wrapper of the other version, customized for the case
    /// where a particular type of token is expected as the next one.
    /// More specifically, it's intended to be used to get tokens for RDATA
    /// fields.  Since most RDATA types of fixed format, the token type is
    /// often predictable and the method interface can be simplified.
    ///
    /// This method basically works as follows: it gets the type of the
    /// expected token, calls the other version of \c getNextToken(Options),
    /// and returns the token if it's of the expected type (due to the usage
    /// assumption this should be normally the case).  There are some non
    /// trivial details though:
    ///
    /// - If the expected type is MasterToken::QSTRING, both quoted and
    ///   unquoted strings are recognized and returned.
    /// - If the optional \c eol_ok parameter is \c true (very rare case),
    ///   MasterToken::END_OF_LINE and MasterToken::END_OF_FILE are recognized
    ///   and returned if they are found instead of the expected type of
    ///   token.
    /// - If the next token is not of the expected type (including the case
    ///   a number is expected but it's out of range), ungetToken() is
    ///   internally called so the caller can re-read that token.
    /// - If other types or errors (such as unbalanced parentheses) are
    ///   detected, the erroneous part isn't "ungotten"; the caller can
    ///   continue parsing after that part.
    ///
    /// In some very rare cases where the RDATA has an optional trailing field,
    /// the \c eol_ok parameter would be set to \c true.  This way the caller
    /// can handle both cases (the field does or does not exist) by a single
    /// call to this method.  In all other cases \c eol_ok should be set to
    /// \c false, and that is the default and can be omitted.
    ///
    /// Unlike the other version of \c getNextToken(Options), this method
    /// throws an exception of type \c LexerError for non fatal errors such as
    /// broken syntax or encountering an unexpected type of token.  This way
    /// the caller can write RDATA parser code without bothering to handle
    /// errors for each field.  For example, pseudo parser code for MX RDATA
    /// would look like this:
    /// \code
    ///    const uint32_t pref =
    ///        lexer.getNextToken(MasterToken::NUMBER).getNumber();
    ///    // check if pref is the uint16_t range; no other check is needed.
    ///    const Name mx(lexer.getNextToken(MasterToken::STRING).getString());
    /// \endcode
    ///
    /// In the case where \c LexerError exception is thrown, it's expected
    /// to be handled comprehensively for the parser of the RDATA or at a
    /// higher layer.  The \c token_ member variable of the corresponding
    /// \c LexerError exception object stores a token of type
    /// \c MasterToken::ERROR that indicates the reason for the error.
    ///
    /// Due to the specific intended usage of this method, only a subset
    /// of \c MasterToken::Type values are acceptable for the \c expect
    /// parameter: \c MasterToken::STRING, \c MasterToken::QSTRING, and
    /// \c MasterToken::NUMBER.  Specifying other values will result in
    /// an \c InvalidParameter exception.
    ///
    /// \throw InvalidParameter The expected token type is not allowed for
    /// this method.
    /// \throw LexerError The lexer finds non fatal error or it finds an
    /// \throw other Anything the other version of getNextToken() can throw.
    ///
    /// \param expect Expected type of token.  Must be either STRING, QSTRING,
    /// or NUMBER.
    /// \param eol_ok \c true iff END_OF_LINE or END_OF_FILE is acceptable.
    /// \return The expected type of token.
606
607
608
    const MasterToken& getNextToken(MasterToken::Type expect,
                                    bool eol_ok = false);

609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
    /// \brief Return the last token back to the lexer.
    ///
    /// The method undoes the lasts call to getNextToken(). If you call the
    /// getNextToken() again with the same options, it'll return the same
    /// token. If the options are different, it may return a different token,
    /// but it acts as if the previous getNextToken() was never called.
    ///
    /// It is possible to return only one token back in time (you can't call
    /// ungetToken() twice in a row without calling getNextToken() in between
    /// successfully).
    ///
    /// It does not work after change of source (by pushSource or popSource).
    ///
    /// \throw isc::InvalidOperation If called second time in a row or if
    ///     getNextToken() was not called since the last change of the source.
    void ungetToken();

private:
    struct MasterLexerImpl;
    MasterLexerImpl* impl_;
};

/// \brief Combine two \c MasterLexer::Options values into one.
///
/// This convenience operator lets a caller write a compound option as
/// \c opt1 | \c opt2.  Both operands are converted to \c unsigned, OR-ed
/// bit by bit, and the result is converted back to \c MasterLexer::Options.
///
/// \param o1 The left-hand side option (or compound option).
/// \param o2 The right-hand side option (or compound option).
/// \return An \c Options value with the bits of both operands set.
inline MasterLexer::Options
operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
    const unsigned lhs_bits = static_cast<unsigned>(o1);
    const unsigned rhs_bits = static_cast<unsigned>(o2);
    return (static_cast<MasterLexer::Options>(lhs_bits | rhs_bits));
}

641
642
643
644
645
646
647
} // namespace dns
} // namespace isc
#endif  // MASTER_LEXER_H

// Local Variables:
// mode: c++
// End: