master_lexer.h 15.4 KB
 JINMEI Tatuya committed Oct 25, 2012 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ``````// Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above // copyright notice and this permission notice appear in all copies. // // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR // PERFORMANCE OF THIS SOFTWARE. #ifndef MASTER_LEXER_H #define MASTER_LEXER_H 1 #include `````` JINMEI Tatuya committed Oct 29, 2012 20 ``````#include `````` JINMEI Tatuya committed Oct 25, 2012 21 22 23 24 25 26 ``````#include #include namespace isc { namespace dns { `````` JINMEI Tatuya committed Nov 02, 2012 27 28 29 ``````namespace master_lexer_internal { class State; } `````` JINMEI Tatuya committed Oct 25, 2012 30 `````` `````` JINMEI Tatuya committed Oct 29, 2012 31 32 33 34 ``````/// \brief Tokenizer for parsing DNS master files. /// /// The \c MasterLexer class provides tokenize interfaces for parsing DNS /// master files. It understands some special rules of master files as `````` Mukund Sivaraman committed Nov 01, 2012 35 ``````/// defined in RFC 1035, such as comments, character escaping, and multi-line `````` JINMEI Tatuya committed Oct 29, 2012 36 37 38 39 40 ``````/// data, and provides the user application with the actual data in a /// more convenient form such as a std::string object. /// /// In order to support the \$INCLUDE notation, this class is designed to be /// able to operate on multiple files or input streams in the nested way. `````` JINMEI Tatuya committed Nov 02, 2012 41 42 ``````/// The \c pushSource() and \c popSource() methods correspond to the push /// and pop operations. `````` JINMEI Tatuya committed Oct 29, 2012 43 44 45 46 47 ``````/// /// While this class is public, it is less likely to be used by normal /// applications; it's mainly expected to be used within this library, /// specifically by the \c MasterLoader class and \c Rdata implementation /// classes. `````` JINMEI Tatuya committed Oct 25, 2012 48 ``````class MasterLexer { `````` JINMEI Tatuya committed Nov 02, 2012 49 `````` friend class master_lexer_internal::State; `````` JINMEI Tatuya committed Oct 25, 2012 50 ``````public: `````` JINMEI Tatuya committed Oct 26, 2012 51 `````` class Token; // we define it separately for better readability `````` JINMEI Tatuya committed Oct 29, 2012 52 `````` `````` JINMEI Tatuya committed Nov 12, 2012 53 54 55 56 `````` /// \brief Options for getNextToken. /// /// A compound option, indicating multiple options are set, can be /// specified using the logical OR operator (operator|()). `````` JINMEI Tatuya committed Nov 02, 2012 57 `````` enum Options { `````` JINMEI Tatuya committed Nov 13, 2012 58 59 60 `````` NONE = 0, ///< No option INITIAL_WS = 1, ///< recognize begin-of-line spaces after an ///< end-of-line `````` JINMEI Tatuya committed Nov 12, 2012 61 62 `````` QSTRING = 2, ///< recognize quoted string NUMBER = 4 ///< recognize numeric text as integer `````` JINMEI Tatuya committed Nov 02, 2012 63 64 `````` }; `````` JINMEI Tatuya committed Oct 29, 2012 65 66 67 `````` /// \brief The constructor. /// /// \throw std::bad_alloc Internal resource allocation fails (rare case). `````` JINMEI Tatuya committed Oct 29, 2012 68 `````` MasterLexer(); `````` JINMEI Tatuya committed Oct 29, 2012 69 70 71 72 `````` /// \brief The destructor. /// /// It internally closes any remaining input sources. `````` JINMEI Tatuya committed Oct 29, 2012 73 `````` ~MasterLexer(); `````` JINMEI Tatuya committed Oct 29, 2012 74 75 76 `````` /// \brief Open a file and make it the current input source of MasterLexer. /// `````` JINMEI Tatuya committed Nov 02, 2012 77 78 79 `````` /// The opened file can be explicitly closed by the \c popSource() method; /// if \c popSource() is not called within the lifetime of the /// \c MasterLexer, it will be closed in the destructor. `````` JINMEI Tatuya committed Oct 29, 2012 80 `````` /// `````` JINMEI Tatuya committed Nov 02, 2012 81 82 83 84 85 86 87 `````` /// In the case possible system errors in opening the file (most likely /// because of specifying a non-existent or unreadable file), it returns /// false, and if the optional \c error parameter is non NULL, it will be /// set to a description of the error (any existing content of the string /// will be discarded). If opening the file succeeds, the given /// \c error parameter will be intact. /// `````` JINMEI Tatuya committed Oct 29, 2012 88 89 `````` /// \throw InvalidParameter filename is NULL /// \param filename A non NULL string specifying a master file `````` JINMEI Tatuya committed Nov 02, 2012 90 91 92 93 94 `````` /// \param error If non null, a placeholder to set error description in /// case of failure. /// /// \return true if pushing the file succeeds; false otherwise. bool pushSource(const char* filename, std::string* error = NULL); `````` JINMEI Tatuya committed Oct 29, 2012 95 96 97 98 99 100 101 `````` /// \brief Make the given stream the current input source of MasterLexer. /// /// The caller still holds the ownership of the passed stream; it's the /// caller's responsibility to keep it valid as long as it's used in /// \c MasterLexer or to release any resource for the stream after that. /// The caller can explicitly tell \c MasterLexer to stop using the `````` JINMEI Tatuya committed Nov 02, 2012 102 `````` /// stream by calling the \c popSource() method. `````` JINMEI Tatuya committed Oct 29, 2012 103 104 105 `````` /// /// \param input An input stream object that produces textual /// representation of DNS RRs. `````` JINMEI Tatuya committed Nov 02, 2012 106 `````` void pushSource(std::istream& input); `````` JINMEI Tatuya committed Oct 29, 2012 107 `````` `````` JINMEI Tatuya committed Nov 02, 2012 108 109 `````` /// \brief Stop using the most recently opened input source (file or /// stream). `````` JINMEI Tatuya committed Oct 29, 2012 110 `````` /// `````` JINMEI Tatuya committed Nov 02, 2012 111 `````` /// If it's a file, the previously opened file will be closed internally. `````` JINMEI Tatuya committed Nov 01, 2012 112 `````` /// If it's a stream, \c MasterLexer will simply stop using `````` JINMEI Tatuya committed Oct 29, 2012 113 114 115 `````` /// the stream; the caller can assume it will be never used in /// \c MasterLexer thereafter. /// `````` JINMEI Tatuya committed Nov 02, 2012 116 `````` /// This method must not be called when there is no source pushed for `````` JINMEI Tatuya committed Oct 29, 2012 117 118 `````` /// \c MasterLexer. This method is otherwise exception free. /// `````` JINMEI Tatuya committed Nov 02, 2012 119 120 `````` /// \throw isc::InvalidOperation Called with no pushed source. void popSource(); `````` JINMEI Tatuya committed Oct 29, 2012 121 `````` `````` Mukund Sivaraman committed Nov 01, 2012 122 `````` /// \brief Return the name of the current input source name. `````` JINMEI Tatuya committed Oct 29, 2012 123 124 `````` /// /// If it's a file, it will be the C string given at the corresponding `````` JINMEI Tatuya committed Nov 02, 2012 125 `````` /// \c pushSource() call, that is, its filename. If it's a stream, it will `````` Mukund Sivaraman committed Nov 01, 2012 126 127 `````` /// be formatted as \c "stream-%p" where \c %p is hex representation /// of the address of the stream object. `````` JINMEI Tatuya committed Oct 29, 2012 128 129 130 131 132 133 134 135 136 `````` /// /// If there is no opened source at the time of the call, this method /// returns an empty string. /// /// \throw std::bad_alloc Resource allocation failed for string /// construction (rare case) /// /// \return A string representation of the current source (see the /// description) `````` JINMEI Tatuya committed Oct 29, 2012 137 `````` std::string getSourceName() const; `````` JINMEI Tatuya committed Oct 29, 2012 138 139 140 141 142 143 144 145 146 147 148 149 150 151 `````` /// \brief Return the input source line number. /// /// If there is an opened source, the return value will be a non-0 /// integer indicating the line number of the current source where /// the \c MasterLexer is currently working. The expected usage of /// this value is to print a helpful error message when parsing fails /// by specifically identifying the position of the error. /// /// If there is no opened source at the time of the call, this method /// returns 0. /// /// \throw None /// `````` JINMEI Tatuya committed Nov 01, 2012 152 `````` /// \return The current line number of the source (see the description) `````` JINMEI Tatuya committed Oct 29, 2012 153 154 155 156 157 `````` size_t getSourceLine() const; private: struct MasterLexerImpl; MasterLexerImpl* impl_; `````` JINMEI Tatuya committed Oct 26, 2012 158 ``````}; `````` JINMEI Tatuya committed Oct 25, 2012 159 `````` `````` JINMEI Tatuya committed Nov 02, 2012 160 161 162 163 164 165 166 167 168 169 ``````/// \brief Operator to combine \c MasterLexer options /// /// This is a trivial shortcut so that compound options can be specified /// in an intuitive way. inline MasterLexer::Options operator|(MasterLexer::Options o1, MasterLexer::Options o2) { return (static_cast( static_cast(o1) | static_cast(o2))); } `````` JINMEI Tatuya committed Oct 26, 2012 170 171 172 173 174 175 176 177 178 179 180 181 182 183 ``````/// \brief Tokens for \c MasterLexer /// /// This is a simple value-class encapsulating a type of a lexer token and /// (if it has a value) its value. Essentially, the class provides /// constructors corresponding to different types of tokens, and corresponding /// getter methods. The type and value are fixed at the time of construction /// and will never be modified throughout the lifetime of the object. /// The getter methods are still provided to maximize the safety; an /// application cannot refer to a value that is invalid for the type of token. /// /// This class is intentionally implemented as copyable and assignable /// (using the default version of copy constructor and assignment operator), /// but it's mainly for internal implementation convenience. Applications will /// simply refer to Token object as a reference via the \c MasterLexer class. `````` JINMEI Tatuya committed Oct 26, 2012 184 185 ``````class MasterLexer::Token { public: `````` JINMEI Tatuya committed Oct 26, 2012 186 `````` /// \brief Enumeration for token types `````` JINMEI Tatuya committed Oct 29, 2012 187 188 189 190 191 `````` /// /// \note At the time of initial implementation, all numeric tokens /// that would be extracted from \c MasterLexer should be represented /// as an unsigned 32-bit integer. If we see the need for larger integers /// or negative numbers, we can then extend the token types. `````` JINMEI Tatuya committed Oct 26, 2012 192 `````` enum Type { `````` JINMEI Tatuya committed Oct 26, 2012 193 194 `````` END_OF_LINE, ///< End of line detected (if asked for detecting it) END_OF_FILE, ///< End of file detected (if asked for detecting it) `````` JINMEI Tatuya committed Nov 13, 2012 195 196 `````` INITIAL_WS, ///< White spaces at the beginning of a line after an ///< end of line `````` JINMEI Tatuya committed Oct 26, 2012 197 198 199 200 201 202 203 `````` NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to /// no-value (type only) types. /// Mainly for internal use. STRING, ///< A single string QSTRING, ///< A single string quoted by double-quotes ("). NUMBER, ///< A decimal number (unsigned 32-bit) ERROR ///< Error detected in getting a token `````` JINMEI Tatuya committed Oct 26, 2012 204 `````` }; `````` JINMEI Tatuya committed Oct 25, 2012 205 `````` `````` JINMEI Tatuya committed Oct 26, 2012 206 `````` /// \brief Enumeration for lexer error codes `````` JINMEI Tatuya committed Oct 26, 2012 207 `````` enum ErrorCode { `````` JINMEI Tatuya committed Oct 26, 2012 208 209 210 211 212 213 214 `````` NOT_STARTED, ///< The lexer is just initialized and has no token UNBALANCED_PAREN, ///< Unbalanced parentheses detected UNEXPECTED_END, ///< The lexer reaches the end of line or file /// unexpectedly UNBALANCED_QUOTES, ///< Unbalanced quotations detected MAX_ERROR_CODE ///< Max integer corresponding to valid error codes. /// (excluding this one). Mainly for internal use. `````` JINMEI Tatuya committed Oct 26, 2012 215 216 `````` }; `````` JINMEI Tatuya committed Oct 26, 2012 217 218 219 220 221 222 223 224 225 226 227 `````` /// \brief A simple representation of a range of a string. /// /// This is a straightforward pair of the start pointer of a string /// and its length. The \c STRING and \c QSTRING types of tokens /// will be primarily represented in this form. /// /// Any character can be stored in the valid range of the region. /// In particular, there can be a nul character (\0) in the middle of /// the region. On the other hand, it is not ensured that the string /// is nul-terminated. So the usual string manipulation API may not work /// as expected. `````` JINMEI Tatuya committed Oct 26, 2012 228 `````` struct StringRegion { `````` JINMEI Tatuya committed Oct 26, 2012 229 230 `````` const char* beg; ///< The start address of the string size_t len; ///< The length of the string in bytes `````` JINMEI Tatuya committed Oct 26, 2012 231 232 `````` }; `````` JINMEI Tatuya committed Oct 26, 2012 233 234 235 236 237 `````` /// \brief Constructor for non-value type of token. /// /// \throw InvalidParameter A value type token is specified. /// \param type The type of the token. It must indicate a non-value /// type (not larger than \c NOVALUE_TYPE_MAX). `````` JINMEI Tatuya committed Oct 26, 2012 238 `````` explicit Token(Type type) : type_(type) { `````` JINMEI Tatuya committed Oct 26, 2012 239 `````` if (type > NOVALUE_TYPE_MAX) { `````` JINMEI Tatuya committed Oct 26, 2012 240 241 `````` isc_throw(InvalidParameter, "Token per-type constructor " "called with invalid type: " << type); `````` JINMEI Tatuya committed Oct 25, 2012 242 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 243 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 `````` /// \brief Constructor for string and quoted-string types of token. /// /// The optional \c quoted parameter specifies whether it's a quoted or /// non quoted string. /// /// The string is specified as a pair of a pointer to the start address /// and its length. Any character can be contained in any position of /// the valid range (see \c StringRegion). /// /// When it's a quoted string, the quotation marks must be excluded /// from the specified range. /// /// \param str_beg The start address of the string /// \param str_len The size of the string in bytes /// \param quoted true if it's a quoted string; false otherwise. `````` JINMEI Tatuya committed Oct 26, 2012 260 261 262 263 264 265 `````` Token(const char* str_beg, size_t str_len, bool quoted = false) : type_(quoted ? QSTRING : STRING) { val_.str_region_.beg = str_beg; val_.str_region_.len = str_len; } `````` JINMEI Tatuya committed Oct 26, 2012 266 267 268 269 270 `````` /// \brief Constructor for number type of token. /// /// \brief number An unsigned 32-bit integer corresponding to the token /// value. `````` JINMEI Tatuya committed Oct 26, 2012 271 272 273 `````` explicit Token(uint32_t number) : type_(NUMBER) { val_.number_ = number; } `````` JINMEI Tatuya committed Oct 26, 2012 274 275 276 277 278 `````` /// \brief Constructor for error type of token. /// /// \throw InvalidParameter Invalid error code value is specified. /// \brief error_code A pre-defined constant of \c ErrorCode. `````` JINMEI Tatuya committed Oct 26, 2012 279 `````` explicit Token(ErrorCode error_code) : type_(ERROR) { `````` JINMEI Tatuya committed Oct 26, 2012 280 `````` if (!(error_code < MAX_ERROR_CODE)) { `````` JINMEI Tatuya committed Oct 26, 2012 281 282 283 284 285 `````` isc_throw(InvalidParameter, "Invalid master lexer error code: " << error_code); } val_.error_code_ = error_code; } `````` JINMEI Tatuya committed Oct 25, 2012 286 `````` `````` JINMEI Tatuya committed Oct 26, 2012 287 288 289 `````` /// \brief Return the token type. /// /// \throw none `````` JINMEI Tatuya committed Oct 26, 2012 290 `````` Type getType() const { return (type_); } `````` JINMEI Tatuya committed Oct 26, 2012 291 292 293 294 295 296 297 `````` /// \brief Return the value of a string-variant token. /// /// \throw InvalidOperation Called on a non string-variant types of token. /// \return A reference to \c StringRegion corresponding to the string /// token value. const StringRegion& getStringRegion() const { `````` JINMEI Tatuya committed Oct 26, 2012 298 299 `````` if (type_ != STRING && type_ != QSTRING) { isc_throw(InvalidOperation, `````` JINMEI Tatuya committed Oct 26, 2012 300 `````` "Token::getStringRegion() for non string-variant type"); `````` JINMEI Tatuya committed Oct 25, 2012 301 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 302 `````` return (val_.str_region_); `````` JINMEI Tatuya committed Oct 26, 2012 303 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 304 305 306 307 308 309 310 311 312 313 314 315 316 `````` /// \brief Return the value of a string-variant token as a string object. /// /// Note that the underlying string may contain a nul (\0) character /// in the middle. The returned string object will contain all characters /// of the valid range of the underlying string. So some string /// operations such as c_str() may not work as expected. /// /// \throw InvalidOperation Called on a non string-variant types of token. /// \throw std::bad_alloc Resource allocation failure in constructing the /// string object. /// \return A std::string object corresponding to the string token value. std::string getString() const { `````` JINMEI Tatuya committed Oct 26, 2012 317 318 `````` if (type_ != STRING && type_ != QSTRING) { isc_throw(InvalidOperation, `````` JINMEI Tatuya committed Oct 26, 2012 319 `````` "Token::getString() for non string-variant type"); `````` JINMEI Tatuya committed Oct 25, 2012 320 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 321 322 `````` return (std::string(val_.str_region_.beg, val_.str_region_.beg + val_.str_region_.len)); `````` JINMEI Tatuya committed Oct 26, 2012 323 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 324 325 326 327 328 `````` /// \brief Return the value of a string-variant token as a string object. /// /// \throw InvalidOperation Called on a non number type of token. /// \return The integer corresponding to the number token value. `````` JINMEI Tatuya committed Oct 26, 2012 329 330 331 332 `````` uint32_t getNumber() const { if (type_ != NUMBER) { isc_throw(InvalidOperation, "Token::getNumber() for non number type"); `````` JINMEI Tatuya committed Oct 25, 2012 333 `````` } `````` JINMEI Tatuya committed Oct 26, 2012 334 335 `````` return (val_.number_); } `````` JINMEI Tatuya committed Oct 26, 2012 336 337 338 339 340 `````` /// \brief Return the error code of a error type token. /// /// \throw InvalidOperation Called on a non error type of token. /// \return The error code of the token. `````` JINMEI Tatuya committed Oct 26, 2012 341 342 343 344 345 346 347 `````` ErrorCode getErrorCode() const { if (type_ != ERROR) { isc_throw(InvalidOperation, "Token::getErrorCode() for non error type"); } return (val_.error_code_); }; `````` JINMEI Tatuya committed Oct 26, 2012 348 349 350 351 352 353 354 355 356 357 `````` /// \brief Return a textual description of the error of a error type token. /// /// The returned string would be useful to produce a log message when /// a zone file parser encounters an error. /// /// \throw InvalidOperation Called on a non error type of token. /// \throw std::bad_alloc Resource allocation failure in constructing the /// string object. /// \return A string object that describes the meaning of the error. `````` JINMEI Tatuya committed Oct 26, 2012 358 `````` std::string getErrorText() const; `````` JINMEI Tatuya committed Oct 25, 2012 359 `````` `````` JINMEI Tatuya committed Oct 26, 2012 360 ``````private: `````` JINMEI Tatuya committed Oct 26, 2012 361 362 363 364 365 366 `````` Type type_; // this is not const so the class can be assignable // We use a union to represent different types of token values via the // unified Token class. The class integrity should ensure valid operation // on the union; getter methods should only refer to the member set at // the construction. `````` JINMEI Tatuya committed Oct 26, 2012 367 368 369 `````` union { StringRegion str_region_; uint32_t number_; `````` JINMEI Tatuya committed Oct 26, 2012 370 `````` ErrorCode error_code_; `````` JINMEI Tatuya committed Oct 26, 2012 371 `````` } val_; `````` JINMEI Tatuya committed Oct 25, 2012 372 373 374 375 376 377 378 379 380 ``````}; } // namespace dns } // namespace isc #endif // MASTER_LEXER_H // Local Variables: // mode: c++ // End:``````