Commit 9a11ef62 authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[master] Merge branch 'trac2372'

parents 06069bf9 30b659a8
......@@ -97,6 +97,7 @@ libb10_dns___la_SOURCES += master_lexer_inputsource.h master_lexer_inputsource.c
libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
libb10_dns___la_SOURCES += masterload.h masterload.cc
libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
libb10_dns___la_SOURCES += master_lexer_state.h
libb10_dns___la_SOURCES += message.h message.cc
libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
libb10_dns___la_SOURCES += name.h name.cc
......
......@@ -16,6 +16,7 @@
#include <dns/master_lexer.h>
#include <dns/master_lexer_inputsource.h>
#include <dns/master_lexer_state.h>
#include <boost/shared_ptr.hpp>
......@@ -32,10 +33,34 @@ typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
using namespace master_lexer_internal;
// Internal implementation data of MasterLexer, reached through
// MasterLexer::impl_.  It holds the stack of input sources and the
// tokenization context that the state classes read and update.
struct MasterLexer::MasterLexerImpl {
// NOTE(review): the one-line constructor below and the constructor that
// follows it cannot coexist; the first looks like the pre-change line of the
// diff this text was rendered from -- confirm which one is current.
MasterLexerImpl() : token_(Token::NOT_STARTED) {}
// Starts with no current source, the NOT_STARTED token, no open parentheses
// and the "last was EOL" flag cleared.
MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
paren_count_(0), last_was_eol_(false)
{}
// A helper method to skip a possible comment up to the end of the line or
// the end of the input; commonly used by state classes.  If the given
// character is ';', it keeps reading from the current source until it gets
// '\n' or InputSource::END_OF_STREAM and returns that terminating
// character; otherwise it simply returns the given character unchanged.
// Note that source_ is dereferenced without a NULL check.
int skipComment(int c) {
if (c == ';') {
while (true) {
c = source_->getChar();
if (c == '\n' || c == InputSource::END_OF_STREAM) {
return (c);
}
}
}
return (c);
}
std::vector<InputSourcePtr> sources_; // pushed sources; back() is the current one
// NOTE(review): the next declaration duplicates the token_ member declared
// two lines below; it looks like the pre-change line of the diff -- confirm.
Token token_;
InputSource* source_; // current source (NULL if sources_ is empty)
Token token_; // currently recognized token (set by a state)
// These are used in states, and defined here only as a placeholder.
// The main lexer class does not need these members.
size_t paren_count_; // nest count of the parentheses
bool last_was_eol_; // whether the lexer just passed an end-of-line
};
MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
......@@ -60,12 +85,14 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
return (false);
}
impl_->source_ = impl_->sources_.back().get();
return (true);
}
void
MasterLexer::pushSource(std::istream& input) {
impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
impl_->source_ = impl_->sources_.back().get();
}
void
......@@ -75,6 +102,8 @@ MasterLexer::popSource() {
"MasterLexer::popSource on an empty source");
}
impl_->sources_.pop_back();
impl_->source_ = impl_->sources_.empty() ? NULL :
impl_->sources_.back().get();
}
std::string
......@@ -115,5 +144,142 @@ MasterLexer::Token::getErrorText() const {
return (error_text[val_.error_code_]);
}
namespace master_lexer_internal {
// Below we implement state classes for state transitions of MasterLexer.
// Note that these need to be defined here so that they can refer to
// the details of MasterLexerImpl.
typedef MasterLexer::Token Token; // convenience shortcut
bool
State::wasLastEOL(const MasterLexer& lexer) const {
return (lexer.impl_->last_was_eol_);
}
// Test accessor: returns a reference to the token currently stored in the
// lexer's implementation object.
const MasterLexer::Token&
State::getToken(const MasterLexer& lexer) const {
    const MasterLexer::MasterLexerImpl& impl = *lexer.impl_;
    return (impl.token_);
}
size_t
State::getParenCount(const MasterLexer& lexer) const {
return (lexer.impl_->paren_count_);
}
namespace {
// State entered right after a carriage return ('\r') has been read.
class CRLF : public State {
public:
    CRLF() {}

    // Complete the handling of a '\r' that has already been consumed.
    //
    // A "\r\n" sequence is folded into a single END-OF-LINE.  A lone '\r'
    // is also reported as an EOL; in that case the last character read is
    // pushed back so that tokenization continues right after the '\r'.
    // One tricky case is a comment between the '\r' and the '\n': the
    // comment is skipped and the whole sequence is treated as one EOL (the
    // behavior derived from BIND 9).  Technically this may not be correct,
    // but in practice the caller wouldn't distinguish it from two
    // consecutive EOLs, so the simpler processing is used.
    //
    // This always sets the END_OF_LINE token and returns NULL.
    virtual const State* handle(MasterLexer& lexer) const {
        const int next = getLexerImpl(lexer)->source_->getChar();
        const int terminator = getLexerImpl(lexer)->skipComment(next);
        const bool ends_with_lf = (terminator == '\n');
        if (!ends_with_lf) {
            // Not a "\r\n" pair: give the last character back to the source.
            getLexerImpl(lexer)->source_->ungetChar();
        }

        getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
        getLexerImpl(lexer)->last_was_eol_ = true;
        return (NULL);
    }
};
// State for string tokens.  For now it is mostly a placeholder: it does not
// touch the lexer at all.
class String : public State {
public:
    String() {}

    // Ignores the lexer and ends the session right away by returning NULL.
    virtual const State* handle(MasterLexer& /*lexer*/) const {
        const State* const no_next_state = NULL;
        return (no_next_state);
    }
};
// We use a common instance of each state in a singleton-like way to save
// construction overhead. They are not singletons in the strict sense, as
// we don't prohibit direct construction of these objects. But that doesn't
// matter much anyway, because the definitions are completely hidden within
// this file.
const CRLF CRLF_STATE;
const String STRING_STATE;
}
// Return the shared instance of the state identified by the given ID.
const State&
State::getInstance(ID state_id) {
    if (state_id == CRLF) {
        return (CRLF_STATE);
    }
    if (state_id == String) {
        return (STRING_STATE);
    }

    // An unknown ID is a bug of the caller.  This method is only expected to
    // be used by tests, so we just forcefully make it fail by asserting the
    // condition.
    assert(false);
    return (STRING_STATE); // a dummy return, to silence some compilers.
}
// Begin a tokenization session on the lexer's current source.
//
// Characters are read one at a time (comments are skipped first) until
// either a token can be set in the lexer, in which case NULL is returned,
// or a follow-up state is needed, in which case a pointer to that state
// object is returned.
const State*
State::start(MasterLexer& lexer, MasterLexer::Options options) {
    MasterLexer::MasterLexerImpl& impl = *lexer.impl_;
    const bool want_initial_ws = (options & MasterLexer::INITIAL_WS) != 0;

    for (;;) {
        const int c = impl.skipComment(impl.source_->getChar());

        if (c == InputSource::END_OF_STREAM) {
            impl.last_was_eol_ = false;
            if (impl.paren_count_ == 0) {
                impl.token_ = Token(Token::END_OF_FILE);
            } else {
                // End of input while a '(' is still open.
                impl.token_ = Token(Token::UNBALANCED_PAREN);
                impl.paren_count_ = 0; // reset to 0; this helps in lenient
                                       // mode.
            }
            return (NULL);
        }

        const bool in_parens = (impl.paren_count_ != 0);
        switch (c) {
        case ' ':
        case '\t':
            // If requested and we are not in (), recognize the initial
            // space; otherwise the space is simply skipped.
            if (impl.last_was_eol_ && !in_parens && want_initial_ws) {
                impl.last_was_eol_ = false;
                impl.token_ = Token(Token::INITIAL_WS);
                return (NULL);
            }
            break;
        case '\n':
            impl.last_was_eol_ = true;
            if (!in_parens) { // we don't recognize EOL if we are in ()
                impl.token_ = Token(Token::END_OF_LINE);
                return (NULL);
            }
            break;
        case '\r':
            if (!in_parens) { // same rule as for '\n': ignored within ()
                return (&CRLF_STATE);
            }
            break;
        case '(':
            impl.last_was_eol_ = false;
            ++impl.paren_count_;
            break;
        case ')':
            impl.last_was_eol_ = false;
            if (!in_parens) {
                // A closing parenthesis without a matching opening one.
                impl.token_ = Token(Token::UNBALANCED_PAREN);
                return (NULL);
            }
            --impl.paren_count_;
            break;
        default:
            // Note: in #2373 we should probably ungetChar().
            impl.last_was_eol_ = false;
            return (&STRING_STATE);
        }
        // Nothing was decided for this character; read the next one.
    }
}
} // namespace master_lexer_internal
} // end of namespace dns
} // end of namespace isc
......@@ -24,6 +24,9 @@
namespace isc {
namespace dns {
namespace master_lexer_internal {
class State;
}
/// \brief Tokenizer for parsing DNS master files.
///
......@@ -64,9 +67,22 @@ namespace dns {
/// this class does not throw for an error that would be reported as an
/// exception in other classes.
class MasterLexer {
friend class master_lexer_internal::State;
public:
class Token; // we define it separately for better readability
/// \brief Options for getNextToken.
///
/// A compound option, indicating multiple options are set, can be
/// specified using the bitwise OR operator (operator|()).  Each value
/// other than \c NONE uses a distinct bit so that values can be combined.
enum Options {
NONE = 0, ///< No option
INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
///< end-of-line
QSTRING = 2, ///< recognize quoted string
NUMBER = 4 ///< recognize numeric text as integer
};
/// \brief The constructor.
///
/// \throw std::bad_alloc Internal resource allocation fails (rare case).
......@@ -167,6 +183,16 @@ private:
MasterLexerImpl* impl_;
};
/// \brief Combine two \c MasterLexer options into a compound option.
///
/// This is a trivial shortcut so that compound options can be written in an
/// intuitive way, e.g. <code>INITIAL_WS | QSTRING</code>.  The result is the
/// bitwise OR of the two operands.
inline MasterLexer::Options
operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
    const unsigned lhs_bits = static_cast<unsigned>(o1);
    const unsigned rhs_bits = static_cast<unsigned>(o2);
    return (static_cast<MasterLexer::Options>(lhs_bits | rhs_bits));
}
/// \brief Tokens for \c MasterLexer
///
/// This is a simple value-class encapsulating a type of a lexer token and
......@@ -192,7 +218,8 @@ public:
enum Type {
END_OF_LINE, ///< End of line detected (if asked for detecting it)
END_OF_FILE, ///< End of file detected (if asked for detecting it)
INITIAL_WS, ///< White spaces at the beginning of a line
INITIAL_WS, ///< White spaces at the beginning of a line after an
///< end of line
NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
/// no-value (type only) types.
/// Mainly for internal use.
......
// Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
#ifndef MASTER_LEXER_STATE_H
#define MASTER_LEXER_STATE_H 1
#include <dns/master_lexer.h>
namespace isc {
namespace dns {
namespace master_lexer_internal {
/// \brief Tokenization state for \c MasterLexer.
///
/// This is a base class of classes that represent various states of a single
/// tokenization session of \c MasterLexer, i.e., the states used for a
/// single call to \c MasterLexer::getNextToken().
///
/// It follows the convention of the state design pattern: each derived class
/// corresponds to a specific state, and the state transition takes place
/// through the virtual method named \c handle().  The \c handle() method
/// takes the main \c MasterLexer object that holds all necessary internal
/// context, and updates it as necessary; each \c State derived class is
/// completely stateless.
///
/// The initial transition takes place in a static method of the base class,
/// \c start().  This is mainly for implementation convenience; we need to
/// pass options given to \c MasterLexer::getNextToken() for the initial
/// state, so it makes more sense to separate the interface for the transition
/// from the initial state.
///
/// When an object of a specific state class completes the session, it
/// normally sets the identified token in the lexer, and returns NULL;
/// if more transitions are necessary, it returns a pointer to the next state
/// object.
///
/// As is usual in the state design pattern, the \c State class is made
/// a friend class of \c MasterLexer and can refer to its internal details.
/// This is intentional; essentially it's a part of \c MasterLexer and
/// is defined as a separate class only for implementation clarity and better
/// testability.  It's defined in a publicly visible header, but that's only
/// for testing purposes.  No normal application or even no other classes of
/// this library are expected to use this class.
class State {
public:
    /// \brief Virtual destructor.
    ///
    /// This class is a polymorphic base (it declares the pure virtual
    /// \c handle()), so the destructor is made virtual.  This keeps
    /// destruction of a derived object through a \c State pointer well
    /// defined and avoids compiler warnings about a non-virtual destructor
    /// in a class with virtual functions.
    virtual ~State() {}

    /// \brief Begin state transitions to get the next token.
    ///
    /// This is the first method that \c MasterLexer needs to call for a
    /// tokenization session.  The lexer passes a reference to itself
    /// and options given in \c getNextToken().
    ///
    /// \throw InputSource::ReadError Unexpected I/O error
    /// \throw std::bad_alloc Internal resource allocation failure
    ///
    /// \param lexer The lexer object that holds the main context.
    /// \param options The options passed to getNextToken().
    /// \return A pointer to the next state object or NULL if the transition
    /// is completed.
    static const State* start(MasterLexer& lexer,
                              MasterLexer::Options options);

    /// \brief Handle the process of one specific state.
    ///
    /// This method is expected to be called on the object returned by
    /// start(), and then repeatedly on each returned object until NULL is
    /// returned.  The call chain will form the complete state transition.
    ///
    /// \throw InputSource::ReadError Unexpected I/O error
    /// \throw std::bad_alloc Internal resource allocation failure
    ///
    /// \param lexer The lexer object that holds the main context.
    /// \return A pointer to the next state object or NULL if the transition
    /// is completed.
    virtual const State* handle(MasterLexer& lexer) const = 0;

    /// \brief Types of states.
    ///
    /// Specific states are basically hidden within the implementation,
    /// but we'd like to allow tests to examine them, so we provide
    /// a way to get an instance of a specific state.
    enum ID {
        CRLF,                  ///< Just seen a carriage-return character
        String                 ///< Handling a string token
    };

    /// \brief Returns a \c State instance of the given state.
    ///
    /// This is provided only for testing purposes so tests can check
    /// the behavior of each state separately.  \c MasterLexer shouldn't
    /// need this method.
    ///
    /// \param state_id The identifier of the state to be returned.
    /// \return A reference to the \c State object for \c state_id.
    static const State& getInstance(ID state_id);

    /// \name Read-only accessors for testing purposes.
    ///
    /// These allow tests to inspect some selected portion of the internal
    /// states of \c MasterLexer.  These shouldn't be used except for testing
    /// purposes.
    ///@{
    bool wasLastEOL(const MasterLexer& lexer) const;
    const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
    size_t getParenCount(const MasterLexer& lexer) const;
    ///@}

protected:
    /// \brief An accessor to the internal implementation class of
    /// \c MasterLexer.
    ///
    /// This is provided for specific derived classes as they are not direct
    /// friends of \c MasterLexer.
    ///
    /// \param lexer The lexer object that holds the main context.
    /// \return A pointer to the implementation class object of the given
    /// lexer.  This is never NULL.
    MasterLexer::MasterLexerImpl* getLexerImpl(MasterLexer& lexer) const {
        return (lexer.impl_);
    }
};
} // namespace master_lexer_internal
} // namespace dns
} // namespace isc
#endif // MASTER_LEXER_STATE_H
// Local Variables:
// mode: c++
// End:
......@@ -27,6 +27,7 @@ run_unittests_SOURCES += labelsequence_unittest.cc
run_unittests_SOURCES += messagerenderer_unittest.cc
run_unittests_SOURCES += master_lexer_token_unittest.cc
run_unittests_SOURCES += master_lexer_unittest.cc
run_unittests_SOURCES += master_lexer_state_unittest.cc
run_unittests_SOURCES += name_unittest.cc
run_unittests_SOURCES += nsec3hash_unittest.cc
run_unittests_SOURCES += rrclass_unittest.cc rrtype_unittest.cc
......
// Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
#include <dns/master_lexer.h>
#include <dns/master_lexer_inputsource.h>
#include <dns/master_lexer_state.h>
#include <gtest/gtest.h>
#include <sstream>
using namespace isc::dns;
using namespace master_lexer_internal;
namespace {
typedef MasterLexer::Token Token; // shortcut
// Test fixture shared by the MasterLexer state tests: it provides a lexer,
// a string stream to be used as its input, and references to the state
// objects under test.
class MasterLexerStateTest : public ::testing::Test {
protected:
    MasterLexerStateTest() :
        common_options(MasterLexer::INITIAL_WS),
        s_null(NULL),
        s_crlf(State::getInstance(State::CRLF)),
        s_string(State::getInstance(State::String)),
        options(MasterLexer::NONE),
        orig_options(options)
    {}

    // INITIAL_WS is specified as the common initial options.
    const MasterLexer::Options common_options;
    MasterLexer lexer;
    const State* const s_null;  // NULL, for comparison with returned states
    const State& s_crlf;
    const State& s_string;
    std::stringstream ss;       // input of the lexer, filled by each test
    MasterLexer::Options options;
    MasterLexer::Options orig_options;
};
// Common check for the end-of-file condition.
// It verifies that the token is set to END_OF_FILE and that the lexer is NOT
// in the "last was EOL" state.
// The passed state can be any valid one; states are stateless and are used
// here only for their read-only accessors.
//
// \param state Any State object, used only to call the accessors.
// \param lexer The lexer whose internal context is inspected.
void
eofCheck(const State& state, MasterLexer& lexer) {
EXPECT_EQ(Token::END_OF_FILE, state.getToken(lexer).getType());
EXPECT_FALSE(state.wasLastEOL(lexer));
}
// With an empty input, start() completes immediately and the lexer reports
// end-of-file.
TEST_F(MasterLexerStateTest, startAndEnd) {
// A simple case: the input is empty, so we begin with start and
// are immediately done (start() returns NULL).
lexer.pushSource(ss);
EXPECT_EQ(s_null, State::start(lexer, common_options));
eofCheck(s_crlf, lexer);
}
// A single '\n' is reported as END_OF_LINE and remembered as "last was EOL";
// the following session then reaches the end of the input.
TEST_F(MasterLexerStateTest, startToEOL) {
ss << "\n";
lexer.pushSource(ss);
// The first session stops at the newline.
EXPECT_EQ(s_null, State::start(lexer, common_options));
EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
// The next lexer session will reach EOF. The same EOF check should pass.
EXPECT_EQ(s_null, State::start(lexer, common_options));
eofCheck(s_crlf, lexer);
}
// Spaces and tabs are skipped unless the INITIAL_WS option is given and the
// space directly follows an end-of-line.
TEST_F(MasterLexerStateTest, space) {
// repeat '\t\n' twice (see below), then space after EOL
ss << " \t\n\t\n ";
lexer.pushSource(ss);
// By default space characters and tabs will be ignored. We check this
// twice; at the second iteration, it's a white space at the beginning
// of a line, but since we don't specify the INITIAL_WS option, it's treated
// as a normal space and ignored.
for (size_t i = 0; i < 2; ++i) {
EXPECT_EQ(s_null, State::start(lexer, MasterLexer::NONE));
EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
}
// Now we specify the INITIAL_WS option. It will be recognized and the
// corresponding token will be returned; the "last was EOL" flag is cleared.
EXPECT_EQ(s_null, State::start(lexer, MasterLexer::INITIAL_WS));
EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
}
// Within a pair of parentheses neither end-of-line nor initial white space
// is recognized; both are recognized again once the parentheses are closed.
TEST_F(MasterLexerStateTest, parentheses) {
ss << "\n(\na\n )\n "; // 1st \n is to check if 'was EOL' is set to false
lexer.pushSource(ss);
EXPECT_EQ(s_null, State::start(lexer, common_options)); // handle \n
// Now handle '('. It skips \n and recognizes 'a' as a string.
EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // check pre condition
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
// skip 'a' (note: until #2373 it's actually skipped as part of the '('
// handling)
s_string.handle(lexer);
// Then handle ')'. The '\n' before ')' isn't recognized because
// it's canceled due to the '('. Likewise, the space after the '\n'
// shouldn't be recognized but should be just ignored.
EXPECT_EQ(s_null, State::start(lexer, common_options));
EXPECT_EQ(0, s_crlf.getParenCount(lexer));
// Now, temporarily disabled options are restored: both EOL and the
// initial WS are recognized.
EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
EXPECT_EQ(s_null, State::start(lexer, common_options));
EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
}
// Parentheses can be nested: EOL and initial white space stay unrecognized
// until the outermost parenthesis is closed.
TEST_F(MasterLexerStateTest, nestedParentheses) {
// This is an unusual, but allowed (in this implementation) case.
ss << "(a(b)\n c)\n ";
lexer.pushSource(ss);
EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
s_string.handle(lexer); // consume 'a'
EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
s_string.handle(lexer); // consume 'b'
EXPECT_EQ(2, s_crlf.getParenCount(lexer)); // now the count is 2
// Close the innermost parentheses. The count will be decreased, but the
// options shouldn't be restored yet, so the intermediate EOL or initial WS
// won't be recognized.
EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume ')'
s_string.handle(lexer); // consume 'c'
EXPECT_EQ(1, s_crlf.getParenCount(lexer));
// Close the outermost parentheses. The count will be reset to 0, and the
// original options are restored.
EXPECT_EQ(s_null, State::start(lexer, common_options));
// Now, temporarily disabled options are restored: both EOL and the
// initial WS are recognized.
EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
EXPECT_EQ(s_null, State::start(lexer, common_options));
EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
}
// Both kinds of unbalanced parentheses are reported as an ERROR token with
// the UNBALANCED_PAREN code: a ')' without a matching '(', and the end of
// the input while a '(' is still open.
TEST_F(MasterLexerStateTest, unbalancedParentheses) {
// Only a closing paren is provided. We prepend a \n to check if it's
// correctly canceled after detecting the error.
ss << "\n)";
ss << "(a";
lexer.pushSource(ss);
EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
EXPECT_TRUE(s_crlf.wasLastEOL(lexer)); // this \n was remembered
// Now checking ')'. The result should be an error, and the count shouldn't
// be changed. "last EOL" should be canceled.
EXPECT_EQ(0, s_crlf.getParenCount(lexer));
EXPECT_EQ(s_null, State::start(lexer, common_options));
EXPECT_EQ(0, s_crlf.getParenCount(lexer));
ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
// Reach EOF with a dangling open parenthesis.
EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
s_string.handle(lexer); // consume 'a'
EXPECT_EQ(1, s_crlf.getParenCount(lexer));
EXPECT_EQ(s_null, State::start(lexer, common_options)); // reach EOF
ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // should be reset to 0
}
TEST_F(MasterLexerStateTest, startToComment) {