Commit 2972b769 authored by Jelte Jansen's avatar Jelte Jansen
Browse files

[master] Merge branch 'trac2374'

parents f8f96f9e 522f3f2b
......@@ -30,7 +30,7 @@ namespace dns {
namespace {
typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
}
} // end unnamed namespace
using namespace master_lexer_internal;
......@@ -213,7 +213,7 @@ const char* const error_text[] = {
"no token produced" // NO_TOKEN_PRODUCED
};
const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
}
} // end unnamed namespace
std::string
MasterLexer::Token::getErrorText() const {
......@@ -288,6 +288,13 @@ public:
virtual void handle(MasterLexer& lexer) const;
};
/// \brief Lexer state for a token that begins with a decimal digit.
///
/// State::start() hands control to this state when the first character of
/// a token is a digit.  handle() then decides whether the token is a
/// number (digits only) or a string that merely starts with digits.
class Number : public State {
public:
Number() {}
virtual ~Number() {}
/// \brief Read the rest of the token and store it in the lexer.
virtual const State* handle(MasterLexer& lexer) const;
};
// We use a common instance of each state in a singleton-like way to save
// construction overhead. They are not singletons in the strict sense, as
// we don't prohibit direct construction of these objects. But that doesn't
......@@ -296,7 +303,8 @@ public:
const CRLF CRLF_STATE;
const String STRING_STATE;
const QString QSTRING_STATE;
}
const Number NUMBER_STATE;
} // end unnamed namespace
const State&
State::getInstance(ID state_id) {
......@@ -307,6 +315,8 @@ State::getInstance(ID state_id) {
return (STRING_STATE);
case QString:
return (QSTRING_STATE);
case Number:
return (NUMBER_STATE);
}
// This is a bug of the caller, and this method is only expected to be
......@@ -367,6 +377,11 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
return (NULL);
}
--paren_count;
} else if (isdigit(c)) {
lexerimpl.last_was_eol_ = false;
// this character will be handled in the number state
lexerimpl.source_->ungetChar();
return (&NUMBER_STATE);
} else {
// this character will be handled in the string state
lexerimpl.source_->ungetChar();
......@@ -431,6 +446,43 @@ QString::handle(MasterLexer& lexer) const {
}
}
/// \brief Consume one token that starts with a digit.
///
/// Characters are read from the current source until isTokenEnd() reports
/// the end of the token; the terminating character is pushed back so the
/// next state sees it.  If every collected character is a decimal digit,
/// the lexer's token becomes a number token.  Otherwise it becomes a string
/// token holding all collected characters.
///
/// Digit-only tokens that do not fit in 32 bits wrap modulo 2^32 (for
/// example "4294967296" yields 0).  The value is accumulated here instead
/// of being delegated to strtoul(), because strtoul() saturates at
/// ULONG_MAX and would therefore give different results depending on the
/// width of unsigned long.
///
/// \param lexer The lexer whose source is read and whose token is set.
/// \return Always NULL.
const State*
Number::handle(MasterLexer& lexer) const {
    MasterLexer::Token& token = getLexerImpl(lexer)->token_;

    // Only decimal notation is supported (no octal or hex).
    const unsigned long base = 10;
    // unsigned long is at least 32 bits wide; masking after every step
    // keeps the running value in [0, 2^32) so the multiplication below can
    // never overflow a wider unsigned long and the wrap-around is explicit.
    const unsigned long mask32 = 0xffffffffUL;

    // It may yet turn out to be a string, so all characters are collected
    // while the numeric value is accumulated on the side.
    bool digits_only = true;
    unsigned long number = 0;
    std::vector<char>& data = getLexerImpl(lexer)->data_;
    data.clear();

    bool escaped = false;
    while (true) {
        const int c = getLexerImpl(lexer)->skipComment(
            getLexerImpl(lexer)->source_->getChar(), escaped);
        if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
            // Leave the terminator for whoever handles the next token.
            getLexerImpl(lexer)->source_->ungetChar();
            if (digits_only) {
                token = MasterLexer::Token(number);
            } else {
                token = MasterLexer::Token(&data.at(0), data.size());
            }
            return (NULL);
        }
        // Explicit range check: well defined for any int value of c.
        if (c >= '0' && c <= '9') {
            number = (number * base +
                      static_cast<unsigned long>(c - '0')) & mask32;
        } else {
            digits_only = false;
        }
        // A backslash escapes the next character unless it is itself escaped.
        escaped = (c == '\\' && !escaped);
        data.push_back(c);
    }
}
} // namespace master_lexer_internal
} // end of namespace dns
......
......@@ -101,7 +101,8 @@ public:
enum ID {
CRLF, ///< Just seen a carriage-return character
String, ///< Handling a string token
QString ///< Handling a quoted string token
QString, ///< Handling a quoted string token
Number ///< Handling a number
};
/// \brief Returns a \c State instance of the given state.
......
......@@ -33,6 +33,7 @@ protected:
s_crlf(State::getInstance(State::CRLF)),
s_string(State::getInstance(State::String)),
s_qstring(State::getInstance(State::QString)),
s_number(State::getInstance(State::Number)),
options(MasterLexer::NONE),
orig_options(options)
{}
......@@ -44,6 +45,7 @@ protected:
const State& s_crlf;
const State& s_string;
const State& s_qstring;
const State& s_number;
std::stringstream ss;
MasterLexer::Options options, orig_options;
};
......@@ -450,4 +452,137 @@ TEST_F(MasterLexerStateTest, brokenQuotedString) {
EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
}
// Digit-only tokens are returned as number tokens; values that do not fit
// in 32 bits wrap around.
TEST_F(MasterLexerStateTest, basicNumbers) {
    ss << "0 "
       << "1 "
       << "12345 "
       << "4294967295 "            // 2^32-1
       << "4294967296 "            // 2^32: wraps to 0.  Failing on it
                                   // would be an option, but bind9 wraps
                                   // as well.
       << "4294967297 "            // 2^32+1: wraps to 1, see above
       << "1000000000000000000 "   // wraps to 2808348672
       << "005 "                   // leading zeroes are ignored
       << "42;asdf\n"              // number followed by a comment
       << "37";                    // plain number again, to make sure none
                                   // of the above upset the tokenizer
    lexer.pushSource(ss);

    // Expected values for the first nine tokens, in input order.
    const unsigned long expected[] = {
        0, 1, 12345, 4294967295UL, 0, 1, 2808348672UL, 5, 42
    };
    const unsigned long* const expected_end =
        expected + sizeof(expected) / sizeof(expected[0]);
    for (const unsigned long* value = expected; value != expected_end;
         ++value) {
        EXPECT_EQ(&s_number, State::start(lexer, common_options));
        EXPECT_EQ(s_null, s_number.handle(lexer));
        EXPECT_EQ(*value, s_number.getToken(lexer).getNumber());
    }

    // The comment after "42" runs to the end of the line.
    EXPECT_EQ(s_null, State::start(lexer, common_options));
    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());

    EXPECT_EQ(&s_number, State::start(lexer, common_options));
    EXPECT_EQ(s_null, s_number.handle(lexer));
    EXPECT_EQ(37, s_number.getToken(lexer).getNumber());

    // Nothing but EOF is left after that.
    EXPECT_EQ(s_null, State::start(lexer, options));
    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
}
// Tokens that look like numbers (or begin like one) but must be returned
// as strings, including tokens containing escaped characters.
TEST_F(MasterLexerStateTest, stringNumbers) {
    ss << "-1 "                    // negative numbers are strings (only
                                   // unsigned integers are numbers)
       << "123abc456 "             // non-digits inside make it a string
       << "123\\456 "              // so does an escaped digit
       << "3scaped\\ space "
       << "3scaped\\\ttab "
       << "3scaped\\(paren "
       << "3scaped\\)close "
       << "3scaped\\;comment "
       << "3scaped\\\\ 8ackslash "; // second '\' shouldn't escape ' '
    lexer.pushSource(ss);

    // '-' is not a digit, so this one never reaches the number state.
    EXPECT_EQ(&s_string, State::start(lexer, common_options));
    EXPECT_EQ(s_null, s_string.handle(lexer));
    stringTokenCheck("-1", s_string.getToken(lexer), false);

    // These start out in the number state but end up as strings.
    EXPECT_EQ(&s_number, State::start(lexer, common_options));
    EXPECT_EQ(s_null, s_number.handle(lexer));
    stringTokenCheck("123abc456", s_number.getToken(lexer), false);

    EXPECT_EQ(&s_number, State::start(lexer, common_options));
    EXPECT_EQ(s_null, s_number.handle(lexer));
    stringTokenCheck("123\\456", s_number.getToken(lexer), false);

    // Each remaining token is terminated by an unescaped ' '.  The last
    // two entries confirm that an escaped '\' ends its own token and that
    // the word following it is recognized separately.
    const char* const escaped_tokens[] = {
        "3scaped\\ space",
        "3scaped\\\ttab",
        "3scaped\\(paren",
        "3scaped\\)close",
        "3scaped\\;comment",
        "3scaped\\\\",
        "8ackslash"
    };
    const char* const* const tokens_end =
        escaped_tokens + sizeof(escaped_tokens) / sizeof(escaped_tokens[0]);
    for (const char* const* text = escaped_tokens; text != tokens_end;
         ++text) {
        EXPECT_EQ(&s_number, State::start(lexer, common_options));
        EXPECT_EQ(s_null, s_number.handle(lexer));
        stringTokenCheck(*text, s_number.getToken(lexer));
    }

    // Nothing but EOF is left after that.
    EXPECT_EQ(s_null, State::start(lexer, options));
    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
}
} // end anonymous namespace
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment