Commit 31b507f4 authored by JINMEI Tatuya

[2373] supported quoted string state

parent 8aa33163
......@@ -193,7 +193,6 @@ public:
}
};
// Currently this is provided mostly as a place holder
class String : public State {
public:
String() {}
......@@ -201,6 +200,13 @@ public:
virtual const State* handle(MasterLexer& lexer) const;
};
// State object used while a quoted string token is being handled
// (selected via State::QString).  It adds no data members of its own;
// all of the work is done in handle().
class QString : public State {
public:
    QString() {}

    // See the base class for the destructor.
    virtual ~QString() {}

    // Implemented out of line with the other state handlers.
    virtual const State* handle(MasterLexer& lexer) const;
};
// We use a common instance of each state in a singleton-like way to save
// construction overhead. They are not singletons in the strict sense, as
// we don't prohibit direct construction of these objects. But that doesn't
......@@ -208,6 +214,7 @@ public:
// this file.
// Shared instance handed out for State::CRLF.
const CRLF CRLF_STATE;
// Shared instance handed out for State::String.
const String STRING_STATE;
// Shared instance handed out for State::QString (quoted string tokens).
const QString QSTRING_STATE;
}
const State&
......@@ -217,6 +224,8 @@ State::getInstance(ID state_id) {
return (CRLF_STATE);
case String:
return (STRING_STATE);
case QString:
return (QSTRING_STATE);
}
// This is a bug of the caller, and this method is only expected to be
......@@ -261,6 +270,9 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
if (paren_count == 0) { // check if we are in () (see above)
return (&CRLF_STATE);
}
} else if (c == '"' && (options & MasterLexer::QSTRING) != 0) {
lexerimpl.last_was_eol_ = false;
return (&QSTRING_STATE);
} else if (c == '(') {
lexerimpl.last_was_eol_ = false;
++paren_count;
......@@ -284,7 +296,6 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
const State*
String::handle(MasterLexer& lexer) const {
std::vector<char>& data = getLexerImpl(lexer)->data_;
MasterLexer::Token& token = getLexerImpl(lexer)->token_;
data.clear();
bool escaped = false;
......@@ -298,14 +309,48 @@ String::handle(MasterLexer& lexer) const {
(!escaped &&
(c == ' ' || c == '\t' || c == '(' || c == ')'))) {
getLexerImpl(lexer)->source_->ungetChar();
token = MasterLexer::Token(&data.at(0), data.size());
getLexerImpl(lexer)->token_ =
MasterLexer::Token(&data.at(0), data.size());
return (NULL);
}
escaped = (!escaped && (c == '\\'));
escaped = (c == '\\' && !escaped);
data.push_back(c);
}
}
// Consume the body of a quoted string and store the resulting token in
// the lexer.
//
// This handler is expected to be entered right after the opening '"' has
// been read (State::start() returns the QString state when it sees '"'
// with the QSTRING option set).  Characters are accumulated in the lexer's
// data buffer until one of the following happens:
// - an unescaped '"' is read: the token becomes a quoted string token
//   covering the accumulated characters (the quotes themselves excluded);
// - an unescaped '\n' is read: the newline is pushed back to the source
//   and the token becomes the UNBALANCED_QUOTES error;
// - the end of the stream is reached: the token becomes the UNEXPECTED_END
//   error.
//
// A backslash only has a special effect on the character immediately
// following it: "\"" yields a literal '"' (the backslash is dropped) and
// an escaped '\n' is kept as part of the string.  In every other case the
// backslash is stored as is.
//
// The return value is always NULL, i.e., there is no subsequent state.
const State*
QString::handle(MasterLexer& lexer) const {
    MasterLexer::Token& token = getLexerImpl(lexer)->token_;
    std::vector<char>& data = getLexerImpl(lexer)->data_;
    data.clear();

    bool escaped = false;
    while (true) {
        const int c = getLexerImpl(lexer)->source_->getChar();
        if (c == InputSource::END_OF_STREAM) {
            token = Token(Token::UNEXPECTED_END);
            return (NULL);
        } else if (c == '"') {
            if (escaped) {
                // found escaped '"'. overwrite the preceding backslash.
                assert(!data.empty());
                escaped = false;
                data.back() = '"';
            } else {
                // An empty quoted string ("") leaves the buffer empty, and
                // data.at(0) would then throw std::out_of_range.  Push a
                // dummy byte so that there is a valid address to hand to
                // the token, while still reporting the real length (0).
                const std::vector<char>::size_type len = data.size();
                if (len == 0) {
                    data.push_back('\0');
                }
                token = MasterLexer::Token(&data.at(0), len, true);
                return (NULL);
            }
        } else if (c == '\n' && !escaped) {
            // Leave the newline in the source so that the caller can
            // resume from the end-of-line after the error.
            getLexerImpl(lexer)->source_->ungetChar();
            token = Token(Token::UNBALANCED_QUOTES);
            return (NULL);
        } else {
            // A backslash escapes the next character only if it isn't
            // itself escaped.
            escaped = (c == '\\' && !escaped);
            data.push_back(c);
        }
    }
}
} // namespace master_lexer_internal
} // end of namespace dns
......
......@@ -98,7 +98,8 @@ public:
/// a way to get an instance of a specific state.
enum ID {
CRLF, ///< Just seen a carriage-return character
String ///< Handling a string token
String, ///< Handling a string token
QString ///< Handling a quoted string token
};
/// \brief Returns a \c State instance of the given state.
......
......@@ -32,6 +32,7 @@ protected:
s_null(NULL),
s_crlf(State::getInstance(State::CRLF)),
s_string(State::getInstance(State::String)),
s_qstring(State::getInstance(State::QString)),
options(MasterLexer::NONE),
orig_options(options)
{}
......@@ -42,6 +43,7 @@ protected:
const State* const s_null;
const State& s_crlf;
const State& s_string;
const State& s_qstring;
std::stringstream ss;
MasterLexer::Options options, orig_options;
};
......@@ -254,9 +256,10 @@ TEST_F(MasterLexerStateTest, crlf) {
}
void
stringTokenCheck(const std::string& expected, const MasterLexer::Token& token)
stringTokenCheck(const std::string& expected, const MasterLexer::Token& token,
bool quoted = false)
{
EXPECT_EQ(Token::STRING, token.getType());
EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType());
EXPECT_EQ(expected, token.getString());
const std::string actual(token.getStringRegion().beg,
token.getStringRegion().beg +
......@@ -350,4 +353,84 @@ TEST_F(MasterLexerStateTest, stringEscape) {
stringTokenCheck("escaped\\\\", s_string.getToken(lexer));
}
// Exercises the quoted string state with well-formed input, with and
// without the QSTRING option.
TEST_F(MasterLexerStateTest, quotedString) {
    ss << "\"ignore-quotes\"\n"
       << "\"quoted string\" "
       << "\"escape\\ in quote\" "
       << "\"escaped\\\"\" "
       << "\"escaped backslash\\\\\" "
       << "\"no;comment\"";
    lexer.pushSource(ss);

    // Without the QSTRING option a '"' is an ordinary character, so the
    // quotes end up in a plain string token.
    EXPECT_EQ(&s_string, State::start(lexer, common_options));
    EXPECT_EQ(s_null, s_string.handle(lexer)); // stops at the \n
    stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer));
    EXPECT_EQ(s_null, State::start(lexer, common_options)); // consumes the \n
    EXPECT_TRUE(s_string.wasLastEOL(lexer));

    // With the QSTRING option a '"' opens a quoted string.
    const MasterLexer::Options qstring_options =
        common_options | MasterLexer::QSTRING;
    EXPECT_EQ(&s_qstring, State::start(lexer, qstring_options));
    EXPECT_FALSE(s_string.wasLastEOL(lexer)); // '"' cancels the EOL flag
    EXPECT_EQ(s_null, s_qstring.handle(lexer));
    stringTokenCheck("quoted string", s_string.getToken(lexer), true);

    // The remaining inputs all follow the same start/handle/check pattern.
    const char* const expected_strings[] = {
        // A backslash mostly has no effect inside a quoted string.
        "escape\\ in quote",
        // The one exception is a quotation mark right after the backslash.
        "escaped\"",
        // An escaped backslash followed by '"': the '"' must not be
        // treated as escaped.
        "escaped backslash\\\\",
        // ';' doesn't start a comment inside a quoted string.
        "no;comment"
    };
    const size_t expected_count =
        sizeof(expected_strings) / sizeof(expected_strings[0]);
    for (size_t i = 0; i < expected_count; ++i) {
        EXPECT_EQ(&s_qstring, State::start(lexer, qstring_options));
        EXPECT_EQ(s_null, s_qstring.handle(lexer));
        stringTokenCheck(expected_strings[i], s_string.getToken(lexer), true);
    }
}
// Exercises the quoted string state with malformed input: a quote left
// open at the end of a line and at the end of the input.
TEST_F(MasterLexerStateTest, brokenQuotedString) {
    ss << "\"unbalanced-quote\n"
       << "\"quoted\\\n\" "
       << "\"unclosed quote and EOF";
    lexer.pushSource(ss);

    const MasterLexer::Options qstring_options =
        common_options | MasterLexer::QSTRING;

    // The line ends before the closing quote is seen.
    EXPECT_EQ(&s_qstring, State::start(lexer, qstring_options));
    EXPECT_EQ(s_null, s_qstring.handle(lexer));
    ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
    EXPECT_EQ(Token::UNBALANCED_QUOTES,
              s_qstring.getToken(lexer).getErrorCode());

    // Lexing can resume from the '\n' that triggered the error.
    EXPECT_EQ(s_null, State::start(lexer, qstring_options));
    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());

    // An escaped \n is accepted as part of the quoted string.
    EXPECT_EQ(&s_qstring, State::start(lexer, qstring_options));
    EXPECT_EQ(s_null, s_qstring.handle(lexer));
    stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true);

    // The input ends before the closing quote is seen.
    EXPECT_EQ(&s_qstring, State::start(lexer, qstring_options));
    EXPECT_EQ(s_null, s_qstring.handle(lexer));
    ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
    EXPECT_EQ(Token::UNEXPECTED_END,
              s_qstring.getToken(lexer).getErrorCode());

    // Going on from there simply reports the end of file.
    EXPECT_EQ(s_null, State::start(lexer, qstring_options));
    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
}
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment