Commit 644d7aae authored by Michal 'vorner' Vaner's avatar Michal 'vorner' Vaner
Browse files

[2375] Simplify the state transition design pattern

We never have a chain of the transitions. We only do start and zero or
one handle on the returned one. Simplify handle to return void, not
NULL.

Besides simplification, it allows testing without fake states (TBD) - we
had no real state to test the multiple transitions.
parent 5069933d
......@@ -181,9 +181,9 @@ MasterLexer::getNextToken(Options options) {
// And get the token
// This actually handles EOF internally too.
for (const State *state = start(options); state != NULL;
state = state->handle(*this)) {
// Do nothing here. All is handled in the for cycle header itself.
const State* state = start(options);
if (state != NULL) {
state->handle(*this);
}
// Make sure a token was produced. Since this Can Not Happen, we assert
// here instead of throwing.
......@@ -259,7 +259,7 @@ class CRLF : public State {
public:
CRLF() {}
virtual ~CRLF() {} // see the base class for the destructor
virtual const State* handle(MasterLexer& lexer) const {
virtual void handle(MasterLexer& lexer) const {
// We've just seen '\r'. If this is part of a sequence of '\r\n',
// we combine them as a single END-OF-LINE. Otherwise we treat the
// single '\r' as an EOL and continue tokeniziation from the character
......@@ -276,7 +276,6 @@ public:
}
getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
getLexerImpl(lexer)->last_was_eol_ = true;
return (NULL);
}
};
......@@ -284,14 +283,14 @@ class String : public State {
public:
String() {}
virtual ~String() {} // see the base class for the destructor
virtual const State* handle(MasterLexer& lexer) const;
virtual void handle(MasterLexer& lexer) const;
};
class QString : public State {
public:
QString() {}
virtual ~QString() {} // see the base class for the destructor
virtual const State* handle(MasterLexer& lexer) const;
virtual void handle(MasterLexer& lexer) const;
};
// We use a common instance of a each state in a singleton-like way to save
......@@ -383,7 +382,7 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
}
}
const State*
void
String::handle(MasterLexer& lexer) const {
std::vector<char>& data = getLexerImpl(lexer)->data_;
data.clear();
......@@ -397,14 +396,14 @@ String::handle(MasterLexer& lexer) const {
getLexerImpl(lexer)->source_->ungetChar();
getLexerImpl(lexer)->token_ =
MasterLexer::Token(&data.at(0), data.size());
return (NULL);
return;
}
escaped = (c == '\\' && !escaped);
data.push_back(c);
}
}
const State*
void
QString::handle(MasterLexer& lexer) const {
MasterLexer::Token& token = getLexerImpl(lexer)->token_;
std::vector<char>& data = getLexerImpl(lexer)->data_;
......@@ -415,7 +414,7 @@ QString::handle(MasterLexer& lexer) const {
const int c = getLexerImpl(lexer)->source_->getChar();
if (c == InputSource::END_OF_STREAM) {
token = Token(Token::UNEXPECTED_END);
return (NULL);
return;
} else if (c == '"') {
if (escaped) {
// found escaped '"'. overwrite the preceding backslash.
......@@ -424,12 +423,12 @@ QString::handle(MasterLexer& lexer) const {
data.back() = '"';
} else {
token = MasterLexer::Token(&data.at(0), data.size(), true);
return (NULL);
return;
}
} else if (c == '\n' && !escaped) {
getLexerImpl(lexer)->source_->ungetChar();
token = Token(Token::UNBALANCED_QUOTES);
return (NULL);
return;
} else {
escaped = (c == '\\' && !escaped);
data.push_back(c);
......@@ -455,7 +454,7 @@ public:
set_eol_(set_eol),
callback_(callback)
{}
virtual const State* handle(MasterLexer& lexer) const {
virtual void handle(MasterLexer& lexer) const {
std::string input;
for (size_t i = 0; i < eat_chars_; ++i) {
input += getLexerImpl(lexer)->source_->getChar();
......@@ -470,7 +469,6 @@ public:
if (set_eol_ != NULL) {
getLexerImpl(lexer)->last_was_eol_ = *set_eol_;
}
return (next_);
}
private:
const State* const next_;
......
......@@ -82,16 +82,16 @@ public:
/// \brief Handle the process of one specific state.
///
/// This method is expected to be called on the object returned by
/// start(), and keep called on the returned object until NULL is
/// returned. The call chain will form the complete state transition.
/// start(). In the usual state transition design pattern, it would
/// return the next state. But as we noticed, we never have another
/// state, so we simplify it by not returning anything instead of
/// returning NULL every time.
///
/// \throw MasterLexer::ReadError Unexpected I/O error
/// \throw std::bad_alloc Internal resource allocation failure
///
/// \param lexer The lexer object that holds the main context.
/// \return A pointer to the next state object or NULL if the transition
/// is completed.
virtual const State* handle(MasterLexer& lexer) const = 0;
virtual void handle(MasterLexer& lexer) const = 0;
/// \brief Types of states.
///
......
......@@ -227,7 +227,7 @@ TEST_F(MasterLexerStateTest, crlf) {
// 1. A sequence of \r, \n is recognized as a single 'end-of-line'
EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
EXPECT_EQ(s_null, s_crlf.handle(lexer)); // recognize '\n'
s_crlf.handle(lexer); // recognize '\n'
EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
......@@ -235,22 +235,22 @@ TEST_F(MasterLexerStateTest, crlf) {
// 'end-of-line'. then there will be "initial WS"
EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
// see ' ', "unget" it
EXPECT_EQ(s_null, s_crlf.handle(lexer));
s_crlf.handle(lexer);
EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
// 3. comment between \r and \n
EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
// skip comments, recognize '\n'
EXPECT_EQ(s_null, s_crlf.handle(lexer));
s_crlf.handle(lexer);
EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // skip 'a'
s_string.handle(lexer); // skip 'a'
// 4. \r then EOF
EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
// see EOF, then "unget" it
EXPECT_EQ(s_null, s_crlf.handle(lexer));
s_crlf.handle(lexer);
EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize EOF
EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
}
......@@ -281,41 +281,41 @@ TEST_F(MasterLexerStateTest, string) {
lexer.pushSource(ss);
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
s_string.handle(lexer); // recognize str, see \n
EXPECT_FALSE(s_string.wasLastEOL(lexer));
stringTokenCheck("followed-by-EOL", s_string.getToken(lexer));
EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \r
s_string.handle(lexer); // recognize str, see \r
stringTokenCheck("followed-by-CR", s_string.getToken(lexer));
EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // handle \r...
EXPECT_EQ(s_null, s_crlf.handle(lexer)); // ...and skip it
s_crlf.handle(lexer); // ...and skip it
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' '
s_string.handle(lexer); // recognize str, see ' '
stringTokenCheck("followed-by-space", s_string.getToken(lexer));
// skip ' ', then recognize the next string
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \t
s_string.handle(lexer); // recognize str, see \t
stringTokenCheck("followed-by-tab", s_string.getToken(lexer));
// skip \t, then recognize the next string
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see comment
s_string.handle(lexer); // recognize str, see comment
stringTokenCheck("followed-by-comment", s_string.getToken(lexer));
EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see '('
s_string.handle(lexer); // recognize str, see '('
stringTokenCheck("followed-by-paren", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options)); // str in ()
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize the str, see ')'
s_string.handle(lexer); // recognize the str, see ')'
stringTokenCheck("closing", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see EOF
s_string.handle(lexer); // recognize str, see EOF
stringTokenCheck("followed-by-EOF", s_string.getToken(lexer));
}
......@@ -331,32 +331,32 @@ TEST_F(MasterLexerStateTest, stringEscape) {
lexer.pushSource(ss);
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
s_string.handle(lexer); // recognize str, see ' ' at end
stringTokenCheck("escaped\\ space", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
s_string.handle(lexer); // recognize str, see ' ' at end
stringTokenCheck("escaped\\\ttab", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
s_string.handle(lexer); // recognize str, see ' ' at end
stringTokenCheck("escaped\\(paren", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
s_string.handle(lexer); // recognize str, see ' ' at end
stringTokenCheck("escaped\\)close", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
s_string.handle(lexer); // recognize str, see ' ' at end
stringTokenCheck("escaped\\;comment", s_string.getToken(lexer));
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' in mid
s_string.handle(lexer); // recognize str, see ' ' in mid
stringTokenCheck("escaped\\\\", s_string.getToken(lexer));
// Confirm the word that follows the escaped '\' is correctly recognized.
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
s_string.handle(lexer); // recognize str, see ' ' at end
stringTokenCheck("backslash", s_string.getToken(lexer));
}
......@@ -376,7 +376,7 @@ TEST_F(MasterLexerStateTest, quotedString) {
// by default, '"' doesn't have any special meaning and part of string
EXPECT_EQ(&s_string, State::start(lexer, common_options));
EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
s_string.handle(lexer); // recognize str, see \n
stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer));
EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
EXPECT_TRUE(s_string.wasLastEOL(lexer));
......@@ -386,35 +386,35 @@ TEST_F(MasterLexerStateTest, quotedString) {
const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_FALSE(s_string.wasLastEOL(lexer)); // EOL is canceled due to '"'
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("quoted string", s_string.getToken(lexer), true);
// Also checks other separator characters within a qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("quoted()\t\rstring", s_string.getToken(lexer), true);
// escape character mostly doesn't have any effect in the qstring
// processing
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("escape\\ in quote", s_string.getToken(lexer), true);
// The only exception is the quotation mark itself. Note that the escape
// only works on the quotation mark immediately after it.
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("escaped\"", s_string.getToken(lexer), true);
// quoted '\' then '"'. Unlike the previous case '"' shouldn't be
// escaped.
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("escaped backslash\\\\", s_string.getToken(lexer), true);
// ';' has no meaning in a quoted string (not indicating a comment)
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("no;comment", s_string.getToken(lexer), true);
}
......@@ -427,7 +427,7 @@ TEST_F(MasterLexerStateTest, brokenQuotedString) {
// EOL is encountered without closing the quote
const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
EXPECT_EQ(Token::UNBALANCED_QUOTES,
s_qstring.getToken(lexer).getErrorCode());
......@@ -437,12 +437,12 @@ TEST_F(MasterLexerStateTest, brokenQuotedString) {
// \n is okay in a quoted string if escaped
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true);
// EOF is encountered without closing the quote
EXPECT_EQ(&s_qstring, State::start(lexer, options));
EXPECT_EQ(s_null, s_qstring.handle(lexer));
s_qstring.handle(lexer);
ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
EXPECT_EQ(Token::UNEXPECTED_END, s_qstring.getToken(lexer).getErrorCode());
// If we continue we'll simply see the EOF
......
......@@ -215,33 +215,6 @@ TEST_F(MasterLexerTest, simpleGetToken) {
EXPECT_EQ(45, rest);
}
// A token that takes multiple states.
//
// The first state sets the token as well as the second. The second one should
// survive and be returned.
TEST_F(MasterLexerTest, chainGetToken) {
// Build the states
const MasterLexer::Token t1(MasterLexer::Token::END_OF_LINE);
const MasterLexer::Token t2(MasterLexer::Token::INITIAL_WS);
scoped_ptr<const State> s2(State::getFakeState(NULL, 1, &t2));
scoped_ptr<const State> s1(State::getFakeState(s2.get(), 2, &t1));
lexer.pushFakeStart(s1.get());
// Put something into the source
ss << "12345";
lexer.pushSource(ss);
// Get the token.
const MasterLexer::Token generated(lexer.getNextToken());
// It is the same token as the second one (well, on a different address)
// We can't compare directly, so compare types.
EXPECT_EQ(t2.getType(), generated.getType());
// 3 characters were read from the source.
// We test by extracting the rest and comparing.
int rest;
ss >> rest;
EXPECT_EQ(45, rest);
}
// Test getting a token without overriding the start() method (well, it
// is overriden, but no fake state is set, so it refers to the real one).
//
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment