Commit f73f2747 authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[2382] unrelated fix to lexer: support empty qstring and nul termination.

An empty qstring previously caused an exception, which is a clear bug
and should be fixed.  Nul-terminating string regions is an extension,
but I found it useful when implementing RDATA parsers.
parent ca8fc9f4
...@@ -458,8 +458,11 @@ String::handle(MasterLexer& lexer) const { ...@@ -458,8 +458,11 @@ String::handle(MasterLexer& lexer) const {
if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) { if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
getLexerImpl(lexer)->source_->ungetChar(); getLexerImpl(lexer)->source_->ungetChar();
// make sure it is nul-terminated as a C string (the terminator is
// excluded from the token data).
data.push_back('\0');
getLexerImpl(lexer)->token_ = getLexerImpl(lexer)->token_ =
MasterToken(&data.at(0), data.size()); MasterToken(&data.at(0), data.size() - 1);
return; return;
} }
escaped = (c == '\\' && !escaped); escaped = (c == '\\' && !escaped);
...@@ -486,7 +489,10 @@ QString::handle(MasterLexer& lexer) const { ...@@ -486,7 +489,10 @@ QString::handle(MasterLexer& lexer) const {
escaped = false; escaped = false;
data.back() = '"'; data.back() = '"';
} else { } else {
token = MasterToken(&data.at(0), data.size(), true); // make sure it nul-terminated as a c-str (excluded from token
// data). This also simplifies the case of an empty string.
data.push_back('\0');
token = MasterToken(&data.at(0), data.size() - 1, true);
return; return;
} }
} else if (c == '\n' && !escaped) { } else if (c == '\n' && !escaped) {
...@@ -529,7 +535,8 @@ Number::handle(MasterLexer& lexer) const { ...@@ -529,7 +535,8 @@ Number::handle(MasterLexer& lexer) const {
token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE); token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE);
} }
} else { } else {
token = MasterToken(&data.at(0), data.size()); data.push_back('\0'); // see String::handle()
token = MasterToken(&data.at(0), data.size() - 1);
} }
return; return;
} }
......
...@@ -90,6 +90,13 @@ public: ...@@ -90,6 +90,13 @@ public:
/// the region. On the other hand, it is not ensured that the string /// the region. On the other hand, it is not ensured that the string
/// is nul-terminated. So the usual string manipulation API may not work /// is nul-terminated. So the usual string manipulation API may not work
/// as expected. /// as expected.
///
/// The `MasterLexer` implementation ensures that there are at least
/// len + 1 bytes of valid memory starting from beg, and that
/// beg[len] is \0. This means the application can use the bytes as a
/// valid nul-terminated C string, provided the data itself contains no
/// embedded nul character. Note also that, due to this property, beg is
/// never NULL; for an empty string len is set to 0 and beg[0] is \0.
struct StringRegion { struct StringRegion {
const char* beg; ///< The start address of the string const char* beg; ///< The start address of the string
size_t len; ///< The length of the string in bytes size_t len; ///< The length of the string in bytes
......
...@@ -269,6 +269,10 @@ stringTokenCheck(const std::string& expected, const MasterToken& token, ...@@ -269,6 +269,10 @@ stringTokenCheck(const std::string& expected, const MasterToken& token,
token.getStringRegion().beg + token.getStringRegion().beg +
token.getStringRegion().len); token.getStringRegion().len);
EXPECT_EQ(expected, actual); EXPECT_EQ(expected, actual);
// There should be a "hidden" nul-terminator after the string data.
ASSERT_NE(static_cast<const char*>(NULL), token.getStringRegion().beg);
EXPECT_EQ(0, *(token.getStringRegion().beg + token.getStringRegion().len));
} }
TEST_F(MasterLexerStateTest, string) { TEST_F(MasterLexerStateTest, string) {
...@@ -365,6 +369,7 @@ TEST_F(MasterLexerStateTest, stringEscape) { ...@@ -365,6 +369,7 @@ TEST_F(MasterLexerStateTest, stringEscape) {
TEST_F(MasterLexerStateTest, quotedString) { TEST_F(MasterLexerStateTest, quotedString) {
ss << "\"ignore-quotes\"\n"; ss << "\"ignore-quotes\"\n";
ss << "\"quoted string\" "; // space is part of the qstring ss << "\"quoted string\" "; // space is part of the qstring
ss << "\"\" "; // empty quoted string
// also check other separator characters. note that \r doesn't cause // also check other separator characters. note that \r doesn't cause
// UNBALANCED_QUOTES. Not sure if it's intentional, but that's how the // UNBALANCED_QUOTES. Not sure if it's intentional, but that's how the
// BIND 9 version works, so we follow it (it should be too minor to matter // BIND 9 version works, so we follow it (it should be too minor to matter
...@@ -391,6 +396,11 @@ TEST_F(MasterLexerStateTest, quotedString) { ...@@ -391,6 +396,11 @@ TEST_F(MasterLexerStateTest, quotedString) {
s_qstring.handle(lexer); s_qstring.handle(lexer);
stringTokenCheck("quoted string", s_string.getToken(lexer), true); stringTokenCheck("quoted string", s_string.getToken(lexer), true);
// Empty string is okay as qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options));
s_qstring.handle(lexer);
stringTokenCheck("", s_string.getToken(lexer), true);
// Also checks other separator characters within a qstring // Also checks other separator characters within a qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options)); EXPECT_EQ(&s_qstring, State::start(lexer, options));
s_qstring.handle(lexer); s_qstring.handle(lexer);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment