Commit f73f2747 authored by JINMEI Tatuya
Browse files

[2382] unrelated fix to lexer: support empty qstring and nul termination.

An empty qstring previously caused an exception, which is a clear bug;
this commit fixes it.  Nul-terminating string regions is an extension,
but I found it useful when implementing RDATA parsers.
parent ca8fc9f4
......@@ -458,8 +458,11 @@ String::handle(MasterLexer& lexer) const {
if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
getLexerImpl(lexer)->source_->ungetChar();
// make sure it is nul-terminated as a C string (the terminator is
// excluded from the token data).
data.push_back('\0');
getLexerImpl(lexer)->token_ =
MasterToken(&data.at(0), data.size());
MasterToken(&data.at(0), data.size() - 1);
return;
}
escaped = (c == '\\' && !escaped);
......@@ -486,7 +489,10 @@ QString::handle(MasterLexer& lexer) const {
escaped = false;
data.back() = '"';
} else {
token = MasterToken(&data.at(0), data.size(), true);
// make sure it is nul-terminated as a C string (the terminator is
// excluded from the token data). This also simplifies the case of an
// empty string.
data.push_back('\0');
token = MasterToken(&data.at(0), data.size() - 1, true);
return;
}
} else if (c == '\n' && !escaped) {
......@@ -529,7 +535,8 @@ Number::handle(MasterLexer& lexer) const {
token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE);
}
} else {
token = MasterToken(&data.at(0), data.size());
data.push_back('\0'); // see String::handle()
token = MasterToken(&data.at(0), data.size() - 1);
}
return;
}
......
......@@ -90,6 +90,13 @@ public:
/// the region. On the other hand, it is not ensured that the string
/// is nul-terminated. So the usual string manipulation API may not work
/// as expected.
///
/// The `MasterLexer` implementation ensures that there are at least
/// len + 1 bytes of valid memory region starting from beg, and that
/// beg[len] is \0. This means the application can use the bytes as a
/// validly nul-terminated C string if there is no intermediate nul
/// character. Note also that due to this property beg is always
/// non-NULL; for an empty string len will be set to 0 and beg[0] is \0.
struct StringRegion {
const char* beg; ///< The start address of the string
size_t len; ///< The length of the string in bytes
......
......@@ -269,6 +269,10 @@ stringTokenCheck(const std::string& expected, const MasterToken& token,
token.getStringRegion().beg +
token.getStringRegion().len);
EXPECT_EQ(expected, actual);
// There should be a "hidden" nul terminator after the string data.
ASSERT_NE(static_cast<const char*>(NULL), token.getStringRegion().beg);
EXPECT_EQ(0, *(token.getStringRegion().beg + token.getStringRegion().len));
}
TEST_F(MasterLexerStateTest, string) {
......@@ -365,6 +369,7 @@ TEST_F(MasterLexerStateTest, stringEscape) {
TEST_F(MasterLexerStateTest, quotedString) {
ss << "\"ignore-quotes\"\n";
ss << "\"quoted string\" "; // space is part of the qstring
ss << "\"\" "; // empty quoted string
// also check other separator characters. note that \r doesn't cause
// UNBALANCED_QUOTES. Not sure if it's intentional, but that's how the
// BIND 9 version works, so we follow it (it should be too minor to matter
......@@ -391,6 +396,11 @@ TEST_F(MasterLexerStateTest, quotedString) {
s_qstring.handle(lexer);
stringTokenCheck("quoted string", s_string.getToken(lexer), true);
// Empty string is okay as qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options));
s_qstring.handle(lexer);
stringTokenCheck("", s_string.getToken(lexer), true);
// Also checks other separator characters within a qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options));
s_qstring.handle(lexer);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment