Commit 761ece75 authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[master] Merge branch 'trac2572'

parents e906f116 bd51648e
......@@ -18,17 +18,28 @@
#include <dns/master_lexer_inputsource.h>
#include <dns/master_lexer_state.h>
#include <boost/foreach.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/lexical_cast.hpp>
#include <bitset>
#include <cassert>
#include <limits>
#include <string>
#include <vector>
namespace isc {
namespace dns {
// The definition of SOURCE_SIZE_UNKNOWN. Note that we initialize it using
// a method of another library. Technically, this could trigger a static
// initialization fiasco. But in this particular usage it's very unlikely
// to happen because this value is expected to be used only as a return
// value of a MasterLexer's method, and its constructor needs definitions
// here.
const size_t MasterLexer::SOURCE_SIZE_UNKNOWN =
std::numeric_limits<size_t>::max();
namespace {
typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
} // end unnamed namespace
......@@ -172,6 +183,30 @@ MasterLexer::getSourceLine() const {
return (impl_->sources_.back()->getCurrentLine());
}
size_t
MasterLexer::getTotalSourceSize() const {
size_t total_size = 0;
BOOST_FOREACH(InputSourcePtr& src, impl_->sources_) {
// If the size of any pushed source is unknown, the total is also
// considered unknown.
if (src->getSize() == SOURCE_SIZE_UNKNOWN) {
return (SOURCE_SIZE_UNKNOWN);
}
total_size += src->getSize();
}
return (total_size);
}
size_t
MasterLexer::getPosition() const {
size_t position = 0;
BOOST_FOREACH(InputSourcePtr& src, impl_->sources_) {
position += src->getPosition();
}
return (position);
}
const MasterToken&
MasterLexer::getNextToken(Options options) {
if (impl_->source_ == NULL) {
......
......@@ -331,6 +331,16 @@ public:
const MasterToken token_;
};
/// \brief Special value for input source size meaning "unknown".
///
/// This constant value will be used as a return value of
/// \c getTotalSourceSize() when the size of one of the pushed sources
/// is unknown. Note that this value itself is a valid integer in the
/// range of the type, so there's still a small possibility of
/// ambiguity. In practice, however, the value should be sufficiently
/// large to eliminate that possibility.
static const size_t SOURCE_SIZE_UNKNOWN;
/// \brief Options for getNextToken.
///
/// A compound option, indicating multiple options are set, can be
......@@ -387,6 +397,10 @@ public:
/// The caller can explicitly tell \c MasterLexer to stop using the
/// stream by calling the \c popSource() method.
///
/// The data in \c input must be complete at the time of this call.
/// The behavior of the lexer is undefined if the caller builds or adds
/// data in \c input after pushing it.
///
/// \param input An input stream object that produces textual
/// representation of DNS RRs.
void pushSource(std::istream& input);
......@@ -443,6 +457,57 @@ public:
/// \return The current line number of the source (see the description)
size_t getSourceLine() const;
/// \brief Return the total size of pushed sources.
///
/// This method returns the sum of the size of sources that have been
/// pushed to the lexer by the time of the call. It gives the
/// caller a hint about the amount of data the lexer is working on.
///
/// The size of a normal file is equal to the file size at the time
/// the source is pushed. The size of any other type of input stream is
/// the size of the data available in the stream at the time the
/// source is pushed.
///
/// In some special cases, it's possible that the size of the file or
/// stream is unknown. It happens, for example, if the standard input
/// is associated with a pipe from the output of another process and it's
/// specified as an input source. If the size of any of the pushed
/// sources is unknown, this method returns SOURCE_SIZE_UNKNOWN.
///
/// If there is no source pushed in the lexer, it returns 0.
///
/// \throw None
size_t getTotalSourceSize() const;
/// \brief Return the position of lexer in the currently pushed sources.
///
/// This method returns the position in terms of the number of recognized
/// characters from all sources. Roughly speaking, the position in a
/// single source is the offset from the beginning of the file or stream
/// to the current "read cursor" of the lexer, and the return value of
/// this method is the sum of the position in all the pushed sources.
///
/// If the lexer reaches the end for each of all the pushed sources,
/// the return value should be equal to that of \c getTotalSourceSize().
///
/// If there is no source pushed in the lexer, it returns 0.
///
/// The return values of this method and \c getTotalSourceSize() would
/// give the caller an idea of the progress of the lexer at the time of
/// the call. Note, however, that since it's not predictable whether
/// more sources will be pushed after the call, the progress determined
/// this way may not make much sense; it can only give an informational
/// hint of the progress.
///
/// Note also that if a source is popped, this method will normally return
/// a smaller number by definition (and so will \c getTotalSourceSize()).
/// Likewise, the conceptual "read cursor" would move backward after a
/// call to \c ungetToken(), in which case this method will return a
/// smaller value, too.
///
/// \throw None
size_t getPosition() const;
/// \brief Parse and return another token from the input.
///
/// It reads a bit of the last opened source and produces another token
......
......@@ -15,6 +15,9 @@
#include <dns/master_lexer_inputsource.h>
#include <dns/master_lexer.h>
#include <istream>
#include <iostream>
#include <cassert>
#include <cerrno>
#include <cstring>
......@@ -31,6 +34,37 @@ createStreamName(const std::istream& input_stream) {
return (ss.str());
}
// Determine the size of the data held by the given stream: seek to the end,
// read the offset there, then rewind to the beginning.
//
// Returns MasterLexer::SOURCE_SIZE_UNKNOWN (after clearing the stream's
// error state) when the seek to the end fails without the bad bit set.
// Throws InputSource::OpenError when the stream is in a bad state, when the
// end offset cannot be obtained, or when rewinding fails.
size_t
getStreamSize(std::istream& is) {
    is.seekg(0, std::ios_base::end);

    // A bad stream has lost its integrity; there is no point in going on,
    // so this is reported as a fatal error.
    if (is.bad()) {
        isc_throw(InputSource::OpenError,
                  "failed to seek end of input source");
    }

    // Otherwise a failure here is specific to seekg().  The expected cause
    // in this context is a stream tied to a special kind of file such as a
    // pipe, where the other operations of the input source are still
    // expected to work.  Reset the error state so later operations aren't
    // confused, and report the size as unknown.
    if (is.fail()) {
        is.clear();
        return (MasterLexer::SOURCE_SIZE_UNKNOWN);
    }

    const std::streampos end_pos = is.tellg();
    if (end_pos == -1) {
        isc_throw(InputSource::OpenError, "failed to get input size");
    }

    // Put the read cursor back at the start so reading begins from there.
    is.seekg(0, std::ios_base::beg);
    if (is.fail()) {
        isc_throw(InputSource::OpenError,
                  "failed to seek beginning of input source");
    }

    assert(end_pos >= 0);
    return (end_pos);
}
} // end of unnamed namespace
// Explicit definition of class static constant. The value is given in the
......@@ -42,31 +76,44 @@ InputSource::InputSource(std::istream& input_stream) :
line_(1),
saved_line_(line_),
buffer_pos_(0),
total_pos_(0),
name_(createStreamName(input_stream)),
input_(input_stream)
input_(input_stream),
input_size_(getStreamSize(input_))
{}
InputSource::InputSource(const char* filename) :
at_eof_(false),
line_(1),
saved_line_(line_),
buffer_pos_(0),
name_(filename),
input_(file_stream_)
{
namespace {
// A helper to initialize InputSource::input_ in the member initialization
// list.
// Opens the file named by filename on the given file stream and returns
// that stream.  errno is reset before the open so that, on failure, a
// non-zero errno can be appended to the error text via std::strerror().
// On failure an OpenError is raised with that text.
//
// NOTE(review): this listing carries two forms of the open call, of the
// failure check and of the isc_throw line: one using the member
// file_stream_ / unqualified OpenError, and one using the parameter
// file_stream / InputSource::OpenError.  Presumably these are the removed
// and added lines of the change shown side by side -- confirm which form
// is live in the actual file.
std::istream&
openFileStream(std::ifstream& file_stream, const char* filename) {
// Clear any stale error code so the check below reflects this open only.
errno = 0;
file_stream_.open(filename);
if (file_stream_.fail()) {
file_stream.open(filename);
if (file_stream.fail()) {
std::string error_txt("Error opening the input source file: ");
error_txt += filename;
// Only mention a cause when errno was actually set.
if (errno != 0) {
error_txt += "; possible cause: ";
error_txt += std::strerror(errno);
}
isc_throw(OpenError, error_txt);
isc_throw(InputSource::OpenError, error_txt);
}
return (file_stream);
}
}
// Constructs an input source that reads from the file named by filename.
//
// The file is opened by openFileStream() while input_ is being initialized,
// and the size is then taken from that same stream by getStreamSize().
// This relies on members being initialized in declaration order:
// file_stream_ is declared before input_, and input_ before input_size_.
InputSource::InputSource(const char* filename) :
at_eof_(false),
line_(1),
saved_line_(line_),
buffer_pos_(0),
// No character has been consumed yet.
total_pos_(0),
name_(filename),
input_(openFileStream(file_stream_, filename)),
input_size_(getStreamSize(input_))
{}
InputSource::~InputSource()
{
if (file_stream_.is_open()) {
......@@ -103,6 +150,7 @@ InputSource::getChar() {
const int c = buffer_[buffer_pos_];
++buffer_pos_;
++total_pos_;
if (c == '\n') {
++line_;
}
......@@ -119,6 +167,7 @@ InputSource::ungetChar() {
"Cannot skip before the start of buffer");
} else {
--buffer_pos_;
--total_pos_;
if (buffer_[buffer_pos_] == '\n') {
--line_;
}
......@@ -127,6 +176,8 @@ InputSource::ungetChar() {
void
InputSource::ungetAll() {
assert(total_pos_ >= buffer_pos_);
total_pos_ -= buffer_pos_;
buffer_pos_ = 0;
line_ = saved_line_;
at_eof_ = false;
......
......@@ -65,12 +65,16 @@ public:
/// \brief Constructor which takes an input stream. The stream is
/// read-from, but it is not closed.
///
/// \throws OpenError If the data size of the input stream cannot be
/// detected.
explicit InputSource(std::istream& input_stream);
/// \brief Constructor which takes a filename to read from. The
/// associated file stream is managed internally.
///
/// \throws OpenError when opening the input file fails.
/// \throws OpenError when opening the input file fails or the size of
/// the file cannot be detected.
explicit InputSource(const char* filename);
/// \brief Destructor
......@@ -83,6 +87,34 @@ public:
return (name_);
}
/// \brief Returns the size of the input source in bytes.
///
/// If the size is unknown, it returns \c MasterLexer::SOURCE_SIZE_UNKNOWN.
///
/// See \c MasterLexer::getTotalSourceSize() for the definition of
/// the size of sources and for when the size can be unknown.
///
/// \throw None
/// \return The size recorded when this object was constructed, or
/// \c MasterLexer::SOURCE_SIZE_UNKNOWN.
size_t getSize() const { return (input_size_); }
/// \brief Returns the current read position in the input source.
///
/// This method returns the position of the character that was last
/// retrieved from the source. Unless some characters have been
/// "ungotten" by \c ungetChar() or \c ungetAll(), this value is equal
/// to the number of calls to \c getChar() until it reaches the
/// END_OF_STREAM. Note that the position of the first character in
/// the source is 1. At the point of the last character, the return value
/// of this method should be equal to that of \c getSize(), and
/// recognizing END_OF_STREAM doesn't increase the position.
///
/// If \c ungetChar() or \c ungetAll() is called, the position is
/// decreased by the number of "ungotten" characters. So the return
/// values may not always monotonically increase.
///
/// \throw None
/// \return The current position; 0 if no character has been retrieved.
size_t getPosition() const { return (total_pos_); }
/// \brief Returns if the input source is at end of file.
bool atEOF() const {
return (at_eof_);
......@@ -142,10 +174,12 @@ private:
std::vector<char> buffer_;
size_t buffer_pos_;
size_t total_pos_;
const std::string name_;
std::ifstream file_stream_;
std::istream& input_;
const size_t input_size_;
};
} // namespace master_lexer_internal
......
......@@ -13,6 +13,7 @@
// PERFORMANCE OF THIS SOFTWARE.
#include <dns/master_lexer_inputsource.h>
#include <dns/master_lexer.h>
#include <exceptions/exceptions.h>
#include <gtest/gtest.h>
......@@ -29,10 +30,13 @@ using namespace isc::dns::master_lexer_internal;
namespace {
const char* const test_input =
"Line1 to scan.\nLine2 to scan.\nLine3 to scan.\n";
class InputSourceTest : public ::testing::Test {
protected:
InputSourceTest() :
str_("Line1 to scan.\nLine2 to scan.\nLine3 to scan.\n"),
str_(test_input),
str_length_(strlen(str_)),
iss_(str_),
source_(iss_)
......@@ -73,6 +77,7 @@ checkGetAndUngetChar(InputSource& source,
{
for (size_t i = 0; i < str_length; ++i) {
EXPECT_EQ(str[i], source.getChar());
EXPECT_EQ(i + 1, source.getPosition());
EXPECT_FALSE(source.atEOF());
}
......@@ -85,6 +90,10 @@ checkGetAndUngetChar(InputSource& source,
// Now, EOF should be set.
EXPECT_TRUE(source.atEOF());
// It doesn't increase the position count.
EXPECT_EQ(str_length, source.getPosition());
EXPECT_EQ(str_length, source.getSize()); // this should be == getSize().
// Now, let's go backwards. This should cause the EOF to be set to
// false.
source.ungetChar();
......@@ -92,6 +101,9 @@ checkGetAndUngetChar(InputSource& source,
// Now, EOF should be false.
EXPECT_FALSE(source.atEOF());
// But the position shouldn't change.
EXPECT_EQ(str_length, source.getPosition());
// This should cause EOF to be set again.
EXPECT_EQ(InputSource::END_OF_STREAM, source.getChar());
......@@ -106,6 +118,7 @@ checkGetAndUngetChar(InputSource& source,
// Skip one character.
source.ungetChar();
EXPECT_EQ(str[index], source.getChar());
EXPECT_EQ(index + 1, source.getPosition());
// Skip the character we received again.
source.ungetChar();
}
......@@ -144,6 +157,7 @@ TEST_F(InputSourceTest, ungetAll) {
// Now we are back to where we started.
EXPECT_EQ(1, source_.getCurrentLine());
EXPECT_FALSE(source_.atEOF());
EXPECT_EQ(0, source_.getPosition());
}
TEST_F(InputSourceTest, compact) {
......@@ -175,6 +189,9 @@ TEST_F(InputSourceTest, compact) {
EXPECT_TRUE(source_.atEOF());
EXPECT_EQ(4, source_.getCurrentLine());
// compact shouldn't change the position count.
EXPECT_EQ(source_.getSize(), source_.getPosition());
// Skip the EOF.
source_.ungetChar();
......@@ -322,4 +339,36 @@ TEST_F(InputSourceTest, saveLine) {
EXPECT_FALSE(source_.atEOF());
}
// Checks InputSource::getSize() in turn for: the fixture's string stream,
// an empty stream, a stream with the fail bit set (size reported as
// unknown), a stream with the bad bit set (construction throws), and a
// source opened by file name.  The steps share one stream object, so their
// order matters.
TEST_F(InputSourceTest, getSize) {
// A simple case using string stream
EXPECT_EQ(strlen(test_input), source_.getSize());
// Check it works with an empty input
istringstream iss("");
EXPECT_EQ(0, InputSource(iss).getSize());
// Pretend there's an error in seeking in the stream. It will be
// considered a seek specific error, and getSize() returns "unknown".
iss.setstate(std::ios_base::failbit);
EXPECT_EQ(MasterLexer::SOURCE_SIZE_UNKNOWN, InputSource(iss).getSize());
// The fail bit should have been cleared.
EXPECT_FALSE(iss.fail());
// Pretend there's a *critical* error in the stream. The constructor will
// throw in the attempt of getting the input size.
iss.setstate(std::ios_base::badbit);
EXPECT_THROW(InputSource isrc(iss), InputSource::OpenError);
// Check with input source from file name. We hardcode the file size
// for simplicity. It won't change too often.
EXPECT_EQ(143, InputSource(TEST_DATA_SRCDIR "/masterload.txt").getSize());
}
// Checks that a freshly constructed InputSource reports position 0, both
// for the fixture's stream-based source and for a file-based source.
TEST_F(InputSourceTest, getPosition) {
// Initially the position is set to 0. Other cases are tested in tests
// for get and unget.
EXPECT_EQ(0, source_.getPosition());
EXPECT_EQ(0, InputSource(TEST_DATA_SRCDIR "/masterload.txt").getPosition());
}
} // end namespace
......@@ -52,6 +52,8 @@ void
checkEmptySource(const MasterLexer& lexer) {
EXPECT_TRUE(lexer.getSourceName().empty());
EXPECT_EQ(0, lexer.getSourceLine());
EXPECT_EQ(0, lexer.getTotalSourceSize());
EXPECT_EQ(0, lexer.getPosition());
}
TEST_F(MasterLexerTest, preOpen) {
......@@ -61,9 +63,11 @@ TEST_F(MasterLexerTest, preOpen) {
TEST_F(MasterLexerTest, pushStream) {
EXPECT_EQ(0, lexer.getSourceCount());
ss << "test";
lexer.pushSource(ss);
EXPECT_EQ(expected_stream_name, lexer.getSourceName());
EXPECT_EQ(1, lexer.getSourceCount());
EXPECT_EQ(4, lexer.getTotalSourceSize()); // 4 = len("test")
// From the point of view of this test, we only have to check (though
// indirectly) getSourceLine calls InputSource::getCurrentLine. It should
......@@ -85,6 +89,10 @@ TEST_F(MasterLexerTest, pushFile) {
EXPECT_EQ(TEST_DATA_SRCDIR "/masterload.txt", lexer.getSourceName());
EXPECT_EQ(1, lexer.getSourceLine());
// 143 = size of the test zone file. hardcode it assuming it won't change
// too often.
EXPECT_EQ(143, lexer.getTotalSourceSize());
lexer.popSource();
checkEmptySource(lexer);
EXPECT_EQ(0, lexer.getSourceCount());
......@@ -116,21 +124,42 @@ TEST_F(MasterLexerTest, pushFileFail) {
}
// Pushes a stream source and then a file source on top of it, checking the
// reported source name and total size after each push and pop.
TEST_F(MasterLexerTest, nestedPush) {
// Give the stream some content so that it has a non-zero size.
ss << "test";
lexer.pushSource(ss);
EXPECT_EQ(expected_stream_name, lexer.getSourceName());
// We can push another source without popping the previous one.
lexer.pushSource(TEST_DATA_SRCDIR "/masterload.txt");
EXPECT_EQ(TEST_DATA_SRCDIR "/masterload.txt", lexer.getSourceName());
// 143 = hardcoded size of the test zone file, 4 = len("test")
EXPECT_EQ(143 + 4, lexer.getTotalSourceSize()); // see above for magic nums
// popSource() works on the "topmost" (last-pushed) source
lexer.popSource();
EXPECT_EQ(expected_stream_name, lexer.getSourceName());
// Only the stream source remains, so the total is its size alone.
EXPECT_EQ(4, lexer.getTotalSourceSize());
lexer.popSource();
EXPECT_TRUE(lexer.getSourceName().empty());
}
// Checks that one source of unknown size makes the total size unknown, and
// that the total becomes known again once that source is popped.
TEST_F(MasterLexerTest, unknownSourceSize) {
// Similar to the previous case, but the size of the second source
// will be considered "unknown" (by emulating an error).
ss << "test";
lexer.pushSource(ss);
EXPECT_EQ(4, lexer.getTotalSourceSize());
stringstream ss2;
ss2.setstate(std::ios_base::failbit); // this will make the size unknown
lexer.pushSource(ss2);
// Then the total size is also unknown.
EXPECT_EQ(MasterLexer::SOURCE_SIZE_UNKNOWN, lexer.getTotalSourceSize());
// If we pop that source, the size becomes known again.
lexer.popSource();
EXPECT_EQ(4, lexer.getTotalSourceSize());
}
TEST_F(MasterLexerTest, invalidPop) {
// popSource() cannot be called if the sources stack is empty.
EXPECT_THROW(lexer.popSource(), isc::InvalidOperation);
......@@ -141,25 +170,31 @@ TEST_F(MasterLexerTest, noSource) {
EXPECT_THROW(lexer.getNextToken(), isc::InvalidOperation);
}
// Test getting some tokens
// Test getting some tokens. It also checks the basic behavior of getPosition().
// Reads the tokens of a small input one by one, checking each token's type
// and the lexer position reported after it.
TEST_F(MasterLexerTest, getNextToken) {
ss << "\n \n\"STRING\"\n";
lexer.pushSource(ss);
// First, the newline should get out.
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(1, lexer.getPosition());
// Then the whitespace, if we specify the option.
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
EXPECT_EQ(2, lexer.getPosition());
// The newline
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(5, lexer.getPosition()); // 1st \n + 3 spaces, then 2nd \n
// The (quoted) string
EXPECT_EQ(MasterToken::QSTRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
EXPECT_EQ(5 + 8, lexer.getPosition()); // 8 = len("STRING") + quotes
// And the end of line and file
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(5 + 8 + 1, lexer.getPosition()); // previous + 3rd \n
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(5 + 8 + 1, lexer.getPosition()); // position doesn't change
}
// Test we correctly find end of file.
......@@ -204,20 +239,25 @@ TEST_F(MasterLexerTest, getUnbalancedString) {
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
}
// Test ungetting tokens works
// Test ungetting tokens works. Also checks that getPosition() is adjusted.
TEST_F(MasterLexerTest, ungetToken) {
ss << "\n (\"string\"\n) more";
lexer.pushSource(ss);
// Try getting the newline
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(1, lexer.getPosition());
// Return it and get again
lexer.ungetToken();
EXPECT_EQ(0, lexer.getPosition());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(1, lexer.getPosition());
// Get the string and return it back
EXPECT_EQ(MasterToken::QSTRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
EXPECT_EQ(string("\n (\"string\"").size(), lexer.getPosition());
lexer.ungetToken();
EXPECT_EQ(1, lexer.getPosition()); // back to just after 1st \n
// But if we change the options, it honors them
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::QSTRING |
......@@ -253,7 +293,8 @@ TEST_F(MasterLexerTest, ungetRealOptions) {
}
// Check the initial whitespace is found even in the first line of included
// file
// file. It also confirms getPosition() works for multiple sources, each
// of which is partially parsed.
TEST_F(MasterLexerTest, includeAndInitialWS) {
ss << " \n";
lexer.pushSource(ss);
......@@ -263,9 +304,11 @@ TEST_F(MasterLexerTest, includeAndInitialWS) {
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
EXPECT_EQ(1, lexer.getPosition());
lexer.pushSource(ss2);
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
EXPECT_EQ(2, lexer.getPosition()); // should be sum of position positions.
}
// Test only one token can be ungotten
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment