Commit eb264aec authored by Thomas Markwalder's avatar Thomas Markwalder Committed by Tomek Mrugalski
Browse files

[5680] Refactor stringSanitizer func into StringSanitizer class

modified:
    src/lib/dhcpsrv/d2_client_cfg.cc
    src/lib/dhcpsrv/d2_client_cfg.h
    src/lib/util/strutil.cc
    src/lib/util/strutil.h
    src/lib/util/tests/strutil_unittest.cc
parent ea3d6bca
......@@ -113,7 +113,8 @@ D2ClientConfig::D2ClientConfig(const bool enable_updates,
generated_prefix_(generated_prefix),
qualifying_suffix_(qualifying_suffix),
hostname_char_set_(hostname_char_set),
hostname_char_replacement_(hostname_char_replacement) {
hostname_char_replacement_(hostname_char_replacement),
hostname_sanitizer_(0) {
validateContents();
}
......@@ -133,7 +134,8 @@ D2ClientConfig::D2ClientConfig()
generated_prefix_(DFT_GENERATED_PREFIX),
qualifying_suffix_(""),
hostname_char_set_(DFT_HOSTNAME_CHAR_SET),
hostname_char_replacement_(DFT_HOSTNAME_CHAR_SET) {
hostname_char_replacement_(DFT_HOSTNAME_CHAR_SET),
hostname_sanitizer_(0) {
validateContents();
}
......@@ -172,6 +174,16 @@ D2ClientConfig::validateContents() {
<< server_ip_.toText() << "/" << server_port_);
}
if (!hostname_char_set_.empty()) {
try {
hostname_sanitizer_.reset(new isc::util::str::StringSanitizer(hostname_char_set_,
hostname_char_replacement_));
} catch (const std::exception& ex) {
isc_throw(D2ClientError, "D2ClientConfig: hostname-char-set"
" is not a valid regular expression");
}
}
/// @todo perhaps more validation we should do yet?
/// Are there any invalid combinations of options we need to test against?
}
......
......@@ -17,6 +17,7 @@
#include <cc/user_context.h>
#include <dhcp_ddns/ncr_io.h>
#include <exceptions/exceptions.h>
#include <util/strutil.h>
#include <boost/shared_ptr.hpp>
......@@ -312,6 +313,9 @@ private:
/// @brief A string to replace invalid characters when scrubbing hostnames.
/// Meaningful only if hostname_char_set_ is not empty.
std::string hostname_char_replacement_;
/// @brief Pointer to compiled regular expression string sanitizer
util::str::StringSanitizerPtr hostname_sanitizer_;
};
std::ostream&
......
......@@ -299,79 +299,111 @@ decodeFormattedHexString(const std::string& hex_string,
}
}
std::string
sanitizeString(const std::string& original,
const std::string& invalidChars,
const std::string& replacement) {
class StringSanitizerImpl {
public:
StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement)
: char_set_(char_set), char_replacement_(char_replacement) {
#ifdef USE_REGEX
std::regex rexpr;
try {
rexpr = std::regex(invalidChars, std::regex::extended);
} catch (const std::exception& ex) {
isc_throw(isc::BadValue, "invalid regex: '"
<< invalidChars << "', " << ex.what());
try {
scrub_exp_ = std::regex(char_set, std::regex::extended);
} catch (const std::exception& ex) {
isc_throw(isc::BadValue, "invalid regex: '"
<< char_set_ << "', " << ex.what());
}
#else
int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED);
if (ec) {
char errbuf[512] = "";
static_cast<void>(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf)));
regfree(&scrub_exp_);
isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf);
}
#endif
}
std::stringstream result;
try {
std::regex_replace(std::ostream_iterator<char>(result),
original.begin(), original.end(),
rexpr, replacement);
} catch (const std::exception& ex) {
isc_throw(isc::BadValue, "replacing '" << invalidChars << "' with '"
<< replacement << "' in '" << original << "' failed: ,"
<< ex.what());
/// @brief Destructor.
~StringSanitizerImpl() {
#ifndef USE_REGEX
regfree(&scrub_exp_);
#endif
}
return (result.str());
std::string scrub(const std::string& original) {
#ifdef USE_REGEX
std::stringstream result;
try {
std::regex_replace(std::ostream_iterator<char>(result),
original.begin(), original.end(),
scrub_expr, char_replacement_);
} catch (const std::exception& ex) {
isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '"
<< char_replacement_ << "' in '" << original << "' failed: ,"
<< ex.what());
}
return (result.str());
#else
// Compile the expression.
regex_t rex;
int ec = regcomp(&rex, invalidChars.c_str(), REG_EXTENDED);
if (ec) {
char errbuf[512] = "";
static_cast<void>(regerror(ec, &rex, errbuf, sizeof(errbuf)));
isc_throw(isc::BadValue, "invalid regex: '" << invalidChars
<< "', " << errbuf);
}
// Iterate over original string, match by match.
const char* origStr = original.c_str();
const char* startFrom = origStr;
const char* endAt = origStr + strlen(origStr);
regmatch_t matches[2]; // n matches + 1
stringstream result;
while (startFrom < endAt) {
// Look for the next match
if (regexec(&scrub_exp_, startFrom, 1, matches, 0) == REG_NOMATCH) {
// No matches, so add in the remainder
result << startFrom;
break;
}
// Iterate over original string, match by match.
const char* origStr = original.c_str();
const char* startFrom = origStr;
const char* endAt = origStr + strlen(origStr);
regmatch_t matches[2]; // n matches + 1
stringstream result;
while (startFrom < endAt) {
// Look for the next match
if (regexec(&rex, startFrom, 1, matches, 0) == REG_NOMATCH) {
// No matches, so add in the remainder
result << startFrom;
break;
}
// Shouldn't happen, but one never knows eh?
if (matches[0].rm_so == -1) {
isc_throw(isc::Unexpected, "matched but so is -1?");
}
// Shouldn't happen, but one never knows eh?
if (matches[0].rm_so == -1) {
isc_throw(isc::Unexpected, "matched but so is -1?");
}
// Add everything from starting point up to the current match
const char* matchAt = startFrom + matches[0].rm_so;
while (startFrom < matchAt) {
result << *startFrom;
++startFrom;
}
// Add in the replacement
result << char_replacement_;
// Add everything from starting point up to the current match
const char* matchAt = startFrom + matches[0].rm_so;
while (startFrom < matchAt) {
result << *startFrom;
// Move past the match.
++startFrom;
}
// Add in the replacement
result << replacement;
// Move past the match.
++startFrom;
return (result.str());
#endif
}
regfree(&rex);
return (result.str());
private:
std::string char_set_;
std::string char_replacement_;
#ifdef USE_REGEX
regex scrub_exp_;
#else
regex_t scrub_exp_;
#endif
};
StringSanitizer::StringSanitizer(const std::string& char_set,
const std::string& char_replacement)
: impl_(new StringSanitizerImpl(char_set, char_replacement)) {
}
StringSanitizer::~StringSanitizer() {
delete impl_;
}
std::string
StringSanitizer::scrub(const std::string& original) {
return (impl_->scrub(original));
}
} // namespace str
......
......@@ -15,6 +15,7 @@
#include <vector>
#include <exceptions/exceptions.h>
#include <boost/lexical_cast.hpp>
#include <boost/shared_ptr.hpp>
namespace isc {
namespace util {
......@@ -255,25 +256,53 @@ void
decodeFormattedHexString(const std::string& hex_string,
std::vector<uint8_t>& binary);
/// \brief Replaces all occurences of a character set in a string
///
/// This function runs a given string through a regular expression,
/// replacing all "matches" of that expression with the specified string.
///
/// \param original the string to sanitize
/// \param invalidChars string containing a regular expression (POSIX
/// extended syntax) that describes the characters to replace. If you
/// wanted to sanitize hostnames for example, you could specify the
/// inversion of valid characters "[^A-Za-z0-9_-]".
/// \param replacement string of one or more characters to use as the
/// replacement for invalid characters.
/// \return the new, sanitized string
/// \throw BadValue if given an invalid regular expression, Unexpected if
/// an error occurs executing the expression
std::string
sanitizeString(const std::string& original,
const std::string& invalidChars,
const std::string& replacement);
/// @brief Forward declaration to the @c StringSanitizer implementation.
class StringSanitizerImpl;
/// @brief Implements a regular expression based string scrubber
///
/// The implementation uses C++11 regex IF the environemnt supports it
/// (tested in configure.ac). If not it falls back to C lib regcomp/regexec.
/// Older compilers, such as pre Gnu 4.9.0, provided only experimental
/// implementations of regex which are recognized as buggy.
class StringSanitizer {
public:
/// Constructor
///
/// Compiles the given character set into a regular expression, and
/// retains the given character replacement. Thereafter, the instance
/// may be used to scrub an arbitrary number of strings.
///
/// @param char_set string containing a regular expression (POSIX
/// extended syntax) that describes the characters to replace. If you
/// wanted to sanitize hostnames for example, you could specify the
/// inversion of valid characters "[^A-Za-z0-9_-]".
/// @param char_replacement string of one or more characters to use as the
/// replacement for invalid characters.
/// @throw BadValue if given an invalid regular expression
StringSanitizer(const std::string& char_set,
const std::string& char_replacement);
/// @brief Destructor.
///
/// Destroys the implementation instance.
~StringSanitizer();
/// Returns a scrubbed copy of a given string
///
/// Replaces all occurrances of characters described by the regular
/// expression with the character replacement .
///
/// @param original the string to scrub
/// @throw Unexpected if an error occurs during scrubbing
std::string scrub(const std::string& original);
private:
/// @brief Pointer to the @c StringSanitizerImpl.
StringSanitizerImpl* impl_;
};
typedef boost::shared_ptr<StringSanitizer> StringSanitizerPtr;
} // namespace str
} // namespace util
......
......@@ -463,49 +463,81 @@ TEST(StringUtilTest, decodeFormattedHexString) {
isc::BadValue);
}
// Verifies sanitizeString() function
TEST(StringUtilTest, sanitizeString) {
/// @brief Fucntion used to test StringSantitizer
/// @param original - string to sanitize
/// @param char_set - regular expression string describing invalid
/// characters
/// @param char_replacement - character(s) which replace invalid
/// characters
/// @param expected - expected sanitized string
void sanitizeStringTest(
const std::string& original,
const std::string& char_set,
const std::string& char_replacement,
const std::string& expected) {
StringSanitizerPtr ss;
std::string sanitized;
try {
ss.reset(new StringSanitizer(char_set, char_replacement));
} catch (const std::exception& ex) {
ADD_FAILURE() << "Could not construct sanitizer:" << ex.what();
return;
}
try {
sanitized = ss->scrub(original);
} catch (const std::exception& ex) {
ADD_FAILURE() << "Could not scrub string:" << ex.what();
return;
}
EXPECT_EQ(sanitized, expected);
}
// Verifies StringSantizer class
TEST(StringUtilTest, stringSanitizer) {
// Bad regular expression should throw.
ASSERT_THROW (sanitized = sanitizeString("just a string", "[bogus-regex",""),
BadValue);
StringSanitizerPtr ss;
ASSERT_THROW (ss.reset(new StringSanitizer("[bogus-regex","")), BadValue);
// List of invalid chars should work: (b,c,2 are invalid)
ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[b-c2]","*"));
EXPECT_EQ(sanitized, "a**.1*3");
sanitizeStringTest("abc.123", "[b-c2]", "*",
"a**.1*3");
// Inverted list for valid chars should work too: (b,c,2 are valid)
ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[^b-c2]","*"));
EXPECT_EQ(sanitized, "*bc**2*");
// Inverted list of valid chars should work: (b,c,2 are invalid)
sanitizeStringTest("abc.123", "[^b-c2]", "*",
"*bc**2*");
// A string of all valid chars should return an identical string.
ASSERT_NO_THROW (sanitized = sanitizeString("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]","x"));
EXPECT_EQ(sanitized, "-_A--B__Cabc34567_-");
sanitizeStringTest("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x",
"-_A--B__Cabc34567_-");
// Replacing with a character should work.
ASSERT_NO_THROW (sanitized = sanitizeString("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]","*"));
EXPECT_EQ(sanitized, "A*b*c*JoE3*_x*B*Y*e");
sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*",
"A*b*c*JoE3*_x*B*Y*e");
// Removing (i.e.replacing with an "empty" string) should work.
ASSERT_NO_THROW (sanitized = sanitizeString("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]",""));
EXPECT_EQ(sanitized, "AbcJoE3_xBYe");
sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "",
"AbcJoE3_xBYe");
// More than one non-matching in a row should work.
ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","x"));
EXPECT_EQ(sanitized, "xxAxxBxxCxx");
sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x",
"xxAxxBxxCxx");
// Removing than one non-matching in a row should work.
ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]",""));
EXPECT_EQ(sanitized, "ABC");
sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "",
"ABC");
// Replacing with a string should work.
ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","xyz"));
EXPECT_EQ(sanitized, "xyzxyzAxyzxyzBxyzxyzCxyzxyz");
sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz",
"xyzxyzAxyzxyzBxyzxyzCxyzxyz");
// Dots as valid chars work.
ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[^A-Za-z0-9_.]","*"));
EXPECT_EQ(sanitized, "abc.123");
sanitizeStringTest("abc.123", "[^A-Za-z0-9_.]", "*",
"abc.123");
}
} // end of anonymous namespace
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment