strutil.h 8.43 KB
Newer Older
1
// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
2
//
3 4 5
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6

7 8
#ifndef STRUTIL_H
#define STRUTIL_H
9 10 11

#include <algorithm>
#include <cctype>
12
#include <stdint.h>
13
#include <string>
14
#include <sstream>
15
#include <vector>
16 17
#include <exceptions/exceptions.h>
#include <boost/lexical_cast.hpp>
18 19

namespace isc {
20 21
namespace util {
namespace str {
22 23 24

/// \brief A Set of C++ Utilities for Manipulating Strings

25 26 27 28 29 30 31 32 33 34
///
/// \brief A standard string util exception that is thrown if getToken or
/// numToToken are called with bad input data
///
class StringTokenError : public Exception {
public:
    StringTokenError(const char* file, size_t line, const char* what) :
        isc::Exception(file, line, what) {}
};

35 36
/// \brief Normalize Backslash
///
37 38 39 40
/// Only relevant to Windows, this replaces all "\" in a string with "/"
/// and returns the result.  On other systems it is a no-op.  Note
/// that Windows does recognize file names with the "\" replaced by "/"
/// (at least in system calls, if not the command line).
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
///
/// \param name Name to be substituted.  It is taken by non-const reference
///        and the function returns nothing, so the substitution is presumably
///        made in place on the caller's string (confirm against the
///        implementation).
void normalizeSlash(std::string& name);


/// \brief Trim Leading and Trailing Spaces
///
/// Returns a copy of the input string but with any leading or trailing spaces
/// or tabs removed.  The argument is taken by const reference, so the
/// caller's string is left untouched.
///
/// \param instring Input string to trim
///
/// \return Copy of the input with leading and trailing spaces and tabs removed
std::string trim(const std::string& instring);


/// \brief Split String into Tokens
///
/// Splits a string into tokens (the tokens being delimited by one or more of
/// the delimiter characters) and returns the tokens in a vector. Note
/// that adjacent delimiters are considered to be a single delimiter.
///
/// Special cases are:
/// -# The empty string is considered to be zero tokens.
/// -# A string comprising nothing but delimiters is considered to be zero
///    tokens.
///
/// The reasoning behind this is that the string can be thought of as having
/// invisible leading and trailing delimiter characters.  Therefore both cases
/// reduce to a set of contiguous delimiters, which are considered a single
/// delimiter (so getting rid of the string).
///
/// We could use Boost for this, but this (simple) function eliminates one
/// dependency in the code.
///
/// \param text String to be split.  It is taken by const reference, so the
/// caller's string is not modified.
/// \param delim Delimiter characters; a token ends at any one of them.  If
/// omitted, the delimiters are space, tab and newline.
///
/// \return Vector of tokens.
std::vector<std::string> tokens(const std::string& text,
        const std::string& delim = std::string(" \t\n"));


/// \brief Uppercase Character
///
/// Used in uppercase() to pass as an argument to std::transform().  The
/// function std::toupper() can't be used directly as it takes an "int" as its
/// argument; this confuses the template expansion mechanism because
/// dereferencing a string::iterator returns a char.
///
/// The character is converted to "unsigned char" before being handed to
/// std::toupper().  That function requires its argument to be representable
/// as an unsigned char (or to be EOF); where plain char is signed, a
/// character with the top bit set would otherwise be passed as a negative
/// value, which is undefined behaviour.
///
/// \param chr Character to be upper-cased.
///
/// \return Uppercase version of the argument
inline char toUpper(char chr) {
    return (static_cast<char>(
                std::toupper(static_cast<unsigned char>(chr))));
}


/// \brief Uppercase String
///
/// A convenience function to uppercase a string.
///
/// \param text String to be upper-cased.
inline void uppercase(std::string& text) {
    std::transform(text.begin(), text.end(), text.begin(),
107
        isc::util::str::toUpper);
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
}

/// \brief Lowercase Character
///
/// Used in lowercase() to pass as an argument to std::transform().  The
/// function std::tolower() can't be used directly as it takes an "int" as its
/// argument; this confuses the template expansion mechanism because
/// dereferencing a string::iterator returns a char.
///
/// The character is converted to "unsigned char" before being handed to
/// std::tolower().  That function requires its argument to be representable
/// as an unsigned char (or to be EOF); where plain char is signed, a
/// character with the top bit set would otherwise be passed as a negative
/// value, which is undefined behaviour.
///
/// \param chr Character to be lower-cased.
///
/// \return Lowercase version of the argument
inline char toLower(char chr) {
    return (static_cast<char>(
                std::tolower(static_cast<unsigned char>(chr))));
}

/// \brief Lowercase String
///
/// A convenience function to lowercase a string
///
/// \param text String to be lower-cased.
inline void lowercase(std::string& text) {
    std::transform(text.begin(), text.end(), text.begin(),
131
        isc::util::str::toLower);
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
}


/// \brief Apply Formatting
///
/// Given a printf-style format string containing only "%s" place holders
/// (others are ignored) and a vector of strings, this produces a single string
/// with the placeholders replaced.
///
/// NOTE(review): what happens when the number of "%s" place holders differs
/// from the number of arguments cannot be told from this declaration; check
/// the implementation before relying on either case.
///
/// \param format Format string containing the "%s" place holders
/// \param args Vector of argument strings to be substituted
///
/// \return Resultant string
std::string format(const std::string& format,
    const std::vector<std::string>& args);


149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
/// \brief Returns one token from the given stringstream
///
/// The token is read using the >> operator, with basic error checking.
///
/// \exception StringTokenError if the token cannot be read from the stream
///
/// \param iss stringstream to read one token from.  It is taken by non-const
///        reference, as extracting from a stream alters it.
///
/// \return the first token read from the stringstream
std::string getToken(std::istringstream& iss);

/// \brief Converts a string token to an *unsigned* integer.
///
/// The value is converted using a lexical cast, with error and bounds
/// checking.
///
/// NumType is a *signed* integral type (e.g. int32_t) that is sufficiently
/// wide to store resulting integers.
///
/// BitSize is the maximum number of bits that the resulting integer can take.
/// This function first checks whether the given token can be converted to
/// an integer of NumType type.  It then confirms the conversion result is
171
/// within the valid range, i.e., [0, 2^BitSize - 1].  The second check is
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
/// necessary because lexical_cast<T> where T is an unsigned integer type
/// doesn't correctly reject negative numbers when compiled with SunStudio.
///
/// \exception StringTokenError if the value is out of range, or if it
///            could not be converted
///
/// \param num_token the string token to convert
///
/// \return the converted value, of type NumType
template <typename NumType, int BitSize>
NumType
tokenToNum(const std::string& num_token) {
    NumType num;
    try {
        num = boost::lexical_cast<NumType>(num_token);
187
    } catch (const boost::bad_lexical_cast&) {
188 189 190 191 192 193 194 195 196 197
        isc_throw(StringTokenError, "Invalid SRV numeric parameter: " <<
                  num_token);
    }
    if (num < 0 || num >= (static_cast<NumType>(1) << BitSize)) {
        isc_throw(StringTokenError, "Numeric SRV parameter out of range: " <<
                  num);
    }
    return (num);
}

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
/// \brief Converts a string in quotes into vector.
///
/// The input string is first trimmed. If the trimmed string is in
/// quotes, the quotes are removed and the resulting string is copied
/// into a vector, which is returned. If the string is not in quotes,
/// an empty vector is returned.
///
/// This function is intended to be used by the server configuration
/// parsers to convert string values surrounded with quotes into
/// binary form.
///
/// \param quoted_string String to be converted.
/// \return Vector containing the converted string, or an empty vector if
/// the input string didn't contain the expected quote characters.
std::vector<uint8_t>
quotedStringToBinary(const std::string& quoted_string);

217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
/// \brief Converts a string of hexadecimal digits with colons into
///  a vector.
///
/// This function supports the following formats (one or two hexadecimal
/// digits between each pair of colons):
/// - yy:yy:yy:yy:yy
/// - y:y:y:y:y
/// - y:yy:yy:y:y
///
/// If the decoded string doesn't match any of the supported formats,
/// an exception is thrown.
///
/// \param hex_string Input string.
/// \param binary Vector receiving the binary form of the input string.
/// \throw isc::BadValue if the format of the input string is invalid.
void
decodeColonSeparatedHexString(const std::string& hex_string,
                              std::vector<uint8_t>& binary);

/// \brief Converts a formatted string of hexadecimal digits into
/// a vector.
///
/// This function supports the formats supported by
/// \ref decodeColonSeparatedHexString and the following additional
/// formats:
/// - yyyyyyyyyy
/// - 0xyyyyyyyyyy
///
/// If there is an odd number of hexadecimal digits in the input
/// string, a '0' is prepended to the string before decoding.
///
/// \param hex_string Input string.
/// \param binary Vector receiving the binary form of the input string.
/// \throw isc::BadValue if the format of the input string is invalid.
void
decodeFormattedHexString(const std::string& hex_string,
                         std::vector<uint8_t>& binary);


255 256
} // namespace str
} // namespace util
257 258
} // namespace isc

259
#endif // STRUTIL_H