// csv_file.h
// Copyright (C) 2014 Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.

#ifndef CSV_FILE_H
#define CSV_FILE_H

#include <exceptions/exceptions.h>
#include <boost/lexical_cast.hpp>
#include <boost/shared_ptr.hpp>
#include <fstream>
#include <ostream>
#include <string>
#include <vector>

namespace isc {
namespace util {

/// @brief Exception thrown when an error occurs during CSV file processing.
class CSVFileError : public Exception {
public:
    CSVFileError(const char* file, size_t line, const char* what) :
        isc::Exception(file, line, what) { };
};

/// @brief Represents a single row of the CSV file.
///
/// The object of this type can create the string holding a collection of the
39 40 41
/// comma separated values, representing a row of the CSV file. It allows the
/// selection of any character as a separator for the values. The default
/// separator is the comma symbol.
42 43 44 45
///
/// The @c CSVRow object can be constructed in two different ways. The first
/// option is that the caller creates an object holding empty values
/// and then adds values one by one. Note that it is possible to either add
46
/// a string or a number. The number is converted to the appropriate text
47 48 49 50
/// representation. When all the values are added, the text representation of
/// the row can be obtained by calling @c CSVRow::render function or output
/// stream operator.
///
51 52 53 54 55
/// The @c CSVRow object can be also constructed by parsing a row of a CSV
/// file. In this case, the separator has to be known in advance and passed to
/// the class constructor. The constructor will call the @c CSVRow::parse
/// function internally to tokenize the CSV row and create the collection of
/// values. The class accessors can be then used to retrieve individual values.
56 57
///
/// This class is meant to be used by the @c CSVFile class to manipulate
58
/// individual rows of the CSV file.
59 60 61 62 63 64 65 66 67 68 69 70 71 72
class CSVRow {
public:

    /// @brief Constructor, creates the raw to be used for output.
    ///
    /// Creates CSV row with empty values. The values should be
    /// later set using the @c CSVRow::writeAt functions. When the
    /// @c CSVRow::render is called, the text representation of the
    /// row will be created using a separator character specified
    /// as an argument of this constructor.
    ///
    /// This constructor is exception-free.
    ///
    /// @param cols Number of values in the row.
73
    /// @param separator Character used as a separator between values in the
74
    /// text representation of the row.
75
    CSVRow(const size_t cols = 0, const char separator = ',');
76 77 78 79 80 81

    /// @brief Constructor, parses a single row of the CSV file.
    ///
    /// This constructor should be used to parse a single row of the CSV
    /// file. The separator being used for the particular row needs to
    /// be known in advance and specified as an argument of the constructor
82
    /// if other than the default separator is used in the row being parsed.
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
    /// An example string to be parsed by this function looks as follows:
    /// "foo,bar,foo-bar".
    ///
    /// This constructor is exception-free.
    ///
    /// @param text Text representation of the CSV row.
    /// @param separator Character being used as a separator in a parsed file.
    CSVRow(const std::string& text, const char separator = ',');

    /// @brief Returns number of values in a CSV row.
    size_t getValuesCount() const {
        return (values_.size());
    }

    /// @brief Parse the CSV file row.
    ///
    /// This function parses a string containing CSV values and assigns them
    /// to the @c values_ private container. These values can be retrieved
    /// from the container by calling @c CSVRow::readAt function.
    ///
    /// This function is exception-free.
    ///
    /// @param line String holding a row of comma separated values.
    void parse(const char* line);

    /// @brief Retrieves a value from the internal container.
    ///
    /// @param at Index of the value in the container. The values are indexed
    /// from 0, where 0 corresponds to the left-most value in the CSV file row.
    ///
    /// @return Value at specified index in the text form.
    ///
    /// @throw CSVFileError if the index is out of range. The number of elements
    /// being held by the container can be obtained using
    /// @c CSVRow::getValuesCount.
    std::string readAt(const size_t at) const;

120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
    /// @brief Retrieves a value from the internal container.
    ///
    /// This method is reads a value from the internal container and converts
    /// this value to the type specified as a template parameter. Internally
    /// it uses @c boost::lexical_cast.
    ///
    /// @param at Index of the value in the container. The values are indexed
    /// from 0, where 0 corresponds to the left-most value in the CSV file row.
    /// @tparam T type of the value to convert to.
    ///
    /// @return Converted value.
    ///
    /// @throw CSVFileError if the index is out of range or if the
    /// @c boost::bad_lexical_cast is thrown by the @c boost::lexical_cast.
    template<typename T>
    T readAndConvertAt(const size_t at) const {
        T cast_value;
        try {
            cast_value = boost::lexical_cast<T>(readAt(at).c_str());

        } catch (const boost::bad_lexical_cast& ex) {
            isc_throw(CSVFileError, ex.what());
        }
        return (cast_value);
    }

146 147 148
    /// @brief Creates a text representation of the CSV file row.
    ///
    /// This function iterates over all values currently held in the internal
149 150
    /// @c values_ container and appends them to a string. The values are
    /// separated using the separator character specified in the constructor.
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
    ///
    /// This function is exception free.
    ///
    /// @return Text representation of the CSV file row.
    std::string render() const;

    /// @brief Replaces the value at specified index.
    ///
    /// This function is used to set values to be rendered using
    /// @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written given as string.
    ///
    /// @throw CSVFileError if index is out of range.
    void writeAt(const size_t at, const char* value);

    /// @brief Replaces the value at specified index.
    ///
    /// This function is used to set values to be rendered using
    /// @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written given as string.
    ///
    /// @throw CSVFileError if index is out of range.
    void writeAt(const size_t at, const std::string& value);

    /// @brief Replaces the value at specified index.
    ///
    /// This function is used to set values to be rendered using
    /// @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written - typically a number.
    /// @tparam T Type of the value being written.
    ///
    /// @throw CSVFileError if index is out of range.
    template<typename T>
    void writeAt(const size_t at, const T value) {
        checkIndex(at);
        try {
            values_[at] = boost::lexical_cast<std::string>(value);
        } catch (const boost::bad_lexical_cast& ex) {
            isc_throw(CSVFileError, "unable to stringify the value to be"
                      " written in the CSV file row at position '"
                      << at << "'");
        }
    }

    /// @brief Equality operator.
    ///
    /// Two CSV rows are equal when their string representation is equal. This
    /// includes the order of fields, separator etc.
    ///
    /// @param other Object to compare to.
    bool operator==(const CSVRow& other) const;

    /// @brief Unequality operator.
    ///
    /// Two CSV rows are unequal when their string representation is unequal.
    /// This includes the order of fields, separator etc.
    ///
    /// @param other Object to compare to.
    bool operator!=(const CSVRow& other) const;

private:

    /// @brief Check if the specified index of the value is in range.
    ///
    /// This function is used interally by other functions.
    ///
    /// @param at Value index.
    /// @throw CSVFileError if specified index is not in range.
    void checkIndex(const size_t at) const;

    /// @brief Separator character specifed in the constructor.
    char separator_;

    /// @brief Internal container holding values that belong to the row.
    std::vector<std::string> values_;
};

/// @brief Overloads standard output stream operator for @c CSVRow object.
///
/// The resulting string of characters is the same as the one returned by
/// @c CSVRow::render function.
///
/// @param os Output stream.
/// @param row Object representing a CSV file row.
///
/// @return Reference to an output stream; presumably @c os itself, so that
/// insertions can be chained (the definition is not part of this header -
/// confirm against the implementation).
std::ostream& operator<<(std::ostream& os, const CSVRow& row);

/// @brief Provides input/output access to CSV files.
///
/// This class provides basic methods to access (parse) and create CSV files.
/// The file is identified by its name qualified with the absolute path.
/// The name of the file is passed to the constructor. Constructor doesn't
/// open/create a file, but simply records a file name specified by a caller.
///
/// There are two functions that can be used to open a file:
/// - @c open - opens an existing file; if the file doesn't exist it creates it,
/// - @c recreate - removes existing file and creates a new one.
///
/// When the file is opened its header file is parsed and column names are
/// idenetified. At this point it is already possible to get the list of the
/// column names using appropriate accessors. The data rows are not parsed
/// at this time. The row parsing is triggered by calling @c next function.
/// The result of parsing a row is stored in the @c CSVRow object passed as
/// a parameter.
///
/// When the new file is created (when @c recreate is called), the CSV header is
/// immediately written into it. The header consists of the column names
/// specified with the @c addColumn function. The subsequent rows are written
/// into this file by calling @c append.
class CSVFile {
public:

    /// @brief Constructor.
    ///
    /// @param filename CSV file name.
    CSVFile(const std::string& filename);

    /// @brief Destructor
    virtual ~CSVFile();

    /// @brief Adds new column name.
    ///
    /// This column adds a new column but doesn't write it to the file yet.
    /// The name of the column will be placed in the CSV header when new file
    /// is created by calling @c recreate or @c open function.
    ///
    /// @param col_name Name of the column.
    ///
    /// @throw CSVFileError if a column with the specified name exists.
    void addColumn(const std::string& col_name);

    /// @brief Writes the CSV row into the file.
    ///
    /// @param Object representing a CSV file row.
    ///
    /// @throw CSVFileError When error occured during IO operation or if the
    /// size of the row doesn't match the number of columns.
    void append(const CSVRow& row) const;

    /// @brief Closes the CSV file.
    void close();

298 299 300
    /// @brief Flushes a file.
    void flush() const;

301 302 303 304 305
    /// @brief Returns the number of columns in the file.
    size_t getColumnCount() const {
        return (cols_.size());
    }

306 307 308 309 310
    /// @brief Returns the path to the CSV file.
    std::string getFilename() const {
        return (filename_);
    }

311 312 313 314 315 316 317 318
    /// @brief Returns the description of the last error returned by the
    /// @c CSVFile::next function.
    ///
    /// @return Description of the last error during row validation.
    std::string getReadMsg() const {
        return (read_msg_);
    }

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
    /// @brief Returns the index of the column having specified name.
    ///
    /// This function is exception safe.
    ///
    /// @param col_name Name of the column.
    /// @return Index of the column or negative value if the column doesn't
    /// exist.
    int getColumnIndex(const std::string& col_name) const;

    /// @brief Returns the name of the column.
    ///
    /// @param col_index Index of the column.
    ///
    /// @return Name of the column.
    /// @throw CSVFileError if the specified index is out of range.
    std::string getColumnName(const size_t col_index) const;

    /// @brief Reads next row from CSV file.
    ///
    /// This function will return the @c CSVRow object representing a
    /// parsed row if parsing is successful. If the end of file has been
    /// reached, the empty row is returned (a row containing no values).
    ///
    /// @param [out] row Object receiving the parsed CSV file.
343 344 345 346 347
    /// @param skip_validation Do not perform validation.
    ///
    /// @return true if row has been read and validated; false if validation
    /// failed.
    bool next(CSVRow& row, const bool skip_validation = false);
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369

    /// @brief Opens existing file or creates a new one.
    ///
    /// This function will try to open existing file if this file has size
    /// greater than 0. If the file doesn't exist or has size of 0, the
    /// file is recreated. If the existing file has been opened, the header
    /// is parsed and column names are initialized in the @c CSVFile object.
    /// The data pointer in the file is set to the beginning of the first
    /// row. In order to retrieve the row contents the @c next function should
    /// be called.
    ///
    /// @throw CSVFileError when IO operation fails.
    void open();

    /// @brief Creates a new CSV file.
    ///
    /// The file creation will fail if there are no columns specified.
    /// Otherwise, this function will write the header to the file.
    /// In order to write rows to opened file, the @c append function
    /// should be called.
    void recreate();

370 371 372 373 374 375 376 377 378 379 380 381 382
    /// @brief Sets error message after row validation.
    ///
    /// The @c CSVFile::validate function is responsible for setting the
    /// error message after validation of the row read from the CSV file.
    /// It will use this function to set this message. Note, that the
    /// @c validate function can set a message after successful validation
    /// too. Such message could say "success", or something similar.
    ///
    /// @param val_msg Error message to be set.
    void setReadMsg(const std::string& read_msg) {
        read_msg_ = read_msg;
    }

383 384 385 386 387 388
    /// @brief Represents empty row.
    static CSVRow EMPTY_ROW() {
        static CSVRow row(0);
        return (row);
    }

389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
protected:

    /// @brief Validate the row read from a file.
    ///
    /// This function implements a basic validation for the row read from the
    /// CSV file. It is virtual so as it may be customized in derived classes.
    ///
    /// This default implementation checks that the number of values in the
    /// row corresponds to the number of columns specified for this file.
    ///
    /// If row validation fails, the error message is noted and can be retrieved
    /// using @c CSVFile::getReadMsg. The function which overrides this
    /// base implementation is responsible for setting the error message using
    /// @c CSVFile::setReadMsg.
    ///
    /// @param row A row to be validated.
    ///
    /// @return true if the column is valid; false otherwise.
    virtual bool validate(const CSVRow& row);

409 410
private:

411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
    /// @brief This function validates the header of the CSV file.
    ///
    /// If there are any columns added to the @c CSVFile object, it will
    /// compare that they exactly match (including order) the header read
    /// from the file.
    ///
    /// This function is called internally by @CSVFile::open.
    ///
    /// @param header A row holding a header.
    /// @return true if header matches the columns; false otherwise.
    bool validateHeader(const CSVRow& header);

    /// @brief Sanity check if stream is open.
    ///
    /// Checks if the file stream is open so as IO operations can be performed
    /// on it. This is internally called by the public class members to prevent
    /// them from performing IO operations on invalid stream and using NULL
    /// pointer to a stream.
    ///
    /// @throw CSVFileError if stream is closed or pointer to it is NULL.
    void checkStreamStatus(const std::string& operation) const;

433 434 435 436 437 438 439 440 441 442 443 444 445 446
    /// @brief Returns size of the CSV file.
    std::ifstream::pos_type size() const;

    /// @brief Separator used by CSV file.
    char primary_separator_;

    /// @brief CSV file name.
    std::string filename_;

    /// @brief Holds a pointer to the file stream.
    boost::shared_ptr<std::fstream> fs_;

    /// @brief Holds CSV file columns.
    std::vector<std::string> cols_;
447 448 449

    /// @brief Holds last error during row reading or validation.
    std::string read_msg_;
450 451 452 453 454 455
};

} // namespace isc::util
} // namespace isc

#endif // CSV_FILE_H