csv_file.cc 11.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
// Copyright (C) 2014 Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.

#include <util/csv_file.h>
#include <fstream>
#include <sstream>

namespace isc {
namespace util {

// Creates a row holding 'cols' default-constructed (empty) values. The
// 'separator' character is stored and later placed between the values
// when the row is rendered.
CSVRow::CSVRow(const size_t cols, const char separator)
    : separator_(separator),
      values_(cols) {
}

// Creates a row from a line of text. The 'separator' character is stored
// first, because the parser uses it to split the text into values.
CSVRow::CSVRow(const std::string& text, const char separator)
    : separator_(separator) {
    // The values container starts out empty; parse() replaces its
    // contents with the values found in the text.
    parse(text.c_str());
}

// Splits the NUL-terminated 'line' on the separator character and replaces
// the values held by this row with the result.
//
// Every occurrence of the separator ends one value and starts another, so
// an empty line yields a single empty value and a trailing separator
// yields a trailing empty value.
//
// All positions are kept as size_t, which is the type returned by
// std::string::find. This avoids narrowing the result to a signed integer
// and comparing that signed integer against std::string::npos.
void
CSVRow::parse(const char* line) {
    const std::string s(line);
    // Temporary container which holds parsed values. The row is modified
    // only after the whole line has been processed.
    std::vector<std::string> values;
    // Position of the first character of the currently parsed value.
    size_t start_pos = 0;

    for (;;) {
        // Find the first separator at or after start_pos.
        const size_t pos = s.find(separator_, start_pos);
        if (pos == std::string::npos) {
            // The last value is not followed by a separator, so take the
            // remainder of the string and finish parsing.
            values.push_back(s.substr(start_pos));
            break;
        }
        // Store the value delimited by the separator which was found and
        // continue right behind this separator.
        values.push_back(s.substr(start_pos, pos - start_pos));
        start_pos = pos + 1;
    }

    // Assign new values.
    std::swap(values, values_);
}

std::string
CSVRow::readAt(const size_t at) const {
    checkIndex(at);
    return (values_[at]);
}

// Returns the textual form of the row: all values in order, with the
// separator character placed between each pair of adjacent values. A row
// with no values renders as an empty string.
std::string
CSVRow::render() const {
    std::ostringstream s;
    // The index has the same unsigned type as the container size, so the
    // loop condition does not mix signed and unsigned operands.
    for (size_t i = 0; i < values_.size(); ++i) {
        // Do not put separator before the first value.
        if (i > 0) {
            s << separator_;
        }
        s << values_[i];
    }
    return (s.str());
}

// Replaces the value at the index 'at' with the C string 'value'. The
// index is validated with checkIndex() before the value is modified.
void
CSVRow::writeAt(const size_t at, const char* value) {
    checkIndex(at);
    std::string& slot = values_[at];
    slot.assign(value);
}

// Replaces the value at the index 'at' with 'value'. The work is delegated
// to the overload taking a C string, so the text is passed on as a
// NUL-terminated character array.
void
CSVRow::writeAt(const size_t at, const std::string& value) {
    const char* const raw_value = value.c_str();
    writeAt(at, raw_value);
}

// Two rows are equal when their rendered text is identical.
bool
CSVRow::operator==(const CSVRow& other) const {
    const std::string this_text = render();
    const std::string other_text = other.render();
    return (this_text == other_text);
}

// Two rows are unequal when their rendered text differs, i.e. exactly when
// they do not compare equal.
bool
CSVRow::operator!=(const CSVRow& other) const {
    return (!(*this == other));
}

// Writes the rendered text of the row to the stream and returns the stream
// so that insertions can be chained.
std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
    const std::string text = row.render();
    return (os << text);
}

// Verifies that 'at' refers to an existing value of the row.
//
// Throws CSVFileError when the index is equal to or greater than the
// number of values held by the row.
void
CSVRow::checkIndex(const size_t at) const {
    if (at < values_.size()) {
        return;
    }
    // An empty row has no valid index at all. Computing 'size() - 1' for
    // it would wrap around and report a huge bogus maximal index, so the
    // empty row gets its own message.
    if (values_.empty()) {
        isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
                  " is out of bounds; the row is empty");
    }
    isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
              " is out of bounds; maximal index is '"
              << (values_.size() - 1) << "'");
}

// Records the name of the file and selects ',' as the separator. No stream
// is created here; the stream pointer, the list of columns and the read
// message all start out empty.
CSVFile::CSVFile(const std::string& filename)
    : primary_separator_(','),
      filename_(filename),
      fs_(),
      cols_(0),
      read_msg_() {
}

// Closes the file on destruction. Closing is a no-op when there is no
// stream, so this is fine for an object which has never been opened.
CSVFile::~CSVFile() {
    close();
}

// Closes the underlying stream and releases it. It is allowed to call this
// function multiple times.
void
CSVFile::close() {
    // Nothing to do if the file has been already closed.
    if (!fs_) {
        return;
    }
    fs_->close();
    fs_.reset();
}

145 146 147 148 149 150
// Flushes the underlying stream. checkStreamStatus() throws CSVFileError
// if there is no stream or if the stream is not open.
void
CSVFile::flush() const {
    // Make sure that there is an open stream to operate on.
    checkStreamStatus("flush");
    fs_->flush();
}

151 152 153 154 155 156 157 158 159 160 161
// Appends a column with the given name to the list of columns of the file.
//
// Throws CSVFileError if a column with this name is already present.
void
CSVFile::addColumn(const std::string& col_name) {
    // A non-negative index means that the column has been found.
    const bool already_present = (getColumnIndex(col_name) >= 0);
    if (already_present) {
        isc_throw(CSVFileError, "attempt to add duplicate column '"
                  << col_name << "'");
    }
    cols_.push_back(col_name);
}

void
CSVFile::append(const CSVRow& row) const {
162 163 164 165
    checkStreamStatus("append");

    // If a stream is in invalid state, reset the state.
    fs_->clear();
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185

    if (row.getValuesCount() != getColumnCount()) {
        isc_throw(CSVFileError, "number of values in the CSV row '"
                  << row.getValuesCount() << "' doesn't match the number of"
                  " columns in the CSV file '" << getColumnCount() << "'");
    }

    fs_->seekp(0, std::ios_base::end);
    fs_->seekg(0, std::ios_base::end);
    fs_->clear();

    std::string text = row.render();
    *fs_ << text << std::endl;
    if (!fs_->good()) {
        fs_->clear();
        isc_throw(CSVFileError, "failed to write CSV row '"
                  << text << "' to the file '" << filename_ << "'");
    }
}

186 187 188 189 190 191 192 193 194 195 196 197 198
// Checks that there is an open stream for the given operation.
//
// Throws CSVFileError, naming the operation and the file, if the stream
// pointer is not set or if the stream is not open.
void
CSVFile::checkStreamStatus(const std::string& operation) const {
    // No stream object at all.
    if (!fs_) {
        isc_throw(CSVFileError, "NULL stream pointer when performing '"
                  << operation << "' on file '" << filename_ << "'");
    }

    // The stream object exists but no file is open.
    if (!fs_->is_open()) {
        isc_throw(CSVFileError, "closed stream when performing '"
                  << operation << "' on file '" << filename_ << "'");
    }
}

199 200 201 202 203 204 205 206 207 208
// Returns the size of the file, determined by opening the file for reading
// and seeking to its end.
//
// If something goes wrong, including that the file doesn't exist, the
// function returns 0. This includes the case when the end position can't
// be obtained: tellg() reports a failure by returning pos_type(-1), which
// must not be handed to the caller as if it was a size.
std::ifstream::pos_type
CSVFile::size() const {
    // A separate, local stream is used. It is closed by its destructor on
    // every return path.
    std::ifstream fs(filename_.c_str());
    if (!fs.good()) {
        return (0);
    }

    std::ifstream::pos_type pos(0);
    try {
        // Seek to the end of file and see where we are. This is a size of
        // the file.
        fs.seekg(0, std::ifstream::end);
        pos = fs.tellg();

    } catch (const std::exception&) {
        return (0);
    }

    // Neither seekg() nor tellg() throws unless exceptions were enabled on
    // the stream, so failures have to be detected explicitly.
    if (fs.fail() || (pos == std::ifstream::pos_type(-1))) {
        return (0);
    }
    return (pos);
}

// Returns the index of the column with the given name, or -1 if there is
// no such column.
int
CSVFile::getColumnIndex(const std::string& col_name) const {
    // Iterate with an unsigned index which matches the type of the
    // container size and convert explicitly to the signed return type.
    for (size_t i = 0; i < cols_.size(); ++i) {
        if (cols_[i] == col_name) {
            return (static_cast<int>(i));
        }
    }
    return (-1);
}

// Returns the name of the column at the given index.
//
// Throws isc::OutOfRange if the index doesn't refer to an existing column.
std::string
CSVFile::getColumnName(const size_t col_index) const {
    // Valid indexes are 0 .. size() - 1. An index equal to the number of
    // columns is already out of range and must be rejected too, otherwise
    // the access below would read past the end of the container.
    if (col_index >= cols_.size()) {
        isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
                  " CSV file '" << filename_ << "' is out of range; the CSV"
                  " file has only  " << cols_.size() << " columns ");
    }
    return (cols_[col_index]);
}

242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
// Reads one line from the file and parses it into 'row'.
//
// Returns true if a row has been read and, unless 'skip_validation' is
// set, has passed validate(). When the end of the file has been reached,
// 'row' is set to EMPTY_ROW() and true is returned. Returns false on
// failure; the reason is recorded with setReadMsg().
bool
CSVFile::next(CSVRow& row, const bool skip_validation) {
    // Set something as a row validation error. Although we haven't started
    // actual row validation, we should get rid of any previously recorded
    // errors so that the caller doesn't interpret them as the current one.
    setReadMsg("validation not started");

    try {
        // Check that stream is "ready" for any IO operations.
        checkStreamStatus("get next row");

    } catch (const isc::Exception& ex) {
        setReadMsg(ex.what());
        return (false);
    }

    // If a stream is in invalid state, reset the state.
    fs_->clear();

    // Get exactly one line of the file.
    std::string line;
    std::getline(*fs_, line);
    // If we got empty line because we reached the end of file
    // return an empty row.
    if (line.empty() && fs_->eof()) {
        row = EMPTY_ROW();
        return (true);

    } else if (fs_->fail()) {
        // fail() is used rather than !good(), because reading the last
        // line of a file which doesn't end with a new line sets the
        // end-of-file flag even though the line has been read correctly.
        // Such a line must be parsed, not reported as an error.
        //
        // If we hit an IO error, communicate it to the caller but do NOT
        // close the stream. Caller may try again.
        setReadMsg("error reading a row from CSV file '"
                   + std::string(filename_) + "'");
        return (false);
    }
    // If we read anything, parse it.
    row.parse(line.c_str());

    // And check if it is correct.
    return (skip_validation ? true : validate(row));
}

void
CSVFile::open() {
    // If file doesn't exist or is empty, we have to create our own file.
    if (size() == 0) {
        recreate();

    } else {
        // Try to open existing file, holding some data.
        fs_.reset(new std::fstream(filename_.c_str()));
        // The file may fail to open. For example, because of insufficient
        // persmissions. Although the file is not open we should call close
        // to reset our internal pointer.
        if (!fs_->is_open()) {
            close();
            isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
        }
        // Make sure we are on the beginning of the file, so as we can parse
        // the header.
        fs_->seekg(0);
303 304 305 306 307
        if (!fs_->good()) {
            close();
            isc_throw(CSVFileError, "unable to set read pointer in the file '"
                      << filename_ << "'");
        }
308

309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
        // Read the header.
        CSVRow header;
        if (!next(header, true)) {
            close();
            isc_throw(CSVFileError, "failed to read and parse header of the"
                      " CSV file '" << filename_ << "': "
                      << getReadMsg());
        }

        // Check the header against the columns specified for the CSV file.
        if (!validateHeader(header)) {
            close();
            isc_throw(CSVFileError, "invalid header '" << header
                      << "' in CSV file '" << filename_ << "'");
        }

        // Everything is good, so if we haven't added any columns yet,
        // add them.
        if (getColumnCount() == 0) {
            for (size_t i = 0; i < header.getValuesCount(); ++i) {
                addColumn(header.readAt(i));
            }
331 332 333 334 335 336
        }
    }
}

// Creates the file anew and writes a header line holding the names of the
// columns defined for the file. Any stream which is currently held is
// closed first.
//
// Throws CSVFileError if no columns are defined, if the file can't be
// opened for writing, or if writing the header fails. The stream is
// closed in each of these cases.
void
CSVFile::recreate() {
    // There is no sense creating a file if we don't specify columns for it.
    if (getColumnCount() == 0) {
        close();
        isc_throw(CSVFileError, "no columns defined for the newly"
                  " created CSV file '" << filename_ << "'");
    }

    // Close any dangling files.
    close();
    fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
    if (!fs_->is_open()) {
        close();
        isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
    }
    // Opened successfully. Write a header to it.
    try {
        CSVRow header(getColumnCount());
        // The index has the same unsigned type as the column count, so
        // the loop condition does not mix signed and unsigned operands.
        for (size_t i = 0; i < getColumnCount(); ++i) {
            header.writeAt(i, getColumnName(i));
        }
        *fs_ << header << std::endl;

    } catch (const std::exception& ex) {
        close();
        isc_throw(CSVFileError, ex.what());
    }

    // Writing to the stream doesn't throw on failure, so the stream state
    // has to be checked explicitly. Otherwise a file without a header
    // would be silently reported as created.
    if (!fs_->good()) {
        close();
        isc_throw(CSVFileError, "failed to write header to the CSV file '"
                  << filename_ << "'");
    }
}

366 367 368 369 370 371 372 373 374 375 376 377
// Checks that the number of values in the row equals the number of columns
// of the file. The outcome is recorded with setReadMsg(): "success" for a
// matching row, a description of the mismatch otherwise.
bool
CSVFile::validate(const CSVRow& row) {
    // Assume success; the message is replaced below if the check fails.
    setReadMsg("success");
    if (row.getValuesCount() == getColumnCount()) {
        return (true);
    }

    std::ostringstream msg;
    msg << "the size of the row '" << row << "' doesn't match the number of"
        " columns '" << getColumnCount() << "' of the CSV file '"
        << filename_ << "'";
    setReadMsg(msg.str());
    return (false);
}
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395

// Checks the header row against the columns defined for the file.
//
// Returns true if no columns are defined, in which case any header is
// accepted, or if the header holds exactly the defined column names in the
// same order. Returns false otherwise.
bool
CSVFile::validateHeader(const CSVRow& header) {
    if (getColumnCount() == 0) {
        return (true);
    }

    if (getColumnCount() != header.getValuesCount()) {
        return (false);
    }

    // The index has the same unsigned type as the column count, so the
    // loop condition does not mix signed and unsigned operands.
    for (size_t i = 0; i < getColumnCount(); ++i) {
        if (getColumnName(i) != header.readAt(i)) {
            return (false);
        }
    }
    return (true);
}
397 398 399

} // end of isc::util namespace
} // end of isc namespace