Commit dabd4188 authored by Thomas Markwalder's avatar Thomas Markwalder

[#174,!414] HA now logs server times along with skew

src/hooks/dhcp/high_availability/communication_state.*
    ComunicationState::setPartnerTime() - in addition to the skew
    it now retains current time and partner time

    CommunicationState::logFormatClockSkew() - now outputs current
    time and partner time, along with skew value

src/hooks/dhcp/high_availability/ha_messages.*
    HA_HIGH_CLOCK_SKEW
    HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION - modified to accomdate peer times

    HA_TERMINATED - reworded, as entering this state is a generic endpoint that
    may, in future be reached for reasons other than clock skew

src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc
    TEST_F(CommunicationStateTest, logFormatClockSkew) - updated
parent 559d9261
1627. [func] tmark
High Availaiblity logging now also emits server and partner system
times when reporting clock skew issues. Prior to this it reported
only the skew between the two servers.
(Gitlab #174,!414, git TBD)
1626. [bug] marcin
Automatically delete embedded options as a result of deleting
a subnet, pool or shared network from the MySQL Configuration
......
// Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC")
// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
......@@ -15,6 +15,7 @@
#include <dhcp/pkt4.h>
#include <dhcp/pkt6.h>
#include <http/date_time.h>
#include <util/boost_time_utils.h>
#include <boost/bind.hpp>
#include <boost/pointer_cast.hpp>
#include <sstream>
......@@ -46,7 +47,7 @@ CommunicationState::CommunicationState(const IOServicePtr& io_service,
: io_service_(io_service), config_(config), timer_(), interval_(0),
poke_time_(boost::posix_time::microsec_clock::universal_time()),
heartbeat_impl_(0), partner_state_(-1), clock_skew_(0, 0, 0, 0),
last_clock_skew_warn_() {
last_clock_skew_warn_(), my_time_at_skew_(), partner_time_at_skew_() {
}
CommunicationState::~CommunicationState() {
......@@ -216,26 +217,36 @@ CommunicationState::isClockSkewGreater(const long seconds) const {
void
CommunicationState::setPartnerTime(const std::string& time_text) {
HttpDateTime partner_time = HttpDateTime().fromRfc1123(time_text);
HttpDateTime current_time = HttpDateTime();
partner_time_at_skew_ = HttpDateTime().fromRfc1123(time_text).getPtime();
my_time_at_skew_ = HttpDateTime().getPtime();
clock_skew_ = partner_time.getPtime() - current_time.getPtime();
clock_skew_ = partner_time_at_skew_ - my_time_at_skew_;
}
std::string
CommunicationState::logFormatClockSkew() const {
std::ostringstream s;
std::ostringstream os;
if ((my_time_at_skew_.is_not_a_date_time()) ||
(partner_time_at_skew_.is_not_a_date_time())) {
// Guard against being called before times have been set.
// Otherwise we'll get out-range exceptions.
return ("skew not initialized");
}
os << "my time: " << util::ptimeToText(my_time_at_skew_)
<< ", partner's time: " << util::ptimeToText(partner_time_at_skew_)
<< ", partner's clock is ";
// If negative clock skew, the partner's time is behind our time.
if (clock_skew_.is_negative()) {
s << clock_skew_.invert_sign().total_seconds() << "s behind";
os << clock_skew_.invert_sign().total_seconds() << "s behind";
} else {
// Partner's time is ahead of ours.
s << clock_skew_.total_seconds() << "s ahead";
os << clock_skew_.total_seconds() << "s ahead";
}
return (s.str());
return (os.str());
}
CommunicationState4::CommunicationState4(const IOServicePtr& io_service,
......
// Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC")
// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
......@@ -276,7 +276,9 @@ public:
/// calculated.
///
/// @param time_text Partner's time received in response to a heartbeat. The
/// time must be provided in the RFC 1123 format.
/// time must be provided in the RFC 1123 format. It stores the current
/// time, partner's time, and the difference (skew) between them.
///
/// @throw isc::http::HttpTimeConversionError if the time format is invalid.
///
/// @todo Consider some other time formats which include millisecond
......@@ -317,6 +319,12 @@ protected:
/// @brief Holds a time when last warning about too high clock skew
/// was issued.
boost::posix_time::ptime last_clock_skew_warn_;
/// @brief My time when skew was calculated.
boost::posix_time::ptime my_time_at_skew_;
/// @brief Partner reported time when skew was calculated.
boost::posix_time::ptime partner_time_at_skew_;
};
/// @brief Type of the pointer to the @c CommunicationState object.
......
// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Wed Jun 26 2019 15:13
// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon Jul 08 2019 13:20
#include <cstddef>
#include <log/message_types.h>
......@@ -104,8 +104,8 @@ const char* values[] = {
"HA_HEARTBEAT_COMMUNICATIONS_FAILED", "failed to send heartbeat to %1: %2",
"HA_HEARTBEAT_FAILED", "heartbeat to %1 failed: %2",
"HA_HEARTBEAT_HANDLER_FAILED", "heartbeat command failed: %1",
"HA_HIGH_CLOCK_SKEW", "partner's clock is %1, please synchronize clocks!",
"HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION", "partner's clock is %1, causing HA service to terminate",
"HA_HIGH_CLOCK_SKEW", "%1, please synchronize clocks!",
"HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION", "%1, causing HA service to terminate",
"HA_INIT_OK", "loading High Availability hooks library successful",
"HA_LEASES4_COMMITTED_FAILED", "leases4_committed callout failed: %1",
"HA_LEASES4_COMMITTED_NOTHING_TO_UPDATE", "%1: leases4_committed callout was invoked without any leases",
......@@ -137,7 +137,7 @@ const char* values[] = {
"HA_SYNC_HANDLER_FAILED", "ha-sync command failed: %1",
"HA_SYNC_START", "starting lease database synchronization with %1",
"HA_SYNC_SUCCESSFUL", "lease database synchronization with %1 completed successfully in %2",
"HA_TERMINATED", "HA service terminated because of the unacceptable clock skew; fix the problem and restart!",
"HA_TERMINATED", "HA service terminated due to an unrecoverable condition. Check previous error message(s), address the problem and restart!",
NULL
};
......
// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Wed Jun 26 2019 15:13
// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon Jul 08 2019 13:20
#ifndef HA_MESSAGES_H
#define HA_MESSAGES_H
......
......@@ -163,14 +163,14 @@ This error message is issued to indicate that the heartbeat command handler
failed while processing the command. The argument provides the reason for
failure.
% HA_HIGH_CLOCK_SKEW partner's clock is %1, please synchronize clocks!
% HA_HIGH_CLOCK_SKEW %1, please synchronize clocks!
This warning message is issued when the clock skew between the active servers
exceeds 30 seconds. The HA service continues to operate but may not function
properly, especially for low lease lifetimes. The administrator should
should synchronize the clocks, e.g. using NTP. If the clock skew exceeds
60 seconds, the HA service will terminate.
% HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION partner's clock is %1, causing HA service to terminate
% HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION %1, causing HA service to terminate
This warning message is issued when the clock skew between the active servers
exceeds 60 seconds. The HA service stops. The servers will continue to respond
to the DHCP queries but won't exchange lease updates or send heartbeats.
......@@ -360,7 +360,8 @@ lease database synchronization with the partner. The first argument specifies
the name of the partner server. The second argument specifies the duration of
the synchronization.
% HA_TERMINATED HA service terminated because of the unacceptable clock skew; fix the problem and restart!
% HA_TERMINATED HA service terminated due to an unrecoverable condition. Check previous error message(s), address the problem and restart!
This error message is issued to indicate that the HA service has been stopped
due to unacceptable clock skew. The error can be fixed by synchronizing the
clocks on the active servers and restarting Kea.
due to an unacceptable condition (e.g. too large of a clock skew). The exact
cause should appear in a previous error message. Address the condition
reported then restart the servers to resume service.
// Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC")
// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
......@@ -368,14 +368,29 @@ TEST_F(CommunicationStateTest, clockSkew) {
// This test verifies that the clock skew value is formatted correctly
// for logging.
TEST_F(CommunicationStateTest, logFormatClockSkew) {
// Partner time is ahead by 15s.
state_.clock_skew_ += boost::posix_time::time_duration(0, 0, 15);
EXPECT_EQ("15s ahead", state_.logFormatClockSkew());
// Make sure logFormatClockSkew() does not throw if called prior
// the first call to setPartnerTime().
std::string log;
ASSERT_NO_THROW(log = state_.logFormatClockSkew());
EXPECT_EQ(std::string("skew not initialized"), log);
// Partner time is behind by 1m23s.
state_.setPartnerTime(HttpDateTime().rfc1123Format());
state_.clock_skew_ -= boost::posix_time::time_duration(0, 1, 23);
EXPECT_EQ("83s behind", state_.logFormatClockSkew());
// Get current time.
boost::posix_time::ptime now = HttpDateTime().getPtime();
// Partner time is ahead by 15s.
boost::posix_time::time_duration offset(0,0,15,0);
state_.setPartnerTime(HttpDateTime(now + offset).rfc1123Format());
ASSERT_NO_THROW(log = state_.logFormatClockSkew());
// We don't check the exact string for obvious reasons.
EXPECT_TRUE(log.find("s ahead") != std::string::npos) <<
" log content wrong: " << log;
// Partner time is behind by 15s.
state_.setPartnerTime(HttpDateTime(now - offset).rfc1123Format());
ASSERT_NO_THROW(log = state_.logFormatClockSkew());
// We don't check the exact string for obvious reasons.
EXPECT_TRUE(log.find("s behind") != std::string::npos) <<
" log content wrong: " << log;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment