Commit 2903127d authored by JINMEI Tatuya's avatar JINMEI Tatuya
Browse files

[1603] implemented LabelSequence::getHash(), which is needed for new renderer.

maptolower hidden in name.cc is now exposed to the LabelSequence implementation
in a "private" namespace.
parent fc609f2e
......@@ -13,9 +13,11 @@
// PERFORMANCE OF THIS SOFTWARE.
#include <dns/labelsequence.h>
#include <dns/name_internal.h>
#include <exceptions/exceptions.h>
#include <iostream>
#include <boost/functional/hash.hpp>
namespace isc {
namespace dns {
......@@ -79,5 +81,23 @@ LabelSequence::isAbsolute() const {
return (last_label_ == name_->offsets_.size());
}
size_t
LabelSequence::getHash(bool case_sensitive) const {
size_t length;
const char* s = getData(&length);
if (length > 16) {
length = 16;
}
size_t hash_val = 0;
while (length > 0) {
const unsigned char c = *s++;
boost::hash_combine(hash_val, case_sensitive ? c :
isc::dns::name::internal::maptolower[c]);
--length;
}
return (hash_val);
}
} // end namespace dns
} // end namespace isc
......@@ -134,6 +134,31 @@ public:
/// \return Reference to the original Name object
const Name& getName() const { return (*name_); }
/// \brief Calculate a simple hash for the label sequence.
///
/// This method calculates a hash value for the label sequence as binary
/// data. If \c case_sensitive is false, it ignores the case stored in
/// the labels; specifically, it normalizes the labels by converting all
/// upper case characters to lower case ones and calculates the hash value
/// for the result.
///
/// This method is intended to provide a lightweight way to store a
/// relatively small number of label sequences in a hash table.
/// For this reason it only takes into account data up to 16 octets
/// (16 was derived from BIND 9's implementation). Also, the function does
/// not provide any unpredictability; a specific sequence will always have
/// the same hash value. It should therefore not be used in the context
/// where an untrusted third party can mount a denial of service attack by
/// forcing the application to create a very large number of label
/// sequences that have the same hash value and expected to be stored in
/// a hash table.
///
/// \exception None
///
/// \param case_sensitive
/// \return A hash value for this label sequence.
size_t getHash(bool case_sensitive) const;
/// \brief Checks whether the label sequence is absolute
///
/// \return true if the last label is the root label
......
......@@ -23,11 +23,13 @@
#include <util/buffer.h>
#include <dns/exceptions.h>
#include <dns/name.h>
#include <dns/name_internal.h>
#include <dns/messagerenderer.h>
using namespace std;
using namespace isc::util;
using isc::dns::NameComparisonResult;
using namespace isc::dns::name::internal;
namespace isc {
namespace dns {
......@@ -46,12 +48,12 @@ namespace {
/// we chose the naive but simple hardcoding approach.
///
/// These definitions are derived from BIND 9's libdns module.
/// Note: it was not clear why the maptolower array was needed rather than
/// using the standard tolower() function. It was perhaps due performance
/// concern, but we were not sure. Even if it was performance reasons, we
/// should carefully assess the effect via benchmarking to avoid the pitfall of
/// premature optimization. We should revisit this point later.
static const char digitvalue[256] = {
/// Note: we could use the standard tolower() function instead of the
/// maptolower array, but a benchmark indicated that the private array could
/// improve the performance of message rendering (which internally uses the
/// array heavily) about 27%. Since we want to achieve very good performance
/// for message rendering in some cases, we'll keep using it.
const char digitvalue[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 16
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 32
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 48
......@@ -69,8 +71,11 @@ static const char digitvalue[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 256
};
}
static const unsigned char maptolower[] = {
namespace name {
namespace internal {
const unsigned char maptolower[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
......@@ -104,7 +109,8 @@ static const unsigned char maptolower[] = {
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
}
} // end of internal
} // end of name
namespace {
///
......
......@@ -166,6 +166,15 @@ private:
NameRelation relation_;
};
// This is effectively a "private" namespace, but exposed publicly so the
// definitions in it can be shared with the LabelSequence class. It's not
// expected to be used
namespace name {
namespace internal {
extern const unsigned char maptolower[];
}
}
///
/// The \c Name class encapsulates DNS names.
///
......
// Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
#ifndef __NAME_INTERNAL_H
#define __NAME_INTERNAL_H 1
// This is effectively a "private" namespace for the Name class implementation,
// but exposed publicly so the definitions in it can be shared with the
// LabelSequence class. It's not expected to be used even by other modules
// of this library, much less by normal applications. This header file is
// therefore not expected to be installed as part of the library.
namespace isc {
namespace dns {
namespace name {
namespace internal {
extern const unsigned char maptolower[];
} // end of internal
} // end of name
} // end of dns
} // end of isc
#endif // __NAME_INTERNAL_H
// Local Variables:
// mode: c++
// End:
......@@ -13,11 +13,18 @@
// PERFORMANCE OF THIS SOFTWARE.
#include <dns/labelsequence.h>
#include <dns/name.h>
#include <exceptions/exceptions.h>
#include <gtest/gtest.h>
#include <boost/functional/hash.hpp>
#include <string>
#include <set>
using namespace isc::dns;
using namespace std;
namespace {
......@@ -257,4 +264,98 @@ TEST_F(LabelSequenceTest, isAbsolute) {
ASSERT_TRUE(ls3.isAbsolute());
}
// A helper function that constructs the textual representation of a name label
// character for the given char value in the form of \DDD
string
getNumericLabel(char ch) {
string result;
result.push_back(0x5c); // push '\'
result.push_back(0x30 + ((ch / 100) % 10)); // encode the 1st digit
result.push_back(0x30 + ((ch / 10) % 10)); // encode the 2nd digit
result.push_back(0x30 + (ch % 10)); // encode the 3rd digit
return (result);
}
// The following are test data used in the getHash test below. Normally
// we use example/documentation domain names for testing, but in this case
// we'd specifically like to use more realistic data, and are intentionally
// using real-world samples: They are the NS names of root and some top level
// domains as of this test.
const char* const root_servers[] = {
"a.root-servers.net", "b.root-servers.net", "c.root-servers.net",
"d.root-servers.net", "e.root-servers.net", "f.root-servers.net",
"g.root-servers.net", "h.root-servers.net", "i.root-servers.net",
"j.root-servers.net", "k.root-servers.net", "l.root-servers.net",
"m.root-servers.net", NULL
};
const char* const gtld_servers[] = {
"a.gtld-servers.net", "b.gtld-servers.net", "c.gtld-servers.net",
"d.gtld-servers.net", "e.gtld-servers.net", "f.gtld-servers.net",
"g.gtld-servers.net", "h.gtld-servers.net", "i.gtld-servers.net",
"j.gtld-servers.net", "k.gtld-servers.net", "l.gtld-servers.net",
"m.gtld-servers.net", NULL
};
const char* const jp_servers[] = {
"a.dns.jp", "b.dns.jp", "c.dns.jp", "d.dns.jp", "e.dns.jp",
"f.dns.jp", "g.dns.jp", NULL
};
const char* const cn_servers[] = {
"a.dns.cn", "b.dns.cn", "c.dns.cn", "d.dns.cn", "e.dns.cn",
"ns.cernet.net", NULL
};
const char* const ca_servers[] = {
"k.ca-servers.ca", "e.ca-servers.ca", "a.ca-servers.ca", "z.ca-servers.ca",
"tld.isc-sns.net", "c.ca-servers.ca", "j.ca-servers.ca", "l.ca-servers.ca",
"sns-pb.isc.org", "f.ca-servers.ca", NULL
};
// A helper function used in the getHash test below.
void
hashDistributionCheck(const char* const* servers) {
const size_t BUCKETS = 64; // constant used in the MessageRenderer
set<Name> names;
vector<size_t> hash_counts(BUCKETS);
// Store all test names and their super domain names (excluding the
// "root" label) in the set, calculates their hash values, and increments
// the counter for the corresponding hash "bucket".
for (size_t i = 0; servers[i] != NULL; ++i) {
const Name name(servers[i]);
for (size_t l = 0; l < name.getLabelCount() - 1; ++l) {
pair<set<Name>::const_iterator, bool> ret =
names.insert(name.split(l));
if (ret.second) {
hash_counts[LabelSequence((*ret.first)).getHash(false) %
BUCKETS]++;
}
}
}
// See how many conflicts we have in the buckets. For the testing purpose
// we expect there's at most 2 conflicts in each set, which is an
// arbitrary choice (it should happen to succeed with the hash function
// and data we are using; if it's not the case, maybe with an update to
// the hash implementation, we should revise the test).
for (size_t i = 0; i < BUCKETS; ++i) {
EXPECT_GE(3, hash_counts[i]);
}
}
TEST_F(LabelSequenceTest, getHash) {
// Trivial case. The same sequence should have the same hash.
EXPECT_EQ(ls1.getHash(true), ls1.getHash(true));
// Check the case-insensitive mode behavior.
EXPECT_EQ(ls1.getHash(false), ls5.getHash(false));
// Check that the distribution of hash values is "not too bad" (such as
// everything has the same hash value due to a stupid bug). It's
// difficult to check such things reliably. We do some ad hoc tests here.
hashDistributionCheck(root_servers);
hashDistributionCheck(jp_servers);
hashDistributionCheck(cn_servers);
hashDistributionCheck(ca_servers);
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment