Commit 5d79b60f authored by Mukund Sivaraman
Browse files

Improve performance of RBT (#41165)

parent aeb7b6e1
4277. [performance] Improve performance of the RBT, the central zone
data structure: the aux hashtable was improved,
the hash function was updated to perform a more
uniform mapping, uppernode was added to
dns_rbtnode, and other cleanups and performance
improvements were made. [RT #41165]
4276. [protocol] Add support for SMIMEA. [RT #40513]
4275. [performance] Lazily initialize dns_compress->table only when
......
......@@ -568,7 +568,7 @@ setcachestats(dns_db_t *db, isc_stats_t *stats) {
return (dns_db_setcachestats(sampledb->rbtdb, stats));
}
static unsigned int
static size_t
hashsize(dns_db_t *db) {
sampledb_t *sampledb = (sampledb_t *) db;
......
......@@ -472,8 +472,7 @@ finddbent(dns_acache_t *acache, dns_db_t *db, dbentry_t **dbentryp) {
* The caller must be holding the acache lock.
*/
bucket = isc_hash_calc((const unsigned char *)&db,
sizeof(db), ISC_TRUE) % DBBUCKETS;
bucket = isc_hash_function(&db, sizeof(db), ISC_TRUE, NULL) % DBBUCKETS;
for (dbentry = ISC_LIST_HEAD(acache->dbbucket[bucket]);
dbentry != NULL;
......@@ -1264,8 +1263,7 @@ dns_acache_setdb(dns_acache_t *acache, dns_db_t *db) {
dbentry->db = NULL;
dns_db_attach(db, &dbentry->db);
bucket = isc_hash_calc((const unsigned char *)&db,
sizeof(db), ISC_TRUE) % DBBUCKETS;
bucket = isc_hash_function(&db, sizeof(db), ISC_TRUE, NULL) % DBBUCKETS;
ISC_LIST_APPEND(acache->dbbucket[bucket], dbentry, link);
......@@ -1353,8 +1351,8 @@ dns_acache_putdb(dns_acache_t *acache, dns_db_t *db) {
INSIST(ISC_LIST_EMPTY(dbentry->originlist) &&
ISC_LIST_EMPTY(dbentry->referlist));
bucket = isc_hash_calc((const unsigned char *)&db,
sizeof(db), ISC_TRUE) % DBBUCKETS;
bucket = isc_hash_function(&db, sizeof(db), ISC_TRUE, NULL) % DBBUCKETS;
ISC_LIST_UNLINK(acache->dbbucket[bucket], dbentry, link);
dns_db_detach(&dbentry->db);
......
......@@ -1406,7 +1406,7 @@ dns_cache_dumpstats(dns_cache_t *cache, FILE *fp) {
"cache records deleted due to TTL expiration");
fprintf(fp, "%20u %s\n", dns_db_nodecount(cache->db),
"cache database nodes");
fprintf(fp, "%20u %s\n", dns_db_hashsize(cache->db),
fprintf(fp, "%20zu %s\n", dns_db_hashsize(cache->db),
"cache database hash buckets");
fprintf(fp, "%20u %s\n", (unsigned int) isc_mem_total(cache->mctx),
......
......@@ -875,12 +875,12 @@ dns_db_nodecount(dns_db_t *db) {
return ((db->methods->nodecount)(db));
}
unsigned int
size_t
dns_db_hashsize(dns_db_t *db) {
REQUIRE(DNS_DB_VALID(db));
if (db->methods->hashsize == NULL)
return (ISC_R_NOTIMPLEMENTED);
return (0);
return ((db->methods->hashsize)(db));
}
......
......@@ -194,7 +194,7 @@ typedef struct dns_dbmethods {
dns_rdataset_t *rdataset,
dns_rdataset_t *sigrdataset);
isc_result_t (*setcachestats)(dns_db_t *db, isc_stats_t *stats);
unsigned int (*hashsize)(dns_db_t *db);
size_t (*hashsize)(dns_db_t *db);
} dns_dbmethods_t;
typedef isc_result_t
......@@ -1380,7 +1380,7 @@ dns_db_nodecount(dns_db_t *db);
* \li The number of nodes in the database
*/
unsigned int
size_t
dns_db_hashsize(dns_db_t *db);
/*%<
* For database implementations using a hash table, report the
......@@ -1392,7 +1392,7 @@ dns_db_hashsize(dns_db_t *db);
*
* Returns:
* \li The number of buckets in the database's hash table, or
* ISC_R_NOTIMPLEMENTED.
* 0 if not implemented.
*/
void
......
......@@ -88,6 +88,7 @@ struct dns_rbtnode {
dns_rbtnode_t *right;
dns_rbtnode_t *down;
#ifdef DNS_RBT_USEHASH
dns_rbtnode_t *uppernode;
dns_rbtnode_t *hashnext;
#endif
......@@ -657,7 +658,7 @@ dns_rbt_nodecount(dns_rbt_t *rbt);
* \li rbt is a valid rbt manager.
*/
unsigned int
size_t
dns_rbt_hashsize(dns_rbt_t *rbt);
/*%<
* Obtain the current number of buckets in the 'rbt' hash table.
......
......@@ -29,6 +29,7 @@
#include <isc/mem.h>
#include <isc/once.h>
#include <isc/print.h>
#include <isc/random.h>
#include <isc/string.h>
#include <isc/thread.h>
#include <isc/util.h>
......@@ -478,42 +479,10 @@ dns_name_internalwildcard(const dns_name_t *name) {
return (ISC_FALSE);
}
/*
 * Hash at most the first 16 bytes of 'name's wire data.
 *
 * The accumulator is updated as h = h * 9 + byte (written as
 * h += (h << 3) + byte), similar to the string hash Ousterhout uses
 * in Tcl.  When 'case_sensitive' is false each byte is first mapped
 * through maptolower[] so that names differing only in case hash to
 * the same value.
 */
static inline unsigned int
name_hash(dns_name_t *name, isc_boolean_t case_sensitive) {
	const unsigned char *p = name->ndata;
	unsigned int remaining = name->length;
	unsigned int hash = 0;

	/* Only a bounded prefix of the name contributes to the hash. */
	if (remaining > 16)
		remaining = 16;

	for (; remaining > 0; remaining--, p++) {
		unsigned char ch;

		if (case_sensitive)
			ch = *p;
		else
			ch = maptolower[*p];
		hash += (hash << 3) + ch;
	}

	return (hash);
}
unsigned int
dns_name_hash(dns_name_t *name, isc_boolean_t case_sensitive) {
unsigned int length;
/*
* Provide a hash value for 'name'.
*/
......@@ -522,7 +491,12 @@ dns_name_hash(dns_name_t *name, isc_boolean_t case_sensitive) {
if (name->labels == 0)
return (0);
return (name_hash(name, case_sensitive));
length = name->length;
if (length > 16)
length = 16;
return (isc_hash_function_reverse(name->ndata, length,
case_sensitive, NULL));
}
unsigned int
......@@ -535,19 +509,17 @@ dns_name_fullhash(dns_name_t *name, isc_boolean_t case_sensitive) {
if (name->labels == 0)
return (0);
return (isc_hash_calc((const unsigned char *)name->ndata,
name->length, case_sensitive));
return (isc_hash_function_reverse(name->ndata, name->length,
case_sensitive, NULL));
}
unsigned int
dns_fullname_hash(dns_name_t *name, isc_boolean_t case_sensitive) {
	unsigned int hashval;

	/*
	 * Deprecated: this name breaks the name space convention.  It is
	 * retained only for binary backward compatibility and simply
	 * forwards to dns_name_fullhash().
	 */

	REQUIRE(VALID_NAME(name));

	hashval = dns_name_fullhash(name, case_sensitive);
	return (hashval);
}
......@@ -567,7 +539,8 @@ dns_name_hashbylabel(dns_name_t *name, isc_boolean_t case_sensitive) {
if (name->labels == 0)
return (0);
else if (name->labels == 1)
return (name_hash(name, case_sensitive));
return (isc_hash_function_reverse(name->ndata, name->length,
case_sensitive, NULL));
SETUP_OFFSETS(name, offsets, odata);
DNS_NAME_INIT(&tname, NULL);
......@@ -579,7 +552,8 @@ dns_name_hashbylabel(dns_name_t *name, isc_boolean_t case_sensitive) {
tname.length = name->length - offsets[i];
else
tname.length = offsets[i + 1] - offsets[i];
h += name_hash(&tname, case_sensitive);
h += isc_hash_function_reverse(tname.ndata, tname.length,
case_sensitive, NULL);
}
return (h);
......@@ -637,12 +611,15 @@ dns_name_fullcompare(const dns_name_t *name1, const dns_name_t *name2,
ldiff = l1 - l2;
}
offsets1 += l1;
offsets2 += l2;
while (l > 0) {
l--;
l1--;
l2--;
label1 = &name1->ndata[offsets1[l1]];
label2 = &name2->ndata[offsets2[l2]];
offsets1--;
offsets2--;
label1 = &name1->ndata[*offsets1];
label2 = &name2->ndata[*offsets2];
count1 = *label1++;
count2 = *label2++;
......@@ -658,16 +635,41 @@ dns_name_fullcompare(const dns_name_t *name1, const dns_name_t *name2,
else
count = count2;
while (count > 0) {
chdiff = (int)maptolower[*label1] -
(int)maptolower[*label2];
while (count > 3) {
chdiff = (int)maptolower[label1[0]] -
(int)maptolower[label2[0]];
if (chdiff != 0) {
*orderp = chdiff;
goto done;
}
chdiff = (int)maptolower[label1[1]] -
(int)maptolower[label2[1]];
if (chdiff != 0) {
*orderp = chdiff;
goto done;
}
chdiff = (int)maptolower[label1[2]] -
(int)maptolower[label2[2]];
if (chdiff != 0) {
*orderp = chdiff;
goto done;
}
chdiff = (int)maptolower[label1[3]] -
(int)maptolower[label2[3]];
if (chdiff != 0) {
*orderp = chdiff;
goto done;
}
count -= 4;
label1 += 4;
label2 += 4;
}
while (count-- > 0) {
chdiff = (int)maptolower[*label1++] - (int)maptolower[*label2++];
if (chdiff != 0) {
*orderp = chdiff;
goto done;
}
count--;
label1++;
label2++;
}
if (cdiff != 0) {
*orderp = cdiff;
......@@ -683,11 +685,12 @@ dns_name_fullcompare(const dns_name_t *name1, const dns_name_t *name2,
namereln = dns_namereln_subdomain;
else
namereln = dns_namereln_equal;
*nlabelsp = nlabels;
return (namereln);
done:
*nlabelsp = nlabels;
if (nlabels > 0 && namereln == dns_namereln_none)
if (nlabels > 0)
namereln = dns_namereln_commonancestor;
return (namereln);
......@@ -749,16 +752,31 @@ dns_name_equal(const dns_name_t *name1, const dns_name_t *name2) {
label1 = name1->ndata;
label2 = name2->ndata;
while (l > 0) {
l--;
while (l-- > 0) {
count = *label1++;
if (count != *label2++)
return (ISC_FALSE);
INSIST(count <= 63); /* no bitstring support */
while (count > 0) {
count--;
while (count > 3) {
c = maptolower[label1[0]];
if (c != maptolower[label2[0]])
return (ISC_FALSE);
c = maptolower[label1[1]];
if (c != maptolower[label2[1]])
return (ISC_FALSE);
c = maptolower[label1[2]];
if (c != maptolower[label2[2]])
return (ISC_FALSE);
c = maptolower[label1[3]];
if (c != maptolower[label2[3]])
return (ISC_FALSE);
count -= 4;
label1 += 4;
label2 += 4;
}
while (count-- > 0) {
c = maptolower[*label1++];
if (c != maptolower[*label2++])
return (ISC_FALSE);
......
......@@ -78,15 +78,15 @@
#endif
struct dns_rbt {
unsigned int magic;
isc_mem_t * mctx;
dns_rbtnode_t * root;
void (*data_deleter)(void *, void *);
void * deleter_arg;
unsigned int nodecount;
unsigned int hashsize;
dns_rbtnode_t ** hashtable;
void * mmap_location;
unsigned int magic;
isc_mem_t * mctx;
dns_rbtnode_t * root;
void (*data_deleter)(void *, void *);
void * deleter_arg;
unsigned int nodecount;
size_t hashsize;
dns_rbtnode_t ** hashtable;
void * mmap_location;
};
#define RED 0
......@@ -208,6 +208,9 @@ getdata(dns_rbtnode_t *node, file_header_t *header) {
#define LEFT(node) ((node)->left)
#define RIGHT(node) ((node)->right)
#define DOWN(node) ((node)->down)
#ifdef DNS_RBT_USEHASH
#define UPPERNODE(node) ((node)->uppernode)
#endif /* DNS_RBT_USEHASH */
#define DATA(node) ((node)->data)
#define IS_EMPTY(node) ((node)->data == NULL)
#define HASHNEXT(node) ((node)->hashnext)
......@@ -345,6 +348,39 @@ hexdump(const char *desc, unsigned char *data, size_t size) {
}
#endif /* DEBUG */
#ifdef DNS_RBT_USEHASH
/*
 * Return the upper node of 'node', i.e. the parent of the root of the
 * subtree that 'node' belongs to.  With DNS_RBT_USEHASH this is read
 * directly from the node's uppernode member.  'node' must not be NULL.
 */
static inline dns_rbtnode_t *
get_upper_node(dns_rbtnode_t *node) {
	dns_rbtnode_t *upper = UPPERNODE(node);

	return (upper);
}
/*
 * Set the uppernode member of 'node' and of everything reachable from
 * it.  Nodes reached through LEFT/RIGHT links share 'uppernode'; nodes
 * reached through a DOWN link get the node owning that link as their
 * uppernode.  A NULL 'node' is a no-op.
 */
static void
fixup_uppernodes_helper(dns_rbtnode_t *node, dns_rbtnode_t *uppernode) {
	while (node != NULL) {
		UPPERNODE(node) = uppernode;

		/* Siblings on this level keep the same upper node. */
		fixup_uppernodes_helper(LEFT(node), uppernode);
		fixup_uppernodes_helper(RIGHT(node), uppernode);

		/*
		 * Descend one level: 'node' becomes the upper node of
		 * the subtree hanging off its DOWN pointer.
		 */
		uppernode = node;
		node = DOWN(node);
	}
}
/*
 * Recompute the uppernode member of every dns_rbtnode in 'rbt',
 * starting at the tree root with a NULL upper node.  Used after
 * deserialization to fix up the uppernode members.
 */
static void
fixup_uppernodes(dns_rbt_t *rbt) {
	dns_rbtnode_t *top = rbt->root;

	fixup_uppernodes_helper(top, NULL);
}
#else
/* The passed node must not be NULL. */
static inline dns_rbtnode_t *
get_subtree_root(dns_rbtnode_t *node) {
......@@ -370,6 +406,8 @@ get_upper_node(dns_rbtnode_t *node) {
return (PARENT(root));
}
#endif /* DNS_RBT_USEHASH */
size_t
dns__rbtnode_getdistance(dns_rbtnode_t *node) {
size_t nodes = 1;
......@@ -421,11 +459,9 @@ treefix(dns_rbt_t *rbt, void *base, size_t size,
dns_rbtdatafixer_t datafixer, void *fixer_arg,
isc_uint64_t *crc);
static isc_result_t
deletetree(dns_rbt_t *rbt, dns_rbtnode_t *node);
static void
deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, dns_rbtnode_t **nodep);
deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, isc_boolean_t unhash,
dns_rbtnode_t **nodep);
static void
printnodename(dns_rbtnode_t *node, isc_boolean_t quoted, FILE *f);
......@@ -898,13 +934,19 @@ dns_rbt_deserialize_tree(void *base_address, size_t filesize,
goto cleanup;
}
if (header->nodecount != rbt->nodecount) {
result = ISC_R_INVALIDFILE;
goto cleanup;
}
#ifdef DNS_RBT_USEHASH
fixup_uppernodes(rbt);
#endif /* DNS_RBT_USEHASH */
*rbtp = rbt;
if (originp != NULL)
*originp = rbt->root;
if (header->nodecount != rbt->nodecount)
result = ISC_R_INVALIDFILE;
cleanup:
if (result != ISC_R_SUCCESS && rbt != NULL) {
rbt->root = NULL;
......@@ -976,7 +1018,7 @@ dns_rbt_destroy2(dns_rbt_t **rbtp, unsigned int quantum) {
rbt = *rbtp;
deletetreeflat(rbt, quantum, &rbt->root);
deletetreeflat(rbt, quantum, ISC_FALSE, &rbt->root);
if (rbt->root != NULL)
return (ISC_R_QUOTA);
......@@ -1003,7 +1045,7 @@ dns_rbt_nodecount(dns_rbt_t *rbt) {
return (rbt->nodecount);
}
unsigned int
size_t
dns_rbt_hashsize(dns_rbt_t *rbt) {
REQUIRE(VALID_RBT(rbt));
......@@ -1098,6 +1140,9 @@ dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) {
if (result == ISC_R_SUCCESS) {
rbt->nodecount++;
new_current->is_root = 1;
#ifdef DNS_RBT_USEHASH
UPPERNODE(new_current) = NULL;
#endif /* DNS_RBT_USEHASH */
rbt->root = new_current;
*nodep = new_current;
hash_node(rbt, new_current, name);
......@@ -1277,7 +1322,10 @@ dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) {
PARENT(current) = new_current;
DOWN(new_current) = current;
root = &DOWN(new_current);
#ifdef DNS_RBT_USEHASH
UPPERNODE(new_current) = UPPERNODE(current);
UPPERNODE(current) = new_current;
#endif /* DNS_RBT_USEHASH */
ADD_LEVEL(&chain, new_current);
LEFT(current) = NULL;
......@@ -1334,6 +1382,12 @@ dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) {
result = create_node(rbt->mctx, add_name, &new_current);
if (result == ISC_R_SUCCESS) {
#ifdef DNS_RBT_USEHASH
if (*root == NULL)
UPPERNODE(new_current) = current;
else
UPPERNODE(new_current) = PARENT(*root);
#endif /* DNS_RBT_USEHASH */
addonlevel(new_current, current, order, root);
rbt->nodecount++;
*nodep = new_current;
......@@ -1475,12 +1529,6 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname,
unsigned int tlabels = 1;
unsigned int hash;
/*
* If there is no hash table, hashing can't be done.
*/
if (rbt->hashtable == NULL)
goto nohash;
/*
* The case of current != current_root, that
* means a left or right pointer was followed,
......@@ -1495,7 +1543,7 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname,
/*
* current_root is the root of the current level, so
* it's parent is the same as it's "up" pointer.
* its parent is the same as its "up" pointer.
*/
up_current = PARENT(current_root);
dns_name_init(&hash_name, NULL);
......@@ -1579,8 +1627,8 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname,
current = NULL;
continue;
nohash:
#endif /* DNS_RBT_USEHASH */
#else /* DNS_RBT_USEHASH */
/*
* Standard binary search tree movement.
*/
......@@ -1589,6 +1637,8 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname,
else
current = RIGHT(current);
#endif /* DNS_RBT_USEHASH */
} else {
/*
* The names have some common suffix labels.
......@@ -2019,10 +2069,10 @@ dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, isc_boolean_t recurse)
INSIST(rbt->nodecount != 0);
if (DOWN(node) != NULL) {
if (recurse)
RUNTIME_CHECK(deletetree(rbt, DOWN(node))
== ISC_R_SUCCESS);
else {
if (recurse) {
PARENT(DOWN(node)) = NULL;
deletetreeflat(rbt, 0, ISC_TRUE, &DOWN(node));
} else {
if (DATA(node) != NULL && rbt->data_deleter != NULL)
rbt->data_deleter(DATA(node), rbt->deleter_arg);
DATA(node) = NULL;
......@@ -2034,6 +2084,7 @@ dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, isc_boolean_t recurse)
* by itself on a single level, so join_nodes() could
* be used to collapse the tree (with all the caveats
* of the comment at the start of this function).
* But join_nodes() function has now been removed.
*/
return (ISC_R_SUCCESS);
}
......@@ -2269,12 +2320,14 @@ rehash(dns_rbt_t *rbt, unsigned int newcount) {
unsigned int oldsize;
dns_rbtnode_t **oldtable;
dns_rbtnode_t *node;
dns_rbtnode_t *nextnode;
unsigned int hash;
unsigned int i;
oldsize = rbt->hashsize;
oldtable = rbt->hashtable;
do {
INSIST((rbt->hashsize * 2 + 1) > rbt->hashsize);
rbt->hashsize = rbt->hashsize * 2 + 1;
} while (newcount >= (rbt->hashsize * 3));
rbt->hashtable = isc_mem_get(rbt->mctx,
......@@ -2285,19 +2338,15 @@ rehash(dns_rbt_t *rbt, unsigned int newcount) {
return;
}
INSIST(rbt->hashsize > 0);
for (i = 0; i < rbt->hashsize; i++)
rbt->hashtable[i] = NULL;
for (i = 0; i < oldsize; i++) {
node = oldtable[i];
while (node != NULL) {
for (node = oldtable[i]; node != NULL; node = nextnode) {
hash = HASHVAL(node) % rbt->hashsize;
oldtable[i] = HASHNEXT(node);
nextnode = HASHNEXT(node);
HASHNEXT(node) = rbt->hashtable[hash];
rbt->hashtable[hash] = node;
node = oldtable[i];
}
}
......@@ -2321,19 +2370,17 @@ unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node) {
REQUIRE(DNS_RBTNODE_VALID(node));
if (rbt->hashtable != NULL) {
bucket = HASHVAL(node) % rbt->hashsize;
bucket_node = rbt->hashtable[bucket];
bucket = HASHVAL(node) % rbt->hashsize;
bucket_node = rbt->hashtable[bucket];
if (bucket_node == node)
rbt->hashtable[bucket] = HASHNEXT(node);
else {
while (HASHNEXT(bucket_node) != node) {