1526. [func] Implemented "additional section caching (or acache)",

			an internal cache framework for additional section
			content to improve response performance.  Several
			configuration options were provided to control the
			behavior.
parent 0e93730a
......@@ -770,7 +770,11 @@
1527. [cleanup] Reduce the number of gettimeofday() calls without
losing necessary timer granularity.
1526. [placeholder]
1526. [func] Implemented "additional section caching (or acache)",
an internal cache framework for additional section
content to improve response performance. Several
configuration options were provided to control the
behavior.
1525. [bug] dns_cache_create() could trigger a REQUIRE
failure in isc_mem_put() during error cleanup.
......
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: config.c,v 1.51 2004/10/05 02:47:50 marka Exp $ */
/* $Id: config.c,v 1.52 2004/12/21 10:45:15 jinmei Exp $ */
#include <config.h>
......@@ -125,6 +125,9 @@ options {\n\
check-names master fail;\n\
check-names slave warn;\n\
check-names response ignore;\n\
use-additional-cache true;\n\
acache-cleaning-interval 60;\n\
max-acache-size 0;\n\
dnssec-enable no; /* Make yes for 9.4. */ \n\
"
......
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: server.h,v 1.75 2004/10/11 05:30:19 marka Exp $ */
/* $Id: server.h,v 1.76 2004/12/21 10:45:15 jinmei Exp $ */
#ifndef NAMED_SERVER_H
#define NAMED_SERVER_H 1
......@@ -94,7 +94,8 @@ struct ns_server {
ns_controls_t * controls; /* Control channels */
unsigned int dispatchgen;
ns_dispatchlist_t dispatches;
dns_acache_t *acache;
};
#define NS_SERVER_MAGIC ISC_MAGIC('S','V','E','R')
......
This diff is collapsed.
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: server.c,v 1.433 2004/11/10 22:14:28 marka Exp $ */
/* $Id: server.c,v 1.434 2004/12/21 10:45:15 jinmei Exp $ */
#include <config.h>
......@@ -41,6 +41,7 @@
#include <bind9/check.h>
#include <dns/acache.h>
#include <dns/adb.h>
#include <dns/cache.h>
#include <dns/db.h>
......@@ -733,6 +734,7 @@ configure_view(dns_view_t *view, cfg_obj_t *config, cfg_obj_t *vconfig,
isc_result_t result;
isc_uint32_t max_adb_size;
isc_uint32_t max_cache_size;
isc_uint32_t max_acache_size;
isc_uint32_t lame_ttl;
dns_tsig_keyring_t *ring;
dns_view_t *pview = NULL; /* Production view */
......@@ -776,6 +778,51 @@ configure_view(dns_view_t *view, cfg_obj_t *config, cfg_obj_t *vconfig,
CHECKM(ns_config_getport(config, &port), "port");
dns_view_setdstport(view, port);
/*
* Create additional cache for this view and zones under the view
* unless explicitly disabled.
*/
obj = NULL;
ns_config_get(maps, "use-additional-cache", &obj);
if (obj == NULL || cfg_obj_asboolean(obj)) {
cmctx = NULL;
CHECK(isc_mem_create(0, 0, &cmctx));
CHECK(dns_acache_create(&view->acache, cmctx, ns_g_taskmgr,
ns_g_timermgr));
isc_mem_detach(&cmctx);
}
if (view->acache != NULL) {
obj = NULL;
result = ns_config_get(maps, "acache-cleaning-interval", &obj);
INSIST(result == ISC_R_SUCCESS);
dns_acache_setcleaninginterval(view->acache,
cfg_obj_asuint32(obj) * 60);
obj = NULL;
result = ns_config_get(maps, "max-acache-size", &obj);
INSIST(result == ISC_R_SUCCESS);
if (cfg_obj_isstring(obj)) {
str = cfg_obj_asstring(obj);
INSIST(strcasecmp(str, "unlimited") == 0);
max_acache_size = ISC_UINT32_MAX;
} else {
isc_resourcevalue_t value;
value = cfg_obj_asuint64(obj);
if (value > ISC_UINT32_MAX) {
cfg_obj_log(obj, ns_g_lctx, ISC_LOG_ERROR,
"'max-acache-size "
"%" ISC_PRINT_QUADFORMAT
"d' is too large",
value);
result = ISC_R_RANGE;
goto cleanup;
}
max_acache_size = (isc_uint32_t)value;
}
dns_acache_setcachesize(view->acache, max_acache_size);
}
/*
* Configure the zones.
*/
......@@ -1737,6 +1784,8 @@ configure_zone(cfg_obj_t *config, cfg_obj_t *zconfig, cfg_obj_t *vconfig,
* new view.
*/
dns_zone_setview(zone, view);
if (view->acache != NULL)
dns_zone_setacache(zone, view->acache);
} else {
/*
* We cannot reuse an existing zone, we have
......@@ -1745,6 +1794,8 @@ configure_zone(cfg_obj_t *config, cfg_obj_t *zconfig, cfg_obj_t *vconfig,
CHECK(dns_zone_create(&zone, mctx));
CHECK(dns_zone_setorigin(zone, origin));
dns_zone_setview(zone, view);
if (view->acache != NULL)
dns_zone_setacache(zone, view->acache);
CHECK(dns_zonemgr_managezone(ns_g_server->zonemgr, zone));
}
......
......@@ -2,7 +2,7 @@
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.0//EN"
"http://www.oasis-open.org/docbook/xml/4.0/docbookx.dtd">
<!-- File: $Id: Bv9ARM-book.xml,v 1.259 2004/11/11 01:08:24 marka Exp $ -->
<!-- File: $Id: Bv9ARM-book.xml,v 1.260 2004/12/21 10:45:16 jinmei Exp $ -->
<book>
<title>BIND 9 Administrator Reference Manual</title>
......@@ -392,7 +392,12 @@ signed zones, serving many thousands of queries per second.</para></sect1>
cache and zones loaded off disk. The <command>max-cache-size</command>
option can be used to limit the amount of memory used by the cache,
at the expense of reducing cache hit rates and causing more <acronym>DNS</acronym>
traffic. It is still good practice to have enough memory to load
traffic.
Additionally, if additional section caching
(<xref linkend="acache"/>) is enabled,
the <command>max-acache-size</command> can be used to limit the amount
of memory used by the mechanism.
It is still good practice to have enough memory to load
all zone and cache data into memory &mdash; unfortunately, the best way
to determine this for a given installation is to watch the name server
in operation. After a few weeks the server process should reach
......@@ -2800,8 +2805,11 @@ statement in the <filename>named.conf</filename> file:</para>
<optional> edns-udp-size <replaceable>number</replaceable>; </optional>
<optional> root-delegation-only <optional> exclude { <replaceable>namelist</replaceable> } </optional> ; </optional>
<optional> querylog <replaceable>yes_or_no</replaceable> ; </optional>
};
<optional> disable-algorithms <replaceable>domain</replaceable> { <replaceable>algorithm</replaceable>; <optional> <replaceable>algorithm</replaceable>; </optional> }; </optional>
<optional> use-additional-cache <replaceable>yes_or_no</replaceable> ; </optional>
<optional> acache-cleaning-interval <replaceable>number</replaceable>; </optional>
<optional> max-acache-size <replaceable>size_spec</replaceable> ; </optional>
};
</programlisting>
</sect2>
......@@ -4346,6 +4354,99 @@ to be incremented, and may additionally cause the
</sect3>
<sect3 id="acache">
<title>Additional Section Caching</title>
<para>
The additional section cache, also called <command>acache</command>,
is an internal cache to improve the response performance of BIND 9.
When additional section caching is enabled, BIND 9 will
cache an internal short-cut to the additional section content for each
answer RR.
Note that acache is an internal caching mechanism of BIND 9, and is
not relevant to the DNS caching server function.
</para>
<para>
The additional section caching does not make any difference on the
response content (except the RRsets ordering of the additional
section, see below), but can improve the response performance significantly.
It is particularly effective when BIND 9 acts as an authoritative server
for a zone that has many delegations with many glue RRs.
</para>
<para>
In order to achieve the maximum performance improvement by acache,
it is recommended to set <command>additional-from-cache</command>
to <command>no</command>, since the current implementation of acache
does not short-cut additional section information that comes from
DNS cache data.
</para>
<para>
One obvious disadvantage of acache is that it requires much more
memory for the internal cached data.
Thus, if response performance is not critical and memory
consumption is a more serious concern, the acache mechanism can be
disabled by setting <command>use-additional-cache</command> to
<command>no</command>.
It is also possible to specify the upper limit of memory consumption
for acache by <command>max-acache-size</command>.
</para>
<para>
The additional section caching also has a minor effect on the RRset
ordering in the additional section.
Without acache, the "cyclic" order is effective for the additional
section as well as the answer and authority sections.
However, the additional section caching fixes the ordering when it
first caches an RRset for the additional section, and the same
ordering will be kept in succeeding responses, regardless of the
configuration for <command>rrset-order</command>.
This should be minor, though, since an RRset in the additional section
typically only contains a small number of RRs (and in many cases it
only contains a single RR), in which case the
ordering does not matter much.
</para>
<para>
The following is a summary of options related to acache.
</para>
<variablelist>
<varlistentry><term><command>use-additional-cache</command></term>
<listitem><para>
If yes, the additional section caching is enabled.
The default value is yes.
</para>
</listitem></varlistentry>
<varlistentry><term><command>acache-cleaning-interval</command></term>
<listitem><para>
The server will remove stale cache entries, using an LRU-based
algorithm, every <command>acache-cleaning-interval</command> minutes.
The default is 60 minutes.
If set to 0, no periodic cleaning will occur.
</para>
</listitem></varlistentry>
<varlistentry><term><command>max-acache-size</command></term>
<listitem><para>
The maximum amount of memory to use for the server's acache, in bytes.
When the amount of data in the acache reaches this limit, the server
will cause more aggressive cleaning so that the limit is not exceeded.
In a server with multiple views, the limit applies separately to the
acache of each view.
The default is <literal>unlimited</literal>, meaning that
entries are purged from acache only at the periodic cleaning time.
</para>
</listitem></varlistentry>
</variablelist>
</sect3>
</sect2>
<sect2 id="server_statement_grammar">
......
......@@ -13,7 +13,7 @@
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
# $Id: Makefile.in,v 1.148 2004/12/09 01:40:59 marka Exp $
# $Id: Makefile.in,v 1.149 2004/12/21 10:45:16 jinmei Exp $
srcdir = @srcdir@
VPATH = @srcdir@
......@@ -49,7 +49,7 @@ DSTOBJS = dst_api.@O@ dst_lib.@O@ dst_parse.@O@ dst_result.@O@ \
opensslrsa_link.@O@
# Alphabetically
DNSOBJS = acl.@O@ adb.@O@ byaddr.@O@ \
DNSOBJS = acache.@O@ acl.@O@ adb.@O@ byaddr.@O@ \
cache.@O@ callbacks.@O@ compress.@O@ \
db.@O@ dbiterator.@O@ dbtable.@O@ diff.@O@ dispatch.@O@ \
dnssec.@O@ ds.@O@ forward.@O@ journal.@O@ keytable.@O@ \
......@@ -73,7 +73,7 @@ DSTSRCS = dst_api.c dst_lib.c dst_parse.c \
openssl_link.c openssldh_link.c \
openssldsa_link.c opensslrsa_link.c
DNSSRCS = acl.c adb.c byaddr.c \
DNSSRCS = acache.c acl.c adb.c byaddr.c \
cache.c callbacks.c compress.c \
db.c dbiterator.c dbtable.c diff.c dispatch.c \
dnssec.c ds.c forward.c journal.c keytable.c \
......
This diff is collapsed.
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: db.c,v 1.74 2004/03/05 05:09:18 marka Exp $ */
/* $Id: db.c,v 1.75 2004/12/21 10:45:16 jinmei Exp $ */
/***
*** Imports
......@@ -791,3 +791,51 @@ dns_db_unregister(dns_dbimplementation_t **dbimp) {
isc_mem_detach(&mctx);
RWUNLOCK(&implock, isc_rwlocktype_write);
}
/*
 * Retrieve a node from 'db' through the implementation's optional
 * 'getsoanode' method.
 *
 * Requires:
 *	'db' is a valid database for which dns_db_iszone() is true.
 *	'nodep' is non-NULL and '*nodep' is NULL.
 *
 * Returns:
 *	Whatever the implementation's method returns, or ISC_R_NOTFOUND
 *	when the implementation does not provide 'getsoanode'.
 */
isc_result_t
dns_db_getsoanode(dns_db_t *db, dns_dbnode_t **nodep) {
	isc_result_t result = ISC_R_NOTFOUND;

	REQUIRE(DNS_DB_VALID(db));
	REQUIRE(dns_db_iszone(db) == ISC_TRUE);
	REQUIRE(nodep != NULL && *nodep == NULL);

	/* The method is optional; only dispatch when it is present. */
	if (db->methods->getsoanode != NULL)
		result = (db->methods->getsoanode)(db, nodep);

	return (result);
}
/*
 * Hand 'node' to the implementation of 'db' through its optional
 * 'setsoanode' method.
 *
 * Requires:
 *	'db' is a valid database for which dns_db_iszone() is true.
 *	'node' is non-NULL.
 *
 * Returns:
 *	Whatever the implementation's method returns, or ISC_R_FAILURE
 *	when the implementation does not provide 'setsoanode'.
 */
isc_result_t
dns_db_setsoanode(dns_db_t *db, dns_dbnode_t *node) {
	isc_result_t result = ISC_R_FAILURE;

	REQUIRE(DNS_DB_VALID(db));
	REQUIRE(dns_db_iszone(db) == ISC_TRUE);
	REQUIRE(node != NULL);

	/* The method is optional; only dispatch when it is present. */
	if (db->methods->setsoanode != NULL)
		result = (db->methods->setsoanode)(db, node);

	return (result);
}
/*
 * Retrieve a node from 'db' through the implementation's optional
 * 'getnsnode' method.
 *
 * Requires:
 *	'db' is a valid database for which dns_db_iszone() is true.
 *	'nodep' is non-NULL and '*nodep' is NULL.
 *
 * Returns:
 *	Whatever the implementation's method returns, or ISC_R_NOTFOUND
 *	when the implementation does not provide 'getnsnode'.
 */
isc_result_t
dns_db_getnsnode(dns_db_t *db, dns_dbnode_t **nodep) {
	isc_result_t result = ISC_R_NOTFOUND;

	REQUIRE(DNS_DB_VALID(db));
	REQUIRE(dns_db_iszone(db) == ISC_TRUE);
	REQUIRE(nodep != NULL && *nodep == NULL);

	/* The method is optional; only dispatch when it is present. */
	if (db->methods->getnsnode != NULL)
		result = (db->methods->getnsnode)(db, nodep);

	return (result);
}
/*
 * Hand 'node' to the implementation of 'db' through its optional
 * 'setnsnode' method.
 *
 * Requires:
 *	'db' is a valid database for which dns_db_iszone() is true.
 *	'node' is non-NULL.
 *
 * Returns:
 *	Whatever the implementation's method returns, or ISC_R_FAILURE
 *	when the implementation does not provide 'setnsnode'.
 */
isc_result_t
dns_db_setnsnode(dns_db_t *db, dns_dbnode_t *node) {
	isc_result_t result = ISC_R_FAILURE;

	REQUIRE(DNS_DB_VALID(db));
	REQUIRE(dns_db_iszone(db) == ISC_TRUE);
	REQUIRE(node != NULL);

	/* The method is optional; only dispatch when it is present. */
	if (db->methods->setnsnode != NULL)
		result = (db->methods->setnsnode)(db, node);

	return (result);
}
/*
* Copyright (C) 2003 Internet Software Consortium.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM
* DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
* INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: acache.h,v 1.2 2004/12/21 10:45:18 jinmei Exp $ */
#ifndef DNS_ACACHE_H
#define DNS_ACACHE_H 1
/*****
***** Module Info
*****/
/*
* Acache
*
* The Additional Cache Object
*
* This module manages internal caching entries that correspond to
* the additional section data of a DNS DB node (an RRset header, more
* accurately). An additional cache entry is expected to be (somehow)
* attached to a particular RR in a particular DB node, and contains a set
* of information of an additional data for the DB node.
*
* An additional cache object is intended to be created as a per-view
* object, and manages all cache entries within the view.
*
* The intended usage of the additional caching is to provide a short cut
* to additional glue RRs of an NS RR. For each NS RR, it is often
* necessary to look for glue RRs to make a proper response. Once the
* glue RRs are known, the additional caching allows the client to
* associate the information to the original NS RR so that further
* expensive lookups can be avoided for the NS RR.
*
* Each additional cache entry contains information to identify a
* particular DB node and (optionally) an associated RRset. The
* information consists of its zone, database, the version of the
* database, database node, and RRset.
*
* Negative information can also be cached. For example, if a glue
* RR does not exist as authoritative data in the same zone as that
* of the NS RR, this fact can be cached by specifying a NULL pointer
* for the database, version, and node. (See the description for
* dns_acache_getentry() below for more details.)
*
* Since each member stored in an additional cache entry holds a reference
* to a corresponding object, a stale cache entry may cause unnecessary
* memory consumption. For instance, when a zone is reloaded, additional
* cache entries that have a reference to the zone (and its DB and/or
* DB nodes) can delay the cleanup of the referred objects. In order to
* minimize such a bad effect, this module provides several cleanup
* mechanisms.
*
* The first one is a shutdown procedure called when the associated view
* is shut down. In this case, dns_acache_shutdown() will be called and
* all cache entries will be purged. This mechanism will help the
* situation when the configuration is reloaded or the main server is
* stopped.
*
* Per-DB cleanup mechanism is also provided. Each additional cache entry
* is associated with related DB, which is expected to have been
* registered when the DB was created by dns_acache_setdb(). If a
* particular DB is going to be destroyed, the primary holder of the DB,
* a typical example of which is a zone, will call dns_acache_putdb().
* Then this module will clean-up all cache entries associated with the
* DB. This mechanism is effective when a secondary zone DB is going to
* be stale after a zone transfer.
*
* Finally, this module supports periodic clean-up of stale entries.
* Each cache entry has a timestamp field, which is updated every time
* the entry is referenced. A periodically invoked cleaner checks the
* timestamp of each entry, and purges entries that have not been
* referenced for a certain period. The cleaner interval can be specified
* by dns_acache_setcleaninginterval(). If the periodic clean-up is not
* enough, it is also possible to specify the upper limit of entries
* in terms of the memory consumption. If the maximum value is
* specified, the cleaner is invoked when the memory consumption reaches
* the high watermark inferred from the maximum value. In this case,
* the cleaner will use a more aggressive algorithm to decide the "victim"
* entries. The maximum value can be specified by
* dns_acache_setcachesize().
*
* When a cache entry is going to be purged within this module, the
* callback function specified at the creation time will be called.
* The callback function is expected to release all internal resources
* related to the entry, which will typically be specific to DB
* implementation, and to call dns_acache_detachentry(). The callback
* mechanism is very important, since the holder of an additional cache
* entry may not be able to initiate the clean-up of the entry, due to
* the reference ordering. For example, as long as an additional cache
* entry has a reference to a DB object, the DB cannot be freed, and a
* DB node within that DB may itself hold a reference to the cache entry.
*
* Credits:
* The basic idea of this kind of short-cut for frequently used
* information is similar to the "pre-compiled answer" approach adopted
* in nsd by NLnet LABS with RIPE NCC. Our work here is an independent
* effort, but the success of nsd encouraged us to pursue this path.
*
* The design and implementation of the periodic memory management and
* the upper limitation of memory consumption was derived from the cache
* DB implementation of BIND9.
*
* MP:
* There are two main locks in this module. One is for each entry, and
* the other is for the additional cache object.
*
* Reliability:
* The callback function for a cache entry is called while holding the
* entry lock. Thus, this module implicitly assumes that the callback
* function does not call any function that may acquire the lock.
* Typically, the only function that can be called from the callback
* function safely is dns_acache_detachentry(). Violating this implicit
* assumption may cause a deadlock.
*
* Resources:
* In a 32-bit architecture (such as i386), the following additional
* memory is required compared to the case where this module is disabled.
* - 76 bytes for each additional cache entry
* - if the entry has a DNS name and associated RRset,
* * 44 bytes + size of the name (1-255 bytes)
* * 52 bytes x number_of_RRs
* - 28 bytes for each DB related to this module
*
* Using the additional cache also requires extra memory consumption in
* the DB implementation. In the current implementation for rbtdb, we
* need:
* - two additional pointers for each DB node (8 bytes for a 32-bit
* architecture)
* - for each RR associated to an RR in a DB node, we also need
* a pointer and management objects to support the additional cache
* function. These are allocated on-demand. The total size is
* 32 bytes for a 32-bit architecture.
*
* Security:
* Since this module does not handle any low-level data directly,
* no security issue specific to this module is anticipated.
*
* Standards:
* None.
*/
/***
*** Imports
***/
#include <isc/mutex.h>
#include <isc/lang.h>
#include <isc/refcount.h>
#include <isc/stdtime.h>
#include <dns/types.h>
/***
*** Functions
***/
ISC_LANG_BEGINDECLS
isc_result_t
dns_acache_create(dns_acache_t **acachep, isc_mem_t *mctx,
isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr);
/*
* Create a new DNS additional cache object.
*
* Requires:
*
* 'mctx' is a valid memory context
*
* 'taskmgr' is a valid task manager
*
* 'timermgr' is a valid timer manager or NULL. If NULL, no periodic
* cleaning of the cache will take place.
*
* 'acachep' is a valid pointer, and *acachep == NULL
*
* Ensures:
*
* '*acachep' is attached to the newly created cache
*
* Returns:
*
* ISC_R_SUCCESS
* ISC_R_NOMEMORY
* ISC_R_UNEXPECTED
*/
void
dns_acache_attach(dns_acache_t *source, dns_acache_t **targetp);
/*
* Attach *targetp to the 'source' additional cache.
*
* Requires:
*
* 'source' is a valid additional cache.
*
* 'targetp' points to a NULL dns_acache_t *.
*
* Ensures:
*
* *targetp is attached to the 'source' additional cache.
*/
void
dns_acache_detach(dns_acache_t **acachep);
/*
* Detach *acachep from its cache.
*
* Requires:
*
* '*acachep' points to a valid additional cache.
*
* Ensures:
*
* *acachep is NULL.
*
* If '*acachep' is the last reference to the cache and the additional
* cache does not have an outstanding task, all resources used by the
* cache will be freed.
*/
void
dns_acache_setcleaninginterval(dns_acache_t *acache, unsigned int t);
/*
* Set the periodic cleaning interval of an additional cache to 't'
* seconds.
*/
void
dns_acache_setcachesize(dns_acache_t *acache, isc_uint32_t size);
/*
* Set the maximum additional cache size. 0 means unlimited.
*/
isc_result_t
dns_acache_setdb(dns_acache_t *acache, dns_db_t *db);
/*
* Set 'db' in 'acache' when the db can be referred from acache, in order
* to provide a hint for resolving the back reference.
*
* Requires:
* 'acache' is a valid acache pointer.
* 'db' is a valid DNS DB pointer.
*
* Ensures:
* 'acache' will have a reference to 'db'.
*
* Returns:
* ISC_R_SUCCESS
* ISC_R_EXISTS (which means the specified 'db' is already set)
* ISC_R_NOMEMORY
*/
isc_result_t
dns_acache_putdb(dns_acache_t *acache, dns_db_t *db);
/*
* Release 'db' from 'acache' if it has been set by dns_acache_setdb().
*
* Requires:
* 'acache' is a valid acache pointer.
* 'db' is a valid DNS DB pointer.
*
* Ensures:
* 'acache' will release the reference to 'db'. Additionally, the content
* of each cache entry that is related to the 'db' will be released via
* the callback function.
*
* Returns:
* ISC_R_SUCCESS
* ISC_R_NOTFOUND (which means the specified 'db' is not set in 'acache')
* ISC_R_NOMEMORY
*/
void
dns_acache_shutdown(dns_acache_t *acache);
/*