Commit 03be5a6b authored by Mukund Sivaraman
Browse files

Improve performance for delegation heavy answers and also general query performance (#44029)

parent 4c31eda5
......@@ -6,8 +6,6 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/* $Id: named.conf,v 1.16 2007/06/18 23:47:31 tbox Exp $ */
controls { /* empty */ };
options {
......@@ -19,8 +17,8 @@ options {
listen-on { 10.53.0.3; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
minimal-responses no;
};
zone "." {
......
......@@ -6,8 +6,6 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/* $Id: named.conf.in,v 1.10 2011/11/03 23:46:26 tbox Exp $ */
controls { /* empty */ };
options {
......
......@@ -6,8 +6,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# $Id: tests.sh,v 1.11 2011/11/03 23:46:26 tbox Exp $
SYSTEMTESTTOP=..
. $SYSTEMTESTTOP/conf.sh
......@@ -41,7 +39,7 @@ do
echo "I:checking the new key"
ret=0
$DIG $DIGOPTS . ns -k $keyname > dig.out.1 || ret=1
$DIG $DIGOPTS txt txt.example -k $keyname > dig.out.1 || ret=1
grep "status: NOERROR" dig.out.1 > /dev/null || ret=1
grep "TSIG.*hmac-md5.*NOERROR" dig.out.1 > /dev/null || ret=1
grep "Some TSIG could not be validated" dig.out.1 > /dev/null && ret=1
......@@ -60,7 +58,7 @@ do
echo "I:checking that new key has been deleted"
ret=0
$DIG $DIGOPTS . ns -k $keyname > dig.out.2 || ret=1
$DIG $DIGOPTS txt txt.example -k $keyname > dig.out.2 || ret=1
grep "status: NOERROR" dig.out.2 > /dev/null && ret=1
grep "TSIG.*hmac-md5.*NOERROR" dig.out.2 > /dev/null && ret=1
grep "Some TSIG could not be validated" dig.out.2 > /dev/null || ret=1
......
......@@ -24,7 +24,6 @@ options {
listen-on { 10.53.0.1; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
};
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.2; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
};
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.3; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
};
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.2; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
};
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.3; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
allow-v6-synthesis { any; };
};
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.3; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
notify yes;
};
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.1; };
listen-on-v6 { none; };
recursion no;
acache-enable yes;
};
zone "." {
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.2; };
listen-on-v6 { none; };
recursion no;
acache-enable yes;
};
zone "example" {
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.3; };
listen-on-v6 { none; };
recursion yes;
acache-enable yes;
};
zone "." {
......
......@@ -19,7 +19,6 @@ options {
listen-on { 10.53.0.4; };
listen-on-v6 { none; };
recursion no;
acache-enable yes;
};
zone "example" {
......
......@@ -578,11 +578,6 @@
option can be used to limit the amount of memory used by the cache,
at the expense of reducing cache hit rates and causing more <acronym>DNS</acronym>
traffic.
Additionally, if additional section caching
(<xref linkend="acache"/>) is enabled,
the <command>max-acache-size</command> option can be used to
limit the amount
of memory used by the mechanism.
It is still good practice to have enough memory to load
all zone and cache data into memory — unfortunately, the best
way
......@@ -4626,8 +4621,6 @@ badresp:1,adberr:0,findfail:0,valfail:0]
[ <command>nta-recheck</command> <replaceable>duration</replaceable> ; ]
[ <command>port</command> <replaceable>ip_port</replaceable> ; ]
[ <command>dscp</command> <replaceable>ip_dscp</replaceable> ; ]
[ <command>additional-from-auth</command> <replaceable>yes_or_no</replaceable> ; ]
[ <command>additional-from-cache</command> <replaceable>yes_or_no</replaceable> ; ]
[ <command>random-device</command> <replaceable>path_name</replaceable> ; ]
[ <command>max-cache-size</command> <replaceable>size_or_percent</replaceable> ; ]
[ <command>match-mapped-addresses</command> <replaceable>yes_or_no</replaceable> ; ]
......@@ -4653,9 +4646,6 @@ badresp:1,adberr:0,findfail:0,valfail:0]
[ <command>querylog</command> <replaceable>yes_or_no</replaceable> ; ]
[ <command>disable-algorithms</command> <replaceable>domain</replaceable> <command>{</command> <replaceable>algorithm</replaceable> ; ... <command>}</command> ; ]
[ <command>disable-ds-digests</command> <replaceable>domain</replaceable> <command>{</command> <replaceable>digest_type</replaceable> ; ... <command>}</command> ; ]
[ <command>acache-enable</command> <replaceable>yes_or_no</replaceable> ; ]
[ <command>acache-cleaning-interval</command> <replaceable>number</replaceable> ; ]
[ <command>max-acache-size</command> <replaceable>size_spec</replaceable> ; ]
[ <command>max-recursion-depth</command> <replaceable>number</replaceable> ; ]
[ <command>max-recursion-queries</command> <replaceable>number</replaceable> ; ]
[ <command>masterfile-format</command> ( <option>text</option> | <option>raw</option> | <option>map</option> ) ; ]
......@@ -6342,7 +6332,7 @@ options {
both authoritative and recursive queries.
</para>
<para>
The default is <userinput>no</userinput>.
The default is <userinput>yes</userinput>.
</para>
</listitem>
</varlistentry>
......@@ -6733,94 +6723,6 @@ options {
</listitem>
</varlistentry>
 
<varlistentry>
<term><command>additional-from-auth</command></term>
<term><command>additional-from-cache</command></term>
<listitem>
<para>
These options control the behavior of an authoritative
server when
answering queries which have additional data, or when
following CNAME
and DNAME chains.
</para>
<para>
When both of these options are set to <userinput>yes</userinput>
(the default) and a
query is being answered from authoritative data (a zone
configured into the server), the additional data section of
the
reply will be filled in using data from other authoritative
zones
and from the cache. In some situations this is undesirable,
such
as when there is concern over the correctness of the cache,
or
in servers where slave zones may be added and modified by
untrusted third parties. Also, avoiding
the search for this additional data will speed up server
operations
at the possible expense of additional queries to resolve
what would
otherwise be provided in the additional section.
</para>
<para>
For example, if a query asks for an MX record for host <literal>foo.example.com</literal>,
and the record found is "<literal>MX 10 mail.example.net</literal>", normally the address
records (A and AAAA) for <literal>mail.example.net</literal> will be provided as well,
if known, even though they are not in the example.com zone.
Setting these options to <command>no</command>
disables this behavior and makes
the server only search for additional data in the zone it
answers from.
</para>
<para>
These options are intended for use in authoritative-only
servers, or in authoritative-only views. Attempts to set
them to <command>no</command> without also
specifying
<command>recursion no</command> will cause the
server to
ignore the options and log a warning message.
</para>
<para>
Specifying <command>additional-from-cache no</command> actually
disables the use of the cache not only for additional data
lookups
but also when looking up the answer. This is usually the
desired
behavior in an authoritative-only server where the
correctness of
the cached data is an issue.
</para>
<para>
When a name server is non-recursively queried for a name
that is not
below the apex of any served zone, it normally answers with
an
"upwards referral" to the root servers or the servers of
some other
known parent of the query name. Since the data in an
upwards referral
comes from the cache, the server will not be able to provide
upwards
referrals when <command>additional-from-cache no</command>
has been specified. Instead, it will respond to such
queries
with REFUSED. This should not cause any problems since
upwards referrals are not required for the resolution
process.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>match-mapped-addresses</command></term>
<listitem>
......@@ -9055,7 +8957,7 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
 
<para>
The response to a DNS query may consist of multiple resource
records (RRs) forming a resource records set (RRset).
records (RRs) forming a resource record set (RRset).
The name server will normally return the
RRs within the RRset in an indeterminate order
(but see the <command>rrset-order</command>
......@@ -9169,17 +9071,14 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
<para>
When multiple records are returned in an answer it may be
useful to configure the order of the records placed into the
response.
The <command>rrset-order</command> statement permits
configuration
of the ordering of the records in a multiple record response.
response. The <command>rrset-order</command> statement permits
configuration of the ordering of the records in a
multiple-record response.
See also the <command>sortlist</command> statement,
<xref linkend="the_sortlist_statement"/>.
</para>
<para>
An <command>order_spec</command> is defined as
follows:
An <command>order_spec</command> is defined as follows:
</para>
<para>
<optional>class <replaceable>class_name</replaceable></optional>
......@@ -9207,7 +9106,10 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
<entry colname="2">
<para>
Records are returned in the order they
are defined in the zone file.
are defined in the zone file. This option
is only available if <acronym>BIND</acronym>
is configured with "--enable-fixed-rrset" at
compile time.
</para>
</entry>
</row>
......@@ -9227,29 +9129,45 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
</entry>
<entry colname="2">
<para>
Records are returned in a cyclic round-robin order.
Records are returned in a cyclic round-robin order,
rotating by one record per query.
</para>
<para>
If <acronym>BIND</acronym> is configured with the
"--enable-fixed-rrset" option at compile time, then
If <acronym>BIND</acronym> is configured with
"--enable-fixed-rrset" at compile time, then
the initial ordering of the RRset will match the
one specified in the zone file.
one specified in the zone file; otherwise the
initial ordering is indeterminate.
</para>
</entry>
</row>
<row rowsep="0">
<entry colname="1">
<para><command>none</command></para>
</entry>
<entry colname="2">
<para>
Records are returned in whatever order they were
retrieved from the database. This order is
indeterminate, but will be consistent as long as the
database is not modified. When no ordering is
specified, this is the default.
</para>
</entry>
</row>
</tbody>
</tgroup>
</informaltable>
<para>
</para>
<para>
For example:
</para>
<programlisting>rrset-order {
class IN type A name "host.example.com" order random;
order cyclic;
};
</programlisting>
<para>
will cause any responses for type A records in class IN that
have "<literal>host.example.com</literal>" as a
......@@ -9261,7 +9179,8 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
appear, they are not combined — the last one applies.
</para>
<para>
By default, all records are returned in random order.
By default, records are returned in indeterminate but
consistent order (see <command>none</command> above).
</para>
 
<note>
......@@ -10020,121 +9939,6 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
</variablelist>
</section>
 
<section xml:id="acache"><info><title>Additional Section Caching</title></info>
<para>
The additional section cache, also called <command>acache</command>,
is an internal cache to improve the response performance of BIND 9.
When additional section caching is enabled, BIND 9 will
cache an internal short-cut to the additional section content for
each answer RR.
Note that <command>acache</command> is an internal caching
mechanism of BIND 9, and is not related to the DNS caching
server function.
</para>
<para>
Additional section caching does not change the
response content (except the RRset ordering in the additional
section; see below), but can improve the response performance
significantly.
It is particularly effective when BIND 9 acts as an authoritative
server for a zone that has many delegations with many glue RRs.
</para>
<para>
In order to obtain the maximum performance improvement
from additional section caching, setting
<command>additional-from-cache</command>
to <command>no</command> is recommended, since the current
implementation of <command>acache</command>
does not short-cut additional section information from the
DNS cache data.
</para>
<para>
One obvious disadvantage of <command>acache</command> is
that it requires much more
memory for the internal cached data.
Thus, if the response performance does not matter and memory
consumption is much more critical, the
<command>acache</command> mechanism can be
disabled by setting <command>acache-enable</command> to
<command>no</command>.
It is also possible to specify the upper limit of memory
consumption
for acache by using <command>max-acache-size</command>.
</para>
<para>
Additional section caching also has a minor effect on the
RRset ordering in the additional section.
Without <command>acache</command>,
<command>cyclic</command> order is effective for the additional
section as well as the answer and authority sections.
However, additional section caching fixes the ordering when it
first caches an RRset for the additional section, and the same
ordering will be kept in succeeding responses, regardless of the
setting of <command>rrset-order</command>.
The effect of this should be minor, however, since an
RRset in the additional section
typically only contains a small number of RRs (and in many cases
it only contains a single RR), in which case the
ordering does not matter much.
</para>
<para>
The following is a summary of options related to
<command>acache</command>.
</para>
<variablelist>
<varlistentry>
<term><command>acache-enable</command></term>
<listitem>
<para>
If <command>yes</command>, additional section caching is
enabled. The default value is <command>no</command>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>acache-cleaning-interval</command></term>
<listitem>
<para>
The server will remove stale cache entries, using an LRU-based
algorithm, every <command>acache-cleaning-interval</command> minutes.
The default is 60 minutes.
If set to 0, no periodic cleaning will occur.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><command>max-acache-size</command></term>
<listitem>
<para>
The maximum amount of memory in bytes to use for the server's acache.
When the amount of data in the acache reaches this limit,
the server
will clean more aggressively so that the limit is not
exceeded.
In a server with multiple views, the limit applies
separately to the
acache of each view.
The default is <literal>16M</literal>.
</para>
</listitem>
</varlistentry>
</variablelist>
</section>
<section xml:id="content_filtering"><info><title>Content Filtering</title></info>
 
<para>
......
......@@ -64,10 +64,10 @@ masters <string> [ port <integer> ] [ dscp
<integer> ] ) [ key <string> ]; ... }; // may occur multiple times
options {
acache-cleaning-interval <integer>;
acache-enable <boolean>;
additional-from-auth <boolean>;
additional-from-cache <boolean>;
acache-cleaning-interval <integer>; // obsolete
acache-enable <boolean>; // obsolete
additional-from-auth <boolean>; // obsolete
additional-from-cache <boolean>; // obsolete
allow-new-zones <boolean>;
allow-notify { <address_match_element>; ... };
allow-query { <address_match_element>; ... };
......@@ -213,7 +213,7 @@ options {
masterfile-format ( map | raw | text );
masterfile-style ( full | relative );
match-mapped-addresses <boolean>;
max-acache-size ( unlimited | <sizeval> );
max-acache-size ( unlimited | <sizeval> ); // obsolete
max-cache-size ( default | unlimited | <sizeval> | <percentage> );
max-cache-ttl <integer>;
max-clients-per-query <integer>;
......@@ -418,10 +418,10 @@ trusted-keys { <string> <integer> <integer>
<integer> <quoted_string>; ... }; // may occur multiple times
view <string> [ <class> ] {
acache-cleaning-interval <integer>;
acache-enable <boolean>;
additional-from-auth <boolean>;
additional-from-cache <boolean>;
acache-cleaning-interval <integer>; // obsolete
acache-enable <boolean>; // obsolete
additional-from-auth <boolean>; // obsolete
additional-from-cache <boolean>; // obsolete
allow-new-zones <boolean>;
allow-notify { <address_match_element>; ... };
allow-query { <address_match_element>; ... };
......@@ -535,7 +535,7 @@ view <string> [ <class> ] {
match-clients { <address_match_element>; ... };
match-destinations { <address_match_element>; ... };
match-recursive-only <boolean>;
max-acache-size ( unlimited | <sizeval> );
max-acache-size ( unlimited | <sizeval> ); // obsolete
max-cache-size ( default | unlimited | <sizeval> | <percentage> );
max-cache-ttl <integer>;
max-clients-per-query <integer>;
......
......@@ -133,7 +133,8 @@ check_orderent(const cfg_obj_t *ent, isc_log_t *logctx) {
"compilation time");
#endif
} else if (strcasecmp(cfg_obj_asstring(obj), "random") != 0 &&
strcasecmp(cfg_obj_asstring(obj), "cyclic") != 0) {
strcasecmp(cfg_obj_asstring(obj), "cyclic") != 0 &&
strcasecmp(cfg_obj_asstring(obj), "none") != 0) {
cfg_obj_log(obj, logctx, ISC_LOG_ERROR,
"rrset-order: invalid order '%s'",
cfg_obj_asstring(obj));
......
......@@ -55,7 +55,7 @@ GEOIPLINKOBJS = geoip.@O@
DNSTAPOBJS = dnstap.@O@ dnstap.pb-c.@O@
# Alphabetically
DNSOBJS = acache.@O@ acl.@O@ adb.@O@ badcache.@O@ byaddr.@O@ \
DNSOBJS = acl.@O@ adb.@O@ badcache.@O@ byaddr.@O@ \
cache.@O@ callbacks.@O@ catz.@O@ clientinfo.@O@ compress.@O@ \
db.@O@ dbiterator.@O@ dbtable.@O@ diff.@O@ dispatch.@O@ \
dlz.@O@ dns64.@O@ dnssec.@O@ ds.@O@ dyndb.@O@ ecs.@O@ \
......@@ -95,7 +95,7 @@ GEOIPLINKSRCS = geoip.c
DNSTAPSRCS = dnstap.c dnstap.pb-c.c
DNSSRCS = acache.c acl.c adb.c badcache. byaddr.c \
DNSSRCS = acl.c adb.c badcache. byaddr.c \
cache.c callbacks.c clientinfo.c compress.c \
db.c dbiterator.c dbtable.c diff.c dispatch.c \
dlz.c dns64.c dnssec.c ds.c dyndb.c ecs.c forward.c \
......
This diff is collapsed.
......@@ -27,16 +27,110 @@
#define DCTX_MAGIC ISC_MAGIC('D', 'C', 'T', 'X')
#define VALID_DCTX(x) ISC_MAGIC_VALID(x, DCTX_MAGIC)
/*
 * One-time table initialisation for the compression context named 'cctx',
 * which must be in scope wherever this macro is expanded.
 *
 * If the DNS_COMPRESS_READY bit is not yet set in cctx->allowed, set it and
 * store NULL in each of the DNS_COMPRESS_TABLESIZE slots of cctx->table.
 * If the bit is already set, the macro does nothing.
 */
#define TABLE_READY \
	do { \
		if ((cctx->allowed & DNS_COMPRESS_READY) == 0) { \
			unsigned int slot = 0; \
			\
			cctx->allowed |= DNS_COMPRESS_READY; \
			while (slot < DNS_COMPRESS_TABLESIZE) \
				cctx->table[slot++] = NULL; \
		} \
	} while (0)
/*
 * Byte-indexed ASCII case-folding table with one entry per unsigned char
 * value (256 entries).
 *
 * maptolower[c] is c itself, except that the ASCII upper-case letters
 * 'A'..'Z' (0x41..0x5a) map to 'a'..'z' (0x61..0x7a).  Bytes 0x80..0xff
 * are left untouched; no locale is involved.
 *
 * The table is read-only, so it is declared const: writes through it are
 * rejected at compile time and the data can be placed in read-only storage.
 *
 * NOTE(review): presumably used to compare or hash name labels without
 * regard to case; confirm against the users of this table.
 */
static const unsigned char maptolower[] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
	/* 0x41..0x5a ('A'..'Z') fold to 0x61..0x7a ('a'..'z') */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
	/* high-bit bytes map to themselves */
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
/*
* The tableindex array below is of size 256, one entry for each
* unsigned char value. The tableindex array elements are dependent on
* DNS_COMPRESS_TABLESIZE. The table was created using the following