repeated `rndc reload` or `rndc reconfig` on bind 9.11.3 and 9.11.4 causes named memory usage to grow.
Summary
Each run of `rndc reload` or `rndc reconfig`, with BIND 9.11.3 and 9.11.4 in our configuration, causes `named` memory usage to grow.
Steps to reproduce
Run `rndc reload` or `rndc reconfig` repeatedly, without changing the configuration.
What is the current bug behavior?
Memory usage increases with each run of `rndc reload`.
What is the expected correct behavior?
Memory usage should remain relatively constant.
Relevant configuration files
$ sudo named-checkconf -t /var/named/chroot -px
acl "real_localhost" {
127.0.0.1/32;
};
acl "lan_hosts" {
10.0.0.0/8;
172.16.0.0/12;
192.168.0.0/16;
};
acl "dns_resolvers" {
"lan_hosts";
};
controls {
inet 127.0.0.1 port 953 allow {
"localhost";
} keys {
"rndckey";
};
};
logging {
channel "general" {
file "/var/log/named.log";
severity notice;
print-time yes;
print-severity yes;
print-category yes;
};
channel "verbose" {
file "/var/log/verbose.log";
severity debug 1;
print-time yes;
print-severity yes;
print-category yes;
};
channel "query" {
file "/var/log/query.log";
severity info;
print-time yes;
print-severity no;
print-category no;
};
category "default" {
"general";
"verbose";
};
category "queries" {
"query";
};
};
options {
directory "/var/named";
dump-file "data/cache_dump.db";
listen-on {
"any";
};
listen-on-v6 {
"any";
};
memstatistics-file "data/named_mem_stats.txt";
pid-file "/var/run/named/named.pid";
querylog no;
statistics-file "data/named_stats.txt";
use-v4-udp-ports {
range 57345 61000;
};
auth-nxdomain no;
max-cache-size 15728640;
no-case-compress {
"localhost";
"lan_hosts";
};
recursion yes;
rrset-order {
order random;
};
allow-query {
"localhost";
};
allow-transfer {
"none";
};
forward only;
forwarders {
10.86.100.108;
10.86.110.129;
10.86.144.123;
10.86.95.123;
10.86.96.101;
10.86.97.126;
};
notify no;
};
key "rndckey" {
algorithm "hmac-md5";
secret "????????????????????????????????????????????????????????????";
};
zone "twitter.com.smf1.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.smf1.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.atla.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.atla.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.atla.twttr.net" {
type master;
file "db.empty";
};
zone "twttr.net.atla.twttr.net" {
type master;
file "db.empty";
};
zone "twitter.com.atlb.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.atlb.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.atlb.twttr.net" {
type master;
file "db.empty";
};
zone "twttr.net.atlb.twttr.net" {
type master;
file "db.empty";
};
zone "twitter.com.smfc.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.smfc.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.atlc.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.atlc.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.atlc.twttr.net" {
type master;
file "db.empty";
};
zone "twttr.net.atlc.twttr.net" {
type master;
file "db.empty";
};
zone "twitter.com.prod.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.prod.twttr.net" {
type master;
file "db.empty";
};
zone "twttr.net.prod.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.prod.twttr.net" {
type master;
file "db.empty";
};
zone "twitter.com.corpdc.twitter.com" {
type master;
file "db.empty";
};
zone "twitter.com.corpdc.twttr.net" {
type master;
file "db.empty";
};
zone "twttr.net.corpdc.twitter.com" {
type master;
file "db.empty";
};
zone "twttr.net.corpdc.twttr.net" {
type master;
file "db.empty";
};
zone "twtter.com" {
type master;
file "db.empty";
};
zone "twitter.com.twttr.net" {
type master;
file "db.empty";
};
zone "twttr.net.twttr.net" {
type master;
file "db.empty";
};
zone "." {
type hint;
file "root.hint";
};
zone "localhost" {
type master;
file "db.localhost";
};
zone "0.0.127.in-addr.arpa" {
type master;
file "db.127.0.0";
};
Relevant logs and/or screenshots
We created core dumps from several machines with varying memory usage by sending signal 11 to `named`.
A first-pass, naïve analysis shows that the occurrence counts of the strings 'KSATtstA' and 'udpdispatch' loosely correlate with the memory usage of the process. These hosts are running BIND 9.11.3.
$ for i in smf* ; do echo = $i = ; strings -a $i | sort | uniq -c | sort -nr | head -n10 ; done
= smf1-azg-31-sr1 =
65978 KSATtstA
64625 udpdispatch
6475 tSeD
2624 pMEMlpmA
2253 nSND
1710 !fuB
1492 twitter
1257 CmeMxcmA
1197 L'jh
1197 disp_sepool
= smf1-dha-15-sr1 =
8297 KSATtstA
7168 udpdispatch
2212 tSeD
1088 CmeMxcmA
584 twitter
564 nSND
406 kLWR
375 !fuB
326 pMEMlpmA`
279 ONBR
= smf1-duy-24-sr1 =
24776 KSATtstA
23596 udpdispatch
3224 tSeD
1251 nSND
1136 CmeMxcmA
982 pMEMlpmA
910 !fuB
838 twitter
437 psiD
437 disp_sepool
= smf1-duz-23-sr1 =
24777 KSATtstA 7
23579 udpdispatch
4300 tSeD
1278 nSND
1136 CmeMxcmA
982 pMEMlpmA
928 !fuB
856 twitter
437 psiD
437 disp_sepool
We also tested out 9.11.4:
$ ps auxww | grep \[n]amed
named 205598 5.9 0.0 1282692 229648 ? Ssl 21:30 0:02 /usr/sbin/named -u named -c /etc/named.conf -t /var/named/chroot -c /etc/named.conf
$ sudo rndc reload
server reload successful
$ ps auxww | grep \[n]amed
named 205598 9.8 0.1 1282172 291564 ? Ssl 21:30 0:04 /usr/sbin/named -u named -c /etc/named.conf -t /var/named/chroot -c /etc/named.conf
$ sudo rndc reload
server reload successful
$ ps auxww | grep \[n]amed
named 205598 14.3 0.1 1282172 346904 ? Ssl 21:30 0:07 /usr/sbin/named -u named -c /etc/named.conf -t /var/named/chroot -c /etc/named.conf
$ sudo rndc reload
server reload successful
$ ps auxww | grep \[n]amed
named 205598 18.9 0.1 1282172 291992 ? Ssl 21:30 0:09 /usr/sbin/named -u named -c /etc/named.conf -t /var/named/chroot -c /etc/named.conf
After six more `rndc reload` commands:
$ ps auxww | grep \[n]amed
named 205598 31.6 0.1 1348028 409972 ? Ssl 21:30 0:38 /usr/sbin/named -u named -c /etc/named.conf -t /var/named/chroot -c /etc/named.conf
We forced a core and found similar results:
$ strings -a core.205598 | sort | uniq -c | sort -nr | head -n10
65972 KSATtstA
64699 udpdispatch
5480 tSeD
2612 pMEMlpmA`
1667 nSND
1522 !fuB
1251 CmeMxcmA
1197 psiD
1197 disp_sepool
1008 disp_portpool
Possible fixes
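These strings correspond to various magic values, suggesting that some code path taken by `rndc reload` and `rndc reconfig` is leaking `udpdispatch` structures tagged with ISCAPI_TASK_MAGIC and ONDESTROY_MAGIC.
(The magic values appear byte-reversed in the core, as expected on a little-endian host: 'KSATtstA' is TASK_MAGIC followed by ISCAPI_TASK_MAGIC, and 'tSeD' is ONDESTROY_MAGIC.)
We have a valgrind report, which was inconclusive.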
lib/isc/ondestroy.c:23:#define ONDESTROY_MAGIC ISC_MAGIC('D', 'e', 'S', 't')
lib/isc/task.c:89:#define TASK_MAGIC ISC_MAGIC('T', 'A', 'S', 'K')
lib/isc/include/isc/task.h:166:#define ISCAPI_TASK_MAGIC ISC_MAGIC('A','t','s','t')