Possible memory leak in BIND
Summary
I've been running BIND as a resolver for many years and my workload hasn't changed much. But somehow memory usage has increased a lot. From my point of view it should use approximately 32G of memory at most:
grep max /etc/opt/isc/scls/isc-bind/named.conf
max-cache-size 32G;
But it's using a lot more. I've never seen this before.
ps aux --sort rss |grep named
named 869211 74.0 85.7 72655192 55757404 ? Ssl Oct17 21286:14 /opt/isc/isc-bind/root/usr/sbin/named -u named
I've attached a picture of memory usage in percent from my server; the maximum memory is 64G.
free -m
total used free shared buff/cache available
Mem: 63500 62582 594 227 1158 918
Swap: 4095 3034 1061
In the attached file you can see a drop in the usage; that is because I was patching my server.
BIND version affected
named -V
BIND 9.20.3 (Stable Release) <id:1e2850e>
running on Linux x86_64 5.14.0-427.37.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Sep 13 12:41:50 EDT 2024
built by make with '--build=x86_64-redhat-linux-gnu' '--host=x86_64-redhat-linux-gnu' '--program-prefix=' '--disable-dependency-tracking' '--prefix=/opt/isc/isc-bind/root/usr' '--exec-prefix=/opt/isc/isc-bind/root/usr' '--bindir=/opt/isc/isc-bind/root/usr/bin' '--sbindir=/opt/isc/isc-bind/root/usr/sbin' '--sysconfdir=/etc/opt/isc/scls/isc-bind' '--datadir=/opt/isc/isc-bind/root/usr/share' '--includedir=/opt/isc/isc-bind/root/usr/include' '--libdir=/opt/isc/isc-bind/root/usr/lib64' '--libexecdir=/opt/isc/isc-bind/root/usr/libexec' '--localstatedir=/var/opt/isc/scls/isc-bind' '--sharedstatedir=/var/opt/isc/scls/isc-bind/lib' '--mandir=/opt/isc/isc-bind/root/usr/share/man' '--infodir=/opt/isc/isc-bind/root/usr/share/info' '--enable-warn-error' '--disable-static' '--enable-dnstap' '--with-pic' '--with-gssapi' '--with-json-c' '--with-libxml2' '--without-lmdb' 'build_alias=x86_64-redhat-linux-gnu' 'host_alias=x86_64-redhat-linux-gnu' 'CC=gcc' 'CFLAGS=-O2 -flto=auto -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -march=x86-64-v2 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -fno-omit-frame-pointer' 'LDFLAGS=-Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -L/opt/isc/isc-bind/root/usr/lib64' 'CPPFLAGS= -I/opt/isc/isc-bind/root/usr/include' 'LT_SYS_LIBRARY_PATH=/usr/lib64' 'PKG_CONFIG_PATH=:/opt/isc/isc-bind/root/usr/lib64/pkgconfig:/opt/isc/isc-bind/root/usr/share/pkgconfig' 'SPHINX_BUILD=/builddir/build/BUILD/bind-9.20.3/sphinx/bin/sphinx-build'
compiled by GCC 11.4.1 20231218 (Red Hat 11.4.1-3)
compiled with OpenSSL version: OpenSSL 3.0.7 1 Nov 2022
linked to OpenSSL version: OpenSSL 3.0.7 1 Nov 2022
compiled with libuv version: 1.44.2
linked to libuv version: 1.44.2
compiled with liburcu version: 0.12.1
compiled with jemalloc version: 5.2.1
compiled with libnghttp2 version: 1.43.0
linked to libnghttp2 version: 1.43.0
compiled with libxml2 version: 2.9.13
linked to libxml2 version: 20913
compiled with json-c version: 0.14
linked to json-c version: 0.14
compiled with zlib version: 1.2.11
linked to zlib version: 1.2.11
compiled with protobuf-c version: 1.4.1
linked to protobuf-c version: 1.4.1
threads support is enabled
DNSSEC algorithms: RSASHA256 RSASHA512 ECDSAP256SHA256 ECDSAP384SHA384 ED25519 ED448
DS algorithms: SHA-1 SHA-256 SHA-384
HMAC algorithms: HMAC-MD5 HMAC-SHA1 HMAC-SHA224 HMAC-SHA256 HMAC-SHA384 HMAC-SHA512
TKEY mode 2 support (Diffie-Hellman): no
TKEY mode 3 support (GSS-API): yes
default paths:
named configuration: /etc/opt/isc/scls/isc-bind/named.conf
rndc configuration: /etc/opt/isc/scls/isc-bind/rndc.conf
nsupdate session key: /var/opt/isc/scls/isc-bind/run/named/session.key
named PID file: /var/opt/isc/scls/isc-bind/run/named/named.pid
Steps to reproduce
What is the current bug behavior?
I'm not doing anything different. BIND has just been running for some time. I've also used the same config for years, so nothing has changed there.
What is the expected correct behavior?
I think bind should not use more than 32G of memory.
Relevant configuration files
named-checkconf -px
acl "customer-acl" {
127.0.0.1/32;
::1/128;
....
};
logging {
channel "spill" {
file "/var/opt/isc/scls/isc-bind/log/spill.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "security" {
file "/var/opt/isc/scls/isc-bind/log/security.log" versions 2 size 52428800;
severity warning;
print-time yes;
print-severity yes;
print-category yes;
};
channel "client" {
file "/var/opt/isc/scls/isc-bind/log/client.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "dnssec" {
file "/var/opt/isc/scls/isc-bind/log/dnssec.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "rate-limit" {
file "/var/opt/isc/scls/isc-bind/log/rate-limit.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "general" {
file "/var/opt/isc/scls/isc-bind/log/general.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "network" {
file "/var/opt/isc/scls/isc-bind/log/network.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "dispatch" {
file "/var/opt/isc/scls/isc-bind/log/dispatch.log" versions 2 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
channel "default" {
file "/var/opt/isc/scls/isc-bind/log/named.log" versions 5 size 52428800;
severity dynamic;
print-time yes;
print-severity yes;
print-category yes;
};
category "spill" {
"spill";
};
category "security" {
"security";
};
category "client" {
"client";
};
category "dnssec" {
"dnssec";
};
category "rate-limit" {
"rate-limit";
};
category "general" {
"general";
};
category "network" {
"network";
};
category "dispatch" {
"dispatch";
};
category "lame-servers" {
"null";
};
category "cname" {
"null";
};
category "update" {
"null";
};
category "queries" {
"null";
};
category "query-errors" {
"null";
};
category "rpz" {
"null";
};
category "default" {
"default";
};
};
options {
directory "/var/opt/isc/scls/isc-bind/named/data";
listen-on port 53 {
"any";
};
listen-on-v6 port 53 {
"any";
};
querylog no;
recursive-clients 1000;
tcp-clients 500;
version "Nothing to see here.";
dnssec-validation auto;
max-cache-size 34359738368;
minimal-responses yes;
query-source address 85.x.x.x port 0;
query-source-v6 2a03:x:x:X::x;
rate-limit {
errors-per-second 50;
ipv4-prefix-length 32;
log-only no;
nxdomains-per-second 50;
responses-per-second 100;
window 5;
};
recursion yes;
response-policy {
zone "autorpz";
zone "manuelrpz";
} break-dnssec yes qname-wait-recurse no;
allow-query {
"customer-acl";
};
};
statistics-channels {
inet 127.0.0.1 port 8080 allow {
127.0.0.1/32;
};
};
zone "autorpz" {
type secondary;
file "autodb.rpz";
primaries {
85.x.x.x;
};
allow-query {
"none";
};
};
zone "manuelrpz" {
type secondary;
file "manueldb.rpz";
primaries {
85.x.x.x;
};
allow-query {
"none";
};
};
Relevant logs
Edited by Søren Andersen