Commit 6b8a5487 authored by Ondřej Surý's avatar Ondřej Surý
Browse files

Merge branch '1747-fix-race-in-rndc-when-shutting-down' into 'main'

Resolve "BIND 9.16.1 does core dump when stopped"

Closes #1747

See merge request isc-projects/bind9!3740
parents 402e1654 60520940
5453. [bug] `named` would crash on shutdown when new `rndc`
connection is received at the same time as
shutting down. [GL #1747]
5452. [bug] The "blackhole" ACL was accidentally disabled with
respect to client queries. [GL #1936]
 
......
......@@ -226,6 +226,7 @@ shutdown_listener(controllistener_t *listener) {
if (listener->listening) {
isc_socket_cancel(listener->sock, listener->task,
ISC_SOCKCANCEL_ACCEPT);
return;
}
maybe_free_listener(listener);
......@@ -636,6 +637,8 @@ control_newconn(isc_task_t *task, isc_event_t *event) {
UNUSED(task);
REQUIRE(listener->listening);
listener->listening = false;
if (nevent->result != ISC_R_SUCCESS) {
......@@ -647,6 +650,14 @@ control_newconn(isc_task_t *task, isc_event_t *event) {
}
sock = nevent->newsocket;
/* Is the server shutting down? */
if (listener->controls->shuttingdown) {
isc_socket_detach(&sock);
shutdown_listener(listener);
goto cleanup;
}
isc_socket_setname(sock, "control", NULL);
(void)isc_socket_getpeername(sock, &peeraddr);
if (listener->type == isc_sockettype_tcp &&
......@@ -1121,36 +1132,33 @@ add_listener(named_controls_t *cp, controllistener_t **listenerp,
listener = isc_mem_get(mctx, sizeof(*listener));
if (result == ISC_R_SUCCESS) {
listener->mctx = NULL;
isc_mem_attach(mctx, &listener->mctx);
listener->controls = cp;
listener->task = cp->server->task;
listener->address = *addr;
listener->sock = NULL;
listener->listening = false;
listener->exiting = false;
listener->acl = NULL;
listener->type = type;
listener->perm = 0;
listener->owner = 0;
listener->group = 0;
listener->readonly = false;
ISC_LINK_INIT(listener, link);
ISC_LIST_INIT(listener->keys);
ISC_LIST_INIT(listener->connections);
listener->mctx = NULL;
isc_mem_attach(mctx, &listener->mctx);
listener->controls = cp;
listener->task = cp->server->task;
listener->address = *addr;
listener->sock = NULL;
listener->listening = false;
listener->exiting = false;
listener->acl = NULL;
listener->type = type;
listener->perm = 0;
listener->owner = 0;
listener->group = 0;
listener->readonly = false;
ISC_LINK_INIT(listener, link);
ISC_LIST_INIT(listener->keys);
ISC_LIST_INIT(listener->connections);
/*
* Make the acl.
*/
if (control != NULL && type == isc_sockettype_tcp) {
allow = cfg_tuple_get(control, "allow");
result = cfg_acl_fromconfig(allow, config, named_g_lctx,
aclconfctx, mctx, 0,
&new_acl);
} else {
result = dns_acl_any(mctx, &new_acl);
}
/*
* Make the acl.
*/
if (control != NULL && type == isc_sockettype_tcp) {
allow = cfg_tuple_get(control, "allow");
result = cfg_acl_fromconfig(allow, config, named_g_lctx,
aclconfctx, mctx, 0, &new_acl);
} else {
result = dns_acl_any(mctx, &new_acl);
}
if ((result == ISC_R_SUCCESS) && (control != NULL)) {
......
......@@ -76,7 +76,7 @@ TESTS += \
rpzrecurse
endif HAVE_PERLMOD_NET_DNS
TESTS += \
TESTS += \
acl \
additional \
addzone \
......@@ -107,7 +107,7 @@ TESTS += \
geoip2 \
glue \
idna \
include-multiplecfg \
include-multiplecfg \
inline \
integrity \
kasp \
......@@ -134,6 +134,7 @@ TESTS += \
rrsetorder \
rsabigexponent \
runtime \
shutdown \
sfcache \
smartsign \
sortlist \
......
......@@ -114,6 +114,7 @@ rrsetorder
rsabigexponent
runtime
sfcache
shutdown
smartsign
sortlist
spf
......
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
rm -f ns*/*.jnl
rm -f ns*/named.conf
rm -f ns*/named.lock
rm -f ns*/named.memstats
rm -f ns*/named.run
rm -f ns*/rpz*.txt
rm -f resolver/named.conf
rm -rf __pycache__
rm -f *.status
############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################
import os
import pytest
def pytest_configure(config):
config.addinivalue_line(
"markers", "dnspython: mark tests that need dnspython to function"
)
def pytest_collection_modifyitems(config, items):
# pylint: disable=unused-argument,unused-import,too-many-branches
# pylint: disable=import-outside-toplevel
# Test for dnspython module
skip_dnspython = pytest.mark.skip(
reason="need dnspython module to run")
try:
import dns.query # noqa: F401
except ModuleNotFoundError:
for item in items:
if "dnspython" in item.keywords:
item.add_marker(skip_dnspython)
@pytest.fixture
def named_port(request):
# pylint: disable=unused-argument
port = os.getenv("PORT")
if port is None:
port = 5301
else:
port = int(port)
return port
@pytest.fixture
def control_port(request):
# pylint: disable=unused-argument
port = os.getenv("CONTROLPORT")
if port is None:
port = 5301
else:
port = int(port)
return port
key rndc_key {
secret "1234abcd8765";
algorithm hmac-sha256;
};
controls {
inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
query-source address 10.53.0.1;
notify-source 10.53.0.1;
transfer-source 10.53.0.1;
port @PORT@;
listen-on { 10.53.0.1; };
pid-file "named.pid";
notify no;
dnssec-validation no;
allow-query { any; };
recursion yes;
allow-recursion { any; };
};
# Delegate .test domain to 10.53.0.2
zone "." {
type master;
file "root.db";
allow-transfer { none; };
};
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
$TTL 300
@ IN SOA a.root. root.test. (
2000042100 ; serial
600 ; refresh
600 ; retry
1200 ; expire
600 ; minimum
)
. IN NS a.root.
a.root. IN A 10.53.0.1
test IN NS ns1.test
ns1.test IN A 10.53.0.2
key rndc_key {
secret "1234abcd8765";
algorithm hmac-sha256;
};
controls {
inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
query-source address 10.53.0.2;
notify-source 10.53.0.2;
transfer-source 10.53.0.2;
port @PORT@;
listen-on { 10.53.0.2; };
pid-file "named.pid";
notify no;
dnssec-validation no;
allow-query { any; };
};
# 10.53.0.2 is authoritative for .test domain
zone "test" {
type master;
file "test.db";
allow-transfer { none; };
};
$TTL 300
@ IN SOA ns1 root.test. 2020040101 4h 1h 1w 60
@ IN NS ns1
ns1 IN A 10.53.0.2
@ IN A 10.53.0.2
www IN A 10.53.0.2
key rndc_key {
secret "1234abcd8765";
algorithm hmac-sha256;
};
controls {
inet 10.53.0.3 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
query-source address 10.53.0.3;
notify-source 10.53.0.3;
transfer-source 10.53.0.3;
port @PORT@;
listen-on { 10.53.0.3; };
pid-file "named.pid";
notify no;
dnssec-validation no;
allow-query { any; };
allow-recursion { any; };
};
zone "." {
type hint;
file "root.db";
};
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
$TTL 300
. IN SOA a.root. root.root. (
2000042100 ; serial
600 ; refresh
600 ; retry
1200 ; expire
600 ; minimum
)
. IN NS a.root.
a.root. IN A 10.53.0.1
#! /bin/sh
#
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
# touch dnsrps-off to not test with DNSRPS
set -e
SYSTEMTESTTOP=..
. $SYSTEMTESTTOP/conf.sh
copy_setports ns1/named.conf.in ns1/named.conf
copy_setports ns2/named.conf.in ns2/named.conf
copy_setports resolver/named.conf.in resolver/named.conf
#!/usr/bin/python3
############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################
from concurrent.futures import ThreadPoolExecutor, as_completed
import os
import random
import subprocess
from string import ascii_lowercase as letters
import time
import dns.resolver
import pytest
def do_work(named_proc, resolver, rndc_cmd, kill_method, n_workers, n_queries):
"""Creates a number of A queries to run in parallel
in order simulate a slightly more realistic test scenario.
The main idea of this function is to create and send a bunch
of A queries to a target named instance and during this process
a request for shutting down named will be issued.
In the process of shutting down named, a couple control connections
are created (by launching rndc) to ensure that the crash was fixed.
if kill_method=="rndc" named will be asked to shutdown by
means of rndc stop.
if kill_method=="sigterm" named will be killed by SIGTERM on
POSIX systems or by TerminateProcess() on Windows systems.
:param named_proc: named process instance
:type named_proc: subprocess.Popen
:param resolver: target resolver
:type resolver: dns.resolver.Resolver
:param rndc_cmd: rndc command with default arguments
:type rndc_cmd: list of strings, e.g. ["rndc", "-p", "23750"]
:kill_method: "rndc" or "sigterm"
:type kill_method: str
:param n_workers: Number of worker threads to create
:type n_workers: int
:param n_queries: Total number of queries to send
:type n_queries: int
"""
# pylint: disable-msg=too-many-arguments
# pylint: disable-msg=too-many-locals
# helper function, args must be a list or tuple with arguments to rndc.
def launch_rndc(args):
return subprocess.call(rndc_cmd + args, timeout=10)
# We're going to execute queries in parallel by means of a thread pool.
# dnspython functions block, so we need to circunvent that.
executor = ThreadPoolExecutor(n_workers + 1)
# Helper dict, where keys=Future objects and values are tags used
# to process results later.
futures = {}
# 50% of work will be A queries.
# 1 work will be rndc stop.
# Remaining work will be rndc status (so we test parallel control
# connections that were crashing named).
shutdown = True
for i in range(n_queries):
if i < (n_queries // 2):
# Half work will be standard A queries.
# Among those we split 50% queries relname='www',
# 50% queries relname=random characters
if random.randrange(2) == 1:
tag = "good"
relname = "www"
else:
tag = "bad"
length = random.randint(4, 10)
relname = "".join(letters[
random.randrange(len(letters))] for i in range(length))
qname = relname + ".test"
futures[executor.submit(resolver.query, qname, 'A')] = tag
elif shutdown: # We attempt to stop named in the middle
shutdown = False
if kill_method == "rndc":
futures[executor.submit(launch_rndc, ['stop'])] = 'stop'
else:
futures[executor.submit(named_proc.terminate)] = 'kill'
else:
# We attempt to send couple rndc commands while named is
# being shutdown
futures[executor.submit(launch_rndc, ['status'])] = 'status'
ret_code = -1
for future in as_completed(futures):
try:
result = future.result()
# If tag is "stop", result is an instance of
# subprocess.CompletedProcess, then we check returncode
# attribute to know if rncd stop command finished successfully.
#
# if tag is "kill" then the main function will check if
# named process exited gracefully after SIGTERM signal.
if futures[future] == "stop":
ret_code = result
except (dns.resolver.NXDOMAIN, dns.exception.Timeout):
pass
if kill_method == "rndc":
assert ret_code == 0
executor.shutdown()
@pytest.mark.dnspython
def test_named_shutdown(named_port, control_port):
# pylint: disable-msg=too-many-locals
cfg_dir = os.path.join(os.getcwd(), "resolver")
assert os.path.isdir(cfg_dir)
cfg_file = os.path.join(cfg_dir, "named.conf")
assert os.path.isfile(cfg_file)
named = os.getenv("NAMED")
assert named is not None
rndc = os.getenv("RNDC")
assert rndc is not None
systest_dir = os.getenv("SYSTEMTESTTOP")
assert systest_dir is not None
# rndc configuration resides in $SYSTEMTESTTOP/common/rndc.conf
rndc_cfg = os.path.join(systest_dir, "common", "rndc.conf")
assert os.path.isfile(rndc_cfg)
# rndc command with default arguments.
rndc_cmd = [rndc, "-c", rndc_cfg, "-p", str(control_port),
"-s", "10.53.0.3"]
# Helper function, launch named without blocking.
def launch_named():
proc = subprocess.Popen([named, "-c", cfg_file, "-f"], cwd=cfg_dir)
# Ensure named is running
assert proc.poll() is None
return proc
# We create a resolver instance that will be used to send queries.
resolver = dns.resolver.Resolver()
resolver.nameservers = ['10.53.0.3']
resolver.port = named_port
# We test named shutting down using two methods:
# Method 1: using rndc ctop
# Method 2: killing with SIGTERM
# In both methods named should exit gracefully.
for kill_method in ("rndc", "sigterm"):
named_proc = launch_named()
time.sleep(2)
do_work(named_proc, resolver, rndc_cmd,
kill_method, n_workers=12, n_queries=16)
# Wait named to exit for a maximum of MAX_TIMEOUT seconds.
MAX_TIMEOUT = 10
is_dead = False
for _ in range(MAX_TIMEOUT):
if named_proc.poll() is not None:
is_dead = True
break
time.sleep(1)
if not is_dead:
named_proc.kill()
assert is_dead
# Ensures that named exited gracefully.
# If it crashed (abort()) exitcode will be non zero.
assert named_proc.returncode == 0
......@@ -62,3 +62,6 @@ Bug Fixes
client queries. Blocked IP addresses were not used for upstream
queries but queries from those addresses could still be answered.
[GL #1936]
- ``named`` would crash on shutdown when new ``rndc`` connection is received at
the same time as shutting down. [GL #1747]
......@@ -784,6 +784,10 @@
./bin/tests/system/sfcache/ns5/sign.sh SH 2018,2019,2020
./bin/tests/system/sfcache/setup.sh SH 2014,2016,2017,2018,2019,2020
./bin/tests/system/sfcache/tests.sh SH 2014,2016,2017,2018,2019,2020
./bin/tests/system/shutdown/clean.sh SH 2020
./bin/tests/system/shutdown/conftest.py PYTHON 2020
./bin/tests/system/shutdown/setup.sh SH 2020
./bin/tests/system/shutdown/tests-shutdown.py PYTHON-BIN 2020
./bin/tests/system/smartsign/clean.sh SH 2010,2012,2014,2016,2018,2019,2020
./bin/tests/system/smartsign/tests.sh SH 2010,2011,2012,2014,2016,2017,2018,2019,2020
./bin/tests/system/sortlist/clean.sh SH 2000,2001,2004,2007,2009,2012,2014,2015,2016,2018,2019,2020
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment