Commit 25d27851 authored by Ondřej Surý's avatar Ondřej Surý
Browse files

Fix lock-order-inversion (potential deadlock) in dns_resolver_createfetch

There's a lock-order-inversion when running `zone_maintenance()` from
the timer while shutting down the server `shutdown_server()`.  This only
happens when the taskmgr scheduling is more relaxed and paralellized,
but the issue is real nevertheless.

The associated ThreadSanitizer warning:

    WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock)
      Cycle in lock order graph: M1 (0x000000000001) => M2 (0x000000000000) => M1

      Mutex M2 acquired here while holding mutex M1 in thread T1:
	#0 pthread_mutex_lock <null>
	#1 dns_view_findzonecut lib/dns/view.c:1326:2
	#2 fctx_create lib/dns/resolver.c:5144:13
	#3 dns_resolver_createfetch lib/dns/resolver.c:10977:12
	#4 zone_refreshkeys lib/dns/zone.c:10830:13
	#5 zone_maintenance lib/dns/zone.c:11065:5
	#6 zone_timer lib/dns/zone.c:14652:2
	#7 task_run lib/isc/task.c:857:5
	#8 isc_task_run lib/isc/task.c:944:10
	#9 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
	#10 process_netievent lib/isc/netmgr/netmgr.c
	#11 process_queue lib/isc/netmgr/netmgr.c:885:8
	#12 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
	#13 process_queues lib/isc/netmgr/netmgr.c:772:7
	#14 async_cb lib/isc/netmgr/netmgr.c:671:2
	#15 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
	#16 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
	#17 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
	#18 nm_thread lib/isc/netmgr/netmgr.c:597:11
	#19 isc__trampoline_run lib/isc/trampoline.c:184:11

      Mutex M1 previously acquired by the same thread here:
	#0 pthread_mutex_lock <null>
	#1 zone_refreshkeys lib/dns/zone.c:10717:2
	#2 zone_maintenance lib/dns/zone.c:11065:5
	#3 zone_timer lib/dns/zone.c:14652:2
	#4 task_run lib/isc/task.c:857:5
	#5 isc_task_run lib/isc/task.c:944:10
	#6 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
	#7 process_netievent lib/isc/netmgr/netmgr.c
	#8 process_queue lib/isc/netmgr/netmgr.c:885:8
	#9 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
	#10 process_queues lib/isc/netmgr/netmgr.c:772:7
	#11 async_cb lib/isc/netmgr/netmgr.c:671:2
	#12 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
	#13 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
	#14 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
	#15 nm_thread lib/isc/netmgr/netmgr.c:597:11
	#16 isc__trampoline_run lib/isc/trampoline.c:184:11

      Mutex M1 acquired here while holding mutex M2 in thread T2:
	#0 pthread_mutex_lock <null>
	#1 dns_zone_flush lib/dns/zone.c:11443:2
	#2 view_flushanddetach lib/dns/view.c:657:5
	#3 dns_view_flushanddetach lib/dns/view.c:690:2
	#4 shutdown_server bin/named/server.c:10056:4
	#5 task_run lib/isc/task.c:857:5
	#6 isc_task_run lib/isc/task.c:944:10
	#7 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
	#8 process_netievent lib/isc/netmgr/netmgr.c
	#9 process_queue lib/isc/netmgr/netmgr.c:885:8
	#10 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
	#11 process_queues lib/isc/netmgr/netmgr.c:772:7
	#12 async_cb lib/isc/netmgr/netmgr.c:671:2
	#13 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
	#14 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
	#15 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
	#16 nm_thread lib/isc/netmgr/netmgr.c:597:11
	#17 isc__trampoline_run lib/isc/trampoline.c:184:11

      Mutex M2 previously acquired by the same thread here:
	#0 pthread_mutex_lock <null>
	#1 view_flushanddetach lib/dns/view.c:645:3
	#2 dns_view_flushanddetach lib/dns/view.c:690:2
	#3 shutdown_server bin/named/server.c:10056:4
	#4 task_run lib/isc/task.c:857:5
	#5 isc_task_run lib/isc/task.c:944:10
	#6 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24
	#7 process_netievent lib/isc/netmgr/netmgr.c
	#8 process_queue lib/isc/netmgr/netmgr.c:885:8
	#9 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10
	#10 process_queues lib/isc/netmgr/netmgr.c:772:7
	#11 async_cb lib/isc/netmgr/netmgr.c:671:2
	#12 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5
	#13 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11
	#14 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5
	#15 nm_thread lib/isc/netmgr/netmgr.c:597:11
	#16 isc__trampoline_run lib/isc/trampoline.c:184:11

      Thread T2 (running) created by main thread at:
	#0 pthread_create <null>
	#1 isc_thread_create lib/isc/pthreads/thread.c:79:8
	#2 isc_nm_start lib/isc/netmgr/netmgr.c:303:3
	#3 create_managers bin/named/main.c:957:15
	#4 setup bin/named/main.c:1267:11
	#5 main bin/named/main.c:1558:2

      Thread T2 (running) created by main thread at:
	#0 pthread_create <null>
	#1 isc_thread_create lib/isc/pthreads/thread.c:79:8
	#2 isc_nm_start lib/isc/netmgr/netmgr.c:303:3
	#3 create_managers bin/named/main.c:957:15
	#4 setup bin/named/main.c:1267:11
	#5 main bin/named/main.c:1558:2

    SUMMARY: ThreadSanitizer: lock-order-inversion (potential deadlock) in __interceptor_pthread_mutex_lock
parent 3f0c8189
Pipeline #69281 failed with stages
in 19 minutes and 38 seconds
......@@ -10827,6 +10827,7 @@ zone_refreshkeys(dns_zone_t *zone) {
#ifdef ENABLE_AFL
if (!dns_fuzzing_resolver) {
#endif /* ifdef ENABLE_AFL */
UNLOCK_ZONE(zone);
result = dns_resolver_createfetch(
zone->view->resolver, kname,
dns_rdatatype_dnskey, NULL, NULL, NULL, NULL, 0,
......@@ -10836,6 +10837,7 @@ zone_refreshkeys(dns_zone_t *zone) {
0, NULL, zone->task, keyfetch_done, kfetch,
&kfetch->dnskeyset, &kfetch->dnskeysigset,
&kfetch->fetch);
LOCK_ZONE(zone);
#ifdef ENABLE_AFL
} else {
result = ISC_R_FAILURE;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment