From 4bae6d8d7351157dce8c23445aea16d99dd4d9a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 19 Apr 2021 10:01:32 +0200 Subject: [PATCH] Fix lock-order-inversion (potential deadlock) in dns_resolver_createfetch There's a lock-order-inversion when running `zone_maintenance()` from the timer while shutting down the server in `shutdown_server()`. This only happens when the taskmgr scheduling is more relaxed and parallelized, but the issue is real nevertheless. The associated ThreadSanitizer warning: WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock) Cycle in lock order graph: M1 (0x000000000001) => M2 (0x000000000000) => M1 Mutex M2 acquired here while holding mutex M1 in thread T1: #0 pthread_mutex_lock #1 dns_view_findzonecut lib/dns/view.c:1326:2 #2 fctx_create lib/dns/resolver.c:5144:13 #3 dns_resolver_createfetch lib/dns/resolver.c:10977:12 #4 zone_refreshkeys lib/dns/zone.c:10830:13 #5 zone_maintenance lib/dns/zone.c:11065:5 #6 zone_timer lib/dns/zone.c:14652:2 #7 task_run lib/isc/task.c:857:5 #8 isc_task_run lib/isc/task.c:944:10 #9 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24 #10 process_netievent lib/isc/netmgr/netmgr.c #11 process_queue lib/isc/netmgr/netmgr.c:885:8 #12 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10 #13 process_queues lib/isc/netmgr/netmgr.c:772:7 #14 async_cb lib/isc/netmgr/netmgr.c:671:2 #15 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5 #16 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11 #17 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5 #18 nm_thread lib/isc/netmgr/netmgr.c:597:11 #19 isc__trampoline_run lib/isc/trampoline.c:184:11 Mutex M1 previously acquired by the same thread here: #0 pthread_mutex_lock #1 zone_refreshkeys lib/dns/zone.c:10717:2 #2 zone_maintenance lib/dns/zone.c:11065:5 #3 zone_timer lib/dns/zone.c:14652:2 #4 task_run lib/isc/task.c:857:5 #5 isc_task_run lib/isc/task.c:944:10 #6 isc__nm_async_task 
lib/isc/netmgr/netmgr.c:730:24 #7 process_netievent lib/isc/netmgr/netmgr.c #8 process_queue lib/isc/netmgr/netmgr.c:885:8 #9 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10 #10 process_queues lib/isc/netmgr/netmgr.c:772:7 #11 async_cb lib/isc/netmgr/netmgr.c:671:2 #12 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5 #13 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11 #14 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5 #15 nm_thread lib/isc/netmgr/netmgr.c:597:11 #16 isc__trampoline_run lib/isc/trampoline.c:184:11 Mutex M1 acquired here while holding mutex M2 in thread T2: #0 pthread_mutex_lock #1 dns_zone_flush lib/dns/zone.c:11443:2 #2 view_flushanddetach lib/dns/view.c:657:5 #3 dns_view_flushanddetach lib/dns/view.c:690:2 #4 shutdown_server bin/named/server.c:10056:4 #5 task_run lib/isc/task.c:857:5 #6 isc_task_run lib/isc/task.c:944:10 #7 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24 #8 process_netievent lib/isc/netmgr/netmgr.c #9 process_queue lib/isc/netmgr/netmgr.c:885:8 #10 process_tasks_queue lib/isc/netmgr/netmgr.c:756:10 #11 process_queues lib/isc/netmgr/netmgr.c:772:7 #12 async_cb lib/isc/netmgr/netmgr.c:671:2 #13 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5 #14 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11 #15 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5 #16 nm_thread lib/isc/netmgr/netmgr.c:597:11 #17 isc__trampoline_run lib/isc/trampoline.c:184:11 Mutex M2 previously acquired by the same thread here: #0 pthread_mutex_lock #1 view_flushanddetach lib/dns/view.c:645:3 #2 dns_view_flushanddetach lib/dns/view.c:690:2 #3 shutdown_server bin/named/server.c:10056:4 #4 task_run lib/isc/task.c:857:5 #5 isc_task_run lib/isc/task.c:944:10 #6 isc__nm_async_task lib/isc/netmgr/netmgr.c:730:24 #7 process_netievent lib/isc/netmgr/netmgr.c #8 process_queue lib/isc/netmgr/netmgr.c:885:8 #9 process_tasks_queue 
lib/isc/netmgr/netmgr.c:756:10 #10 process_queues lib/isc/netmgr/netmgr.c:772:7 #11 async_cb lib/isc/netmgr/netmgr.c:671:2 #12 uv__async_io /home/ondrej/Projects/tsan/libuv/src/unix/async.c:163:5 #13 uv__io_poll /home/ondrej/Projects/tsan/libuv/src/unix/linux-core.c:462:11 #14 uv_run /home/ondrej/Projects/tsan/libuv/src/unix/core.c:392:5 #15 nm_thread lib/isc/netmgr/netmgr.c:597:11 #16 isc__trampoline_run lib/isc/trampoline.c:184:11 Thread T1 (running) created by main thread at: #0 pthread_create #1 isc_thread_create lib/isc/pthreads/thread.c:79:8 #2 isc_nm_start lib/isc/netmgr/netmgr.c:303:3 #3 create_managers bin/named/main.c:957:15 #4 setup bin/named/main.c:1267:11 #5 main bin/named/main.c:1558:2 Thread T2 (running) created by main thread at: #0 pthread_create #1 isc_thread_create lib/isc/pthreads/thread.c:79:8 #2 isc_nm_start lib/isc/netmgr/netmgr.c:303:3 #3 create_managers bin/named/main.c:957:15 #4 setup bin/named/main.c:1267:11 #5 main bin/named/main.c:1558:2 SUMMARY: ThreadSanitizer: lock-order-inversion (potential deadlock) in __interceptor_pthread_mutex_lock (cherry picked from commit 25d27851d8e66a6775654a67817ae53aa0e87317) --- lib/dns/zone.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/dns/zone.c b/lib/dns/zone.c index fa6ed8ab5b6..a742bcb9cf2 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -10760,6 +10760,7 @@ zone_refreshkeys(dns_zone_t *zone) { #ifdef ENABLE_AFL if (!dns_fuzzing_resolver) { #endif /* ifdef ENABLE_AFL */ + UNLOCK_ZONE(zone); result = dns_resolver_createfetch( zone->view->resolver, kname, dns_rdatatype_dnskey, NULL, NULL, NULL, NULL, 0, @@ -10769,6 +10770,7 @@ zone_refreshkeys(dns_zone_t *zone) { 0, NULL, zone->task, keyfetch_done, kfetch, &kfetch->dnskeyset, &kfetch->dnskeysigset, &kfetch->fetch); + LOCK_ZONE(zone); #ifdef ENABLE_AFL } else { result = ISC_R_FAILURE; -- GitLab