TSAN reports indicating reference counting issues with dispatch@netmgr
In the following GitLab CI job, previously unseen TSAN reports were generated during the fetchlimit system test:
https://gitlab.isc.org/isc-projects/bind9/-/jobs/2020086
If I am reading these reports correctly, it seems that a fetch context is simultaneously being destroyed and started, which does not look quite right. It needs a look from @each and/or @ondrej, though, as I may be misinterpreting these reports.
Click here to expand/fold TSAN reports
Report 1 (for fctx->altfinds)
WARNING: ThreadSanitizer: data race
Write of size 8 at 0x000000000001 by thread T1:
#0 sort_finds lib/dns/resolver.c:3178
#1 fctx_getaddresses lib/dns/resolver.c:3633
#2 fctx_try lib/dns/resolver.c:3912
#3 fctx_start lib/dns/resolver.c:4471
#4 task_run lib/isc/task.c:827
#5 isc_task_run lib/isc/task.c:907
#6 isc__nm_async_task netmgr/netmgr.c:827
#7 process_netievent netmgr/netmgr.c:906
#8 process_queue netmgr/netmgr.c:998
#9 process_all_queues netmgr/netmgr.c:746
#10 async_cb netmgr/netmgr.c:775
#11 <null> <null>
#12 isc__trampoline_run lib/isc/trampoline.c:185
#13 <null> <null>
Previous read of size 8 at 0x000000000001 by thread T2 (mutexes: write M1):
#0 fctx_decreference lib/dns/resolver.c:6881
#1 dns_resolver_destroyfetch lib/dns/resolver.c:10604
#2 fetch_callback lib/ns/query.c:6253
#3 task_run lib/isc/task.c:827
#4 isc_task_run lib/isc/task.c:907
#5 isc__nm_async_task netmgr/netmgr.c:827
#6 process_netievent netmgr/netmgr.c:906
#7 process_queue netmgr/netmgr.c:998
#8 process_all_queues netmgr/netmgr.c:746
#9 async_cb netmgr/netmgr.c:775
#10 <null> <null>
#11 isc__trampoline_run lib/isc/trampoline.c:185
#12 <null> <null>
Location is heap block of size 3728 at 0x000000000017 allocated by thread T2:
#0 malloc <null>
#1 mallocx lib/isc/jemalloc_shim.h:30
#2 mem_get lib/isc/mem.c:341
#3 isc__mem_get lib/isc/mem.c:754
#4 fctx_create lib/dns/resolver.c:4574
#5 dns_resolver_createfetch lib/dns/resolver.c:10463
#6 ns_query_recurse lib/ns/query.c:6455
#7 query_delegation_recurse lib/ns/query.c:8924
#8 query_delegation lib/ns/query.c:8870
#9 query_gotanswer lib/ns/query.c:7607
#10 query_lookup lib/ns/query.c:5989
#11 ns__query_start lib/ns/query.c:5631
#12 query_setup lib/ns/query.c:5344
#13 ns_query_start lib/ns/query.c:12183
#14 ns__client_request lib/ns/client.c:2153
#15 isc__nm_async_readcb netmgr/netmgr.c:2748
#16 isc__nm_readcb netmgr/netmgr.c:2721
#17 udp_recv_cb netmgr/udp.c:418
#18 <null> <null>
#19 isc__trampoline_run lib/isc/trampoline.c:185
#20 <null> <null>
Mutex M1 (0x000000000035) created at:
#0 pthread_mutex_init <null>
#1 isc__mutex_init lib/isc/mutex.c:288
#2 dns_resolver_create lib/dns/resolver.c:9915
#3 dns_view_createresolver lib/dns/view.c:819
#4 configure_view bin/named/server.c:4714
#5 load_configuration bin/named/server.c:9199
#6 loadconfig bin/named/server.c:10380
#7 named_server_reconfigcommand bin/named/server.c:10777
#8 named_control_docommand bin/named/control.c:248
#9 control_command bin/named/controlconf.c:392
#10 task_run lib/isc/task.c:827
#11 isc_task_run lib/isc/task.c:907
#12 isc__nm_async_task netmgr/netmgr.c:827
#13 process_netievent netmgr/netmgr.c:906
#14 process_queue netmgr/netmgr.c:998
#15 process_all_queues netmgr/netmgr.c:746
#16 async_cb netmgr/netmgr.c:775
#17 <null> <null>
#18 isc__trampoline_run lib/isc/trampoline.c:185
#19 <null> <null>
Thread T1 (running) created by main thread at:
#0 pthread_create <null>
#1 isc_thread_create lib/isc/thread.c:79
#2 isc__netmgr_create netmgr/netmgr.c:321
#3 isc_managers_create lib/isc/managers.c:39
#4 create_managers bin/named/main.c:927
#5 setup bin/named/main.c:1200
#6 main bin/named/main.c:1472
Thread T2 (running) created by main thread at:
#0 pthread_create <null>
#1 isc_thread_create lib/isc/thread.c:79
#2 isc__netmgr_create netmgr/netmgr.c:321
#3 isc_managers_create lib/isc/managers.c:39
#4 create_managers bin/named/main.c:927
#5 setup bin/named/main.c:1200
#6 main bin/named/main.c:1472
SUMMARY: ThreadSanitizer: data race lib/dns/resolver.c:3178 in sort_finds
Report 2 (for fctx->finds)
WARNING: ThreadSanitizer: data race
Write of size 8 at 0x000000000001 by thread T1:
#0 findname lib/dns/resolver.c:3257
#1 fctx_getaddresses lib/dns/resolver.c:3522
#2 fctx_try lib/dns/resolver.c:3912
#3 fctx_start lib/dns/resolver.c:4471
#4 task_run lib/isc/task.c:827
#5 isc_task_run lib/isc/task.c:907
#6 isc__nm_async_task netmgr/netmgr.c:827
#7 process_netievent netmgr/netmgr.c:906
#8 process_queue netmgr/netmgr.c:998
#9 process_all_queues netmgr/netmgr.c:746
#10 async_cb netmgr/netmgr.c:775
#11 <null> <null>
#12 isc__trampoline_run lib/isc/trampoline.c:185
#13 <null> <null>
Previous read of size 8 at 0x000000000001 by thread T2 (mutexes: write M1):
#0 fctx_decreference lib/dns/resolver.c:6880
#1 dns_resolver_destroyfetch lib/dns/resolver.c:10604
#2 fetch_callback lib/ns/query.c:6253
#3 task_run lib/isc/task.c:827
#4 isc_task_run lib/isc/task.c:907
#5 isc__nm_async_task netmgr/netmgr.c:827
#6 process_netievent netmgr/netmgr.c:906
#7 process_queue netmgr/netmgr.c:998
#8 process_all_queues netmgr/netmgr.c:746
#9 async_cb netmgr/netmgr.c:775
#10 <null> <null>
#11 isc__trampoline_run lib/isc/trampoline.c:185
#12 <null> <null>
Location is heap block of size 3728 at 0x000000000017 allocated by thread T2:
#0 malloc <null>
#1 mallocx lib/isc/jemalloc_shim.h:30
#2 mem_get lib/isc/mem.c:341
#3 isc__mem_get lib/isc/mem.c:754
#4 fctx_create lib/dns/resolver.c:4574
#5 dns_resolver_createfetch lib/dns/resolver.c:10463
#6 ns_query_recurse lib/ns/query.c:6455
#7 query_delegation_recurse lib/ns/query.c:8924
#8 query_delegation lib/ns/query.c:8870
#9 query_gotanswer lib/ns/query.c:7607
#10 query_lookup lib/ns/query.c:5989
#11 ns__query_start lib/ns/query.c:5631
#12 query_setup lib/ns/query.c:5344
#13 ns_query_start lib/ns/query.c:12183
#14 ns__client_request lib/ns/client.c:2153
#15 isc__nm_async_readcb netmgr/netmgr.c:2748
#16 isc__nm_readcb netmgr/netmgr.c:2721
#17 udp_recv_cb netmgr/udp.c:418
#18 <null> <null>
#19 isc__trampoline_run lib/isc/trampoline.c:185
#20 <null> <null>
Mutex M1 (0x000000000035) created at:
#0 pthread_mutex_init <null>
#1 isc__mutex_init lib/isc/mutex.c:288
#2 dns_resolver_create lib/dns/resolver.c:9915
#3 dns_view_createresolver lib/dns/view.c:819
#4 configure_view bin/named/server.c:4714
#5 load_configuration bin/named/server.c:9199
#6 loadconfig bin/named/server.c:10380
#7 named_server_reconfigcommand bin/named/server.c:10777
#8 named_control_docommand bin/named/control.c:248
#9 control_command bin/named/controlconf.c:392
#10 task_run lib/isc/task.c:827
#11 isc_task_run lib/isc/task.c:907
#12 isc__nm_async_task netmgr/netmgr.c:827
#13 process_netievent netmgr/netmgr.c:906
#14 process_queue netmgr/netmgr.c:998
#15 process_all_queues netmgr/netmgr.c:746
#16 async_cb netmgr/netmgr.c:775
#17 <null> <null>
#18 isc__trampoline_run lib/isc/trampoline.c:185
#19 <null> <null>
Thread T1 (running) created by main thread at:
#0 pthread_create <null>
#1 isc_thread_create lib/isc/thread.c:79
#2 isc__netmgr_create netmgr/netmgr.c:321
#3 isc_managers_create lib/isc/managers.c:39
#4 create_managers bin/named/main.c:927
#5 setup bin/named/main.c:1200
#6 main bin/named/main.c:1472
Thread T2 (running) created by main thread at:
#0 pthread_create <null>
#1 isc_thread_create lib/isc/thread.c:79
#2 isc__netmgr_create netmgr/netmgr.c:321
#3 isc_managers_create lib/isc/managers.c:39
#4 create_managers bin/named/main.c:927
#5 setup bin/named/main.c:1200
#6 main bin/named/main.c:1472
SUMMARY: ThreadSanitizer: data race lib/dns/resolver.c:3257 in findname