TSAN race between dns_rbtnode_t bitfields
A certain TSAN issue has been repeatedly reported for different branches by TSAN-enabled respdiff tests:
- `v9_18_sub`: https://gitlab.isc.org/isc-private/bind9/-/jobs/3069484
- `v9_16`: https://gitlab.isc.org/isc-private/bind9/-/jobs/3067159
- `v9_16_sub`: https://gitlab.isc.org/isc-private/bind9/-/jobs/3067260
WARNING: ThreadSanitizer: data race
Read of size 1 at 0x000000000001 by thread T1 (mutexes: read M1):
#0 decrement_reference lib/dns/rbtdb.c:2090
#1 detachnode lib/dns/rbtdb.c:5552
#2 rdataset_disassociate lib/dns/rbtdb.c:8874
#3 dns_rdataset_disassociate lib/dns/rdataset.c:113
#4 fctx_destroy lib/dns/resolver.c:4718
#5 fctx_doshutdown lib/dns/resolver.c:4945
#6 task_run lib/isc/task.c:853
#7 isc_task_run lib/isc/task.c:947
#8 isc__nm_async_task lib/isc/netmgr/netmgr.c:861
#9 process_netievent lib/isc/netmgr/netmgr.c:933
#10 process_queue lib/isc/netmgr/netmgr.c:999
#11 process_all_queues lib/isc/netmgr/netmgr.c:780
#12 async_cb lib/isc/netmgr/netmgr.c:809
#13 uv__async_io /usr/src/libuv-v1.44.1/src/unix/async.c:163
#14 isc__trampoline_run lib/isc/trampoline.c:213
Previous write of size 1 at 0x000000000001 by thread T2 (mutexes: write M2, write M3):
#0 add_wildcard_magic lib/dns/rbtdb.c:2808
#1 findnodeintree lib/dns/rbtdb.c:2885
#2 findnode lib/dns/rbtdb.c:2923
#3 dns_db_findnode lib/dns/db.c:441
#4 validated lib/dns/resolver.c:6155
#5 task_run lib/isc/task.c:853
#6 isc_task_run lib/isc/task.c:947
#7 isc__nm_async_task lib/isc/netmgr/netmgr.c:861
#8 process_netievent lib/isc/netmgr/netmgr.c:933
#9 process_queue lib/isc/netmgr/netmgr.c:999
#10 process_all_queues lib/isc/netmgr/netmgr.c:780
#11 async_cb lib/isc/netmgr/netmgr.c:809
#12 uv__async_io /usr/src/libuv-v1.44.1/src/unix/async.c:163
#13 isc__trampoline_run lib/isc/trampoline.c:213
Location is heap block of size 115 at 0x000000000022 allocated by thread T3:
#0 malloc ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:651
#1 default_memalloc lib/isc/mem.c:715
#2 mem_get lib/isc/mem.c:624
#3 isc___mem_get lib/isc/mem.c:1066
#4 isc__mem_get lib/isc/mem.c:2384
#5 create_node lib/dns/rbt.c:2279
#6 dns_rbt_addnode lib/dns/rbt.c:1471
#7 findnodeintree lib/dns/rbtdb.c:2877
#8 findnode lib/dns/rbtdb.c:2923
#9 dns_db_findnode lib/dns/db.c:441
#10 cache_name lib/dns/resolver.c:6457
#11 cache_message lib/dns/resolver.c:6870
#12 resquery_response lib/dns/resolver.c:8274
#13 task_run lib/isc/task.c:853
#14 isc_task_run lib/isc/task.c:947
#15 isc__nm_async_task lib/isc/netmgr/netmgr.c:861
#16 process_netievent lib/isc/netmgr/netmgr.c:933
#17 process_queue lib/isc/netmgr/netmgr.c:999
#18 process_all_queues lib/isc/netmgr/netmgr.c:780
#19 async_cb lib/isc/netmgr/netmgr.c:809
#20 uv__async_io /usr/src/libuv-v1.44.1/src/unix/async.c:163
#21 isc__trampoline_run lib/isc/trampoline.c:213
Mutex M1 is already destroyed.
Mutex M2 is already destroyed.
Mutex M3 is already destroyed.
Thread T1 (running) created by main thread at:
#0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962
#1 isc_thread_create lib/isc/pthreads/thread.c:81
#2 isc__netmgr_create lib/isc/netmgr/netmgr.c:345
#3 isc_managers_create lib/isc/managers.c:28
#4 create_managers main.c:1064
#5 setup main.c:1389
#6 main main.c:1703
Thread T2 (running) created by main thread at:
#0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962
#1 isc_thread_create lib/isc/pthreads/thread.c:81
#2 isc__netmgr_create lib/isc/netmgr/netmgr.c:345
#3 isc_managers_create lib/isc/managers.c:28
#4 create_managers main.c:1064
#5 setup main.c:1389
#6 main main.c:1703
Thread T3 (running) created by main thread at:
#0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962
#1 isc_thread_create lib/isc/pthreads/thread.c:81
#2 isc__netmgr_create lib/isc/netmgr/netmgr.c:345
#3 isc_managers_create lib/isc/managers.c:28
#4 create_managers main.c:1064
#5 setup main.c:1389
#6 main main.c:1703
SUMMARY: ThreadSanitizer: data race lib/dns/rbtdb.c:2090 in decrement_reference
Judging by the line numbers in the report, this looks like a race between
the following two pieces of code (line numbers taken from `v9_16_37`):
2089 /* Handle easy and typical case first. */
2090 >>> if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) {
2091 if (isc_refcount_decrement(&node->references) == 1) {
2092 refs = isc_refcount_decrement(&nodelock->references);
2093 INSIST(refs > 0);
2094 return (true);
2095 } else {
2096 return (false);
2097 }
2098 }
and:
2787 static isc_result_t
2788 add_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name) {
2789 isc_result_t result;
2790 dns_name_t foundname;
2791 dns_offsets_t offsets;
2792 unsigned int n;
2793 dns_rbtnode_t *node = NULL;
2794
2795 dns_name_init(&foundname, offsets);
2796 n = dns_name_countlabels(name);
2797 INSIST(n >= 2);
2798 n--;
2799 dns_name_getlabelsequence(name, 1, n, &foundname);
2800 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2801 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
2802 return (result);
2803 }
2804 if (result == ISC_R_SUCCESS) {
2805 node->nsec = DNS_RBT_NSEC_NORMAL;
2806 }
2807 node->find_callback = 1;
2808 >>> node->wild = 1;
2809 return (ISC_R_SUCCESS);
2810 }
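`dirty` and `wild` are adjacent one-bit bitfields (which is consistent with
the 1-byte read and the 1-byte write above hitting the same address):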
struct dns_rbtnode {
...
void *data;
uint8_t : 0; /* start of bitfields c/o node lock */
uint8_t dirty : 1;
uint8_t wild : 1;
uint8_t : 0; /* end of bitfields c/o node lock */
uint16_t locknum; /* note that this is not in the bitfield */
isc_refcount_t references;
/*@}*/
};
I cannot recall this issue being reported for the current `main`, but
perhaps it gets triggered there as well, just with a lower frequency.