Resolver stuck in get_attached_and_locked_entry()
My home resolver running BIND 9.19.11-dev (Development Release) <id:137bb3c>
on Fedora 37 stuck after 30-minute runtime, not being able to resolve a thing.
I aborted it to get a core:
#0 0x00007fec9c2b1489 in ___pthread_mutex_lock (mutex=0x7fec96fe2210) at pthread_mutex_lock.c:136
#1 0x00007fec9ce3a5fb in get_attached_and_locked_entry (adb=adb@entry=0x7fec9bae0300, now=now@entry=1676459708, addr=addr@entry=0x7ffc70aa48e0) at adb.c:1492
#2 0x00007fec9ce3ade6 in import_rdataset (adbname=adbname@entry=0x7fec458a9900, rdataset=rdataset@entry=0x7ffc70aa4be0, now=now@entry=1676459708) at adb.c:606
#3 0x00007fec9ce3b5ed in dbfind_name (adbname=0x7fec458a9900, now=1676459708, rdtype=rdtype@entry=1) at adb.c:2875
#4 0x00007fec9ce3f95a in dns_adb_createfind (adb=<optimized out>, task=<optimized out>, action=action@entry=0x7fec9cf2262f <fctx_finddone>, arg=<optimized out>, name=name@entry=0x7ffc70aa58d0,
qname=<optimized out>, qtype=<optimized out>, options=<optimized out>, now=<optimized out>, target=<optimized out>, port=<optimized out>, depth=<optimized out>, qc=<optimized out>, findp=<optimized out>)
at adb.c:2200
#5 0x00007fec9cf1d199 in findname (fctx=<optimized out>, fctx@entry=0x7fec0283e400, name=name@entry=0x7ffc70aa58d0, port=port@entry=0, options=63, options@entry=31, flags=flags@entry=0, now=1676459708,
overquota=0x7ffc70aa5860, need_alternate=0x7ffc70aa5817, no_addresses=0x7ffc70aa5818) at resolver.c:3473
#6 0x00007fec9cf218bc in fctx_getaddresses (badcache=false, fctx=0x7fec0283e400) at resolver.c:3820
#7 fctx_try (fctx=<optimized out>, retrying=retrying@entry=false, badcache=badcache@entry=false) at resolver.c:4215
#8 0x00007fec9cf224ef in fctx_start (fctx=<optimized out>) at resolver.c:4588
#9 0x00007fec9d23bdb5 in isc__job_cb (idle=0x7fec4585b0c8) at job.c:75
#10 0x00007fec9d7b12a1 in uv__run_idle (loop=0x7fec9bab8620) at src/unix/loop-watcher.c:68
#11 uv_run (loop=loop@entry=0x7fec9bab8620, mode=mode@entry=UV_RUN_DEFAULT) at src/unix/core.c:399
#12 0x00007fec9d241faa in loop_run (loop=0x7fec9bab8600) at loop.c:270
#13 loop_thread (arg=0x7fec9bab8600) at loop.c:297
#14 0x00007fec9d243209 in isc_loopmgr_run (loopmgr=0x7fec9ba09a80) at loop.c:481
#15 0x0000000000425488 in main (argc=4, argv=0x7ffc70aa6158) at main.c:1513
This is how I built it:
autoreconf -fi && CC=/opt/gcc-latest/bin/gcc CFLAGS="-fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g -Wall -Wextra" ./configure --enable-developer --enable-dnstap --enable-geoip --with-libidn2 --disable-maintainer-mode --enable-option-checking=fatal --with-cmocka --with-libxml2 --with-json-c --with-jemalloc && make -j && sudo make install
thread apply all bt full
Thread 2 (Thread 0x7fec3b3ff6c0 (LWP 746975)):
#0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7fec9d7d30cc <cond.lto_priv+44>) at futex-internal.c:57
sc_cancel_oldtype = 0
__arg6 = <optimized out>
__arg3 = <optimized out>
_a5 = <optimized out>
_a2 = <optimized out>
sc_ret = <optimized out>
__arg4 = <optimized out>
__arg1 = <optimized out>
_a6 = <optimized out>
_a3 = <optimized out>
resultvar = <optimized out>
__arg5 = <optimized out>
__arg2 = <optimized out>
_a4 = <optimized out>
_a1 = <optimized out>
#1 __futex_abstimed_wait_common (futex_word=futex_word@entry=0x7fec9d7d30cc <cond.lto_priv+44>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0, cancel=cancel@entry=true) at futex-internal.c:87
err = <optimized out>
clockbit = 256
op = 393
#2 0x00007fec9c2aad7f in __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7fec9d7d30cc <cond.lto_priv+44>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at futex-internal.c:139
No locals.
#3 0x00007fec9c2ad510 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7fec9d7d3120 <mutex.lto_priv>, cond=0x7fec9d7d30a0 <cond.lto_priv>) at pthread_cond_wait.c:503
spin = 0
buffer = {__routine = 0x7fec9c2ad290 <__condvar_cleanup_waiting>, __arg = 0x7fec3b3fe160, __canceltype = -1683941088, __prev = 0x0}
cbuffer = {wseq = 11, cond = 0x7fec9d7d30a0 <cond.lto_priv>, mutex = 0x7fec9d7d3120 <mutex.lto_priv>, private = 0}
err = <optimized out>
g = 1
flags = <optimized out>
g1_start = <optimized out>
maxspin = 0
signals = <optimized out>
result = 0
wseq = 11
seq = 5
private = 0
maxspin = <optimized out>
err = <optimized out>
result = <optimized out>
wseq = <optimized out>
g = <optimized out>
seq = <optimized out>
flags = <optimized out>
private = <optimized out>
signals = <optimized out>
done = <optimized out>
g1_start = <optimized out>
spin = <optimized out>
buffer = <optimized out>
cbuffer = <optimized out>
s = <optimized out>
#4 ___pthread_cond_wait (cond=0x7fec9d7d30a0 <cond.lto_priv>, mutex=0x7fec9d7d3120 <mutex.lto_priv>) at pthread_cond_wait.c:618
No locals.
#5 0x00007fec9d7c135d in uv_cond_wait (cond=<optimized out>, mutex=<optimized out>) at src/unix/thread.c:800
No locals.
#6 0x00007fec9d7b4b6e in worker (arg=0x0) at src/threadpool.c:76
w = <optimized out>
q = <optimized out>
is_slow_work = <optimized out>
#7 0x00007fec9c2ae12d in start_thread (arg=<optimized out>) at pthread_create.c:442
ret = <optimized out>
pd = <optimized out>
out = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140652583057088, -4241293198359379780, -3200, 11, 140722198697360, 140652574666752, 4250149215491211452, 4249953406154195132}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
#8 0x00007fec9c32fbc0 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
No locals.
Thread 1 (Thread 0x7fec9ccc6400 (LWP 746974)):
#0 0x00007fec9c2b1489 in ___pthread_mutex_lock (mutex=0x7fec96fe2210) at pthread_mutex_lock.c:136
type = <optimized out>
__PRETTY_FUNCTION__ = "___pthread_mutex_lock"
id = <optimized out>
#1 0x00007fec9ce3a5fb in get_attached_and_locked_entry (adb=adb@entry=0x7fec9bae0300, now=now@entry=1676459708, addr=addr@entry=0x7ffc70aa48e0) at adb.c:1492
_ret = <optimized out>
result = <optimized out>
adbentry = 0x7fec96fe2200
timenow = {seconds = 1676459708, nanoseconds = 0}
last_update = 1676459708
hashval = 3275498353
locktype = <optimized out>
__func__ = "get_attached_and_locked_entry"
#2 0x00007fec9ce3ade6 in import_rdataset (adbname=adbname@entry=0x7fec458a9900, rdataset=rdataset@entry=0x7ffc70aa4be0, now=now@entry=1676459708) at adb.c:606
nh = 0x0
in6a = {__in6_u = {__u6_addr8 = "\340H\252p\374\177\000\000\320\000\000\000\000\000\000", __u6_addr16 = {18656, 28842, 32764, 0, 208, 0, 0, 0}, __u6_addr32 = {1890207968, 32764, 208, 0}}}
anh = <optimized out>
hookhead = 0x7fec458a9b18
entry = 0x0
rdata = {data = 0x7fec458330ba "\306)", length = 4, rdclass = 1, type = 1, flags = 0, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}}
sockaddr = {type = {sa = {sa_family = 2, sa_data = "\000\000\306)\000\004\000\000\000\000\000\000\000"}, sin = {sin_family = 2, sin_port = 0, sin_addr = {s_addr = 67119558}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 0, sin6_flowinfo = 67119558, sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}, ss = {ss_family = 2, __ss_padding = "\000\000\306)\000\004", '\000' <repeats 111 times>, __ss_align = 0}, sunix = {sun_family = 2, sun_path = "\000\000\306)\000\004", '\000' <repeats 101 times>}}, length = 16, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}}
ina = {s_addr = 67119558}
result = ISC_R_SUCCESS
adb = 0x7fec9bae0300
rdtype = 1
__func__ = "import_rdataset"
#3 0x00007fec9ce3b5ed in dbfind_name (adbname=0x7fec458a9900, now=1676459708, rdtype=rdtype@entry=1) at adb.c:2875
result = ISC_R_SUCCESS
rdataset = {magic = 1145983826, methods = 0x7fec9d00c620 <rdataset_methods>, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}, rdclass = 1, type = 1, ttl = 516597, trust = 4, covers = 0, attributes = 4194304, count = 4796214, resign = 0, private1 = 0x7fec9badff80, private2 = 0x7fec45ab8820, private3 = 0x7fec458330b0, privateuint4 = 0, private5 = 0x7fec458330b6, private6 = 0x0, private7 = 0x0}
adb = 0x7fec9bae0300
foundname = {name = {magic = 1145983854, ndata = 0x7ffc70aa4ae0 "\001a\froot-servers\003net", length = 20, labels = 4, attributes = {absolute = true, readonly = false, dynamic = false, dynoffsets = false, nocompress = false, cache = false, answer = false, ncache = false, chaining = false, chase = false, wildcard = false, prerequisite = false, update = false, hasupdaterec = false}, offsets = 0x7ffc70aa4a20 "", buffer = 0x7ffc70aa4aa0, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}, list = {head = 0x0, tail = 0x0}}, offsets = "\000\002\017\023\374\177\000\000O\277\354\234\354\177\000\000\260[\252p\374\177\000\000\240L\252p\374\177", '\000' <repeats 26 times>, "\260[\252p\374\177\000\000\000\000\000\000\000\000\000\000", '\377' <repeats 16 times>, "\001", '\000' <repeats 15 times>, "\300\226\312\340\353\177\000\000\000\000\000\000\000\002\000\000\340Y\252p\374\177\000", buffer = {magic = 1114990113, base = 0x7ffc70aa4ae0, length = 255, used = 20, current = 0, active = 0, extra = 0, dynamic = false, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}, mctx = 0x0}, data = "\001a\froot-servers\003net", '\000' <repeats 13 times>, " K\252p\374\177\000\000\027\244\346\234\354\177\000\000\000\000\000\000\000\000\000\000\260[\252p\374\177\000\000\260[\252p\374\177\000\000\177N$\235\354\177\000\000\274\276\354c\000\000\000\000\200\276\240\233\354\177\000\000\060\000\000\000\000\000\000\000\260\253\255\233\354\177\000\000\200K\252p\374\177\000\000\307Z$\235\354\177\000\000\260\253\255\233\354\177\000\000(\233\212E\354\177\000\000\000\003\256\233\354\177\000\000\230K\252p\374\177\000\000PL\252p\374\177\000\000*\225\343\234\354\177\000\000`L\252p\374\177\000\000\000\000\000\000\000\000\000\000\220X̜\354\177\000\000\230"...}
fname = 0x7ffc70aa49d0
#4 0x00007fec9ce3f95a in dns_adb_createfind (adb=<optimized out>, task=<optimized out>, action=action@entry=0x7fec9cf2262f <fctx_finddone>, arg=<optimized out>, name=name@entry=0x7ffc70aa58d0, qname=<optimized out>, qtype=<optimized out>, options=<optimized out>, now=<optimized out>, target=<optimized out>, port=<optimized out>, depth=<optimized out>, qc=<optimized out>, findp=<optimized out>) at adb.c:2200
result = ISC_R_UNEXPECTED
find = <optimized out>
adbname = 0x7fec458a9900
want_event = true
start_at_zone = false
alias = false
have_address = false
wanted_addresses = <optimized out>
wanted_fetches = 0
query_pending = 0
namebuf = '\000' <repeats 1023 times>
again = <optimized out>
__func__ = "dns_adb_createfind"
#5 0x00007fec9cf1d199 in findname (fctx=<optimized out>, fctx@entry=0x7fec0283e400, name=name@entry=0x7ffc70aa58d0, port=port@entry=0, options=63, options@entry=31, flags=flags@entry=0, now=1676459708, overquota=0x7ffc70aa5860, need_alternate=0x7ffc70aa5817, no_addresses=0x7ffc70aa5818) at resolver.c:3473
ai = 0x0
find = 0x0
res = 0x7fec9ba1a400
unshared = true
result = <optimized out>
#6 0x00007fec9cf218bc in fctx_getaddresses (badcache=false, fctx=0x7fec0283e400) at resolver.c:3820
overquota = false
ai = 0x0
all_spilled = true
no_addresses = 0
ns_processed = 0
result = <optimized out>
now = 1676459708
ns = {common = {rdclass = 1, rdtype = 2, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}}, mctx = 0x0, name = {magic = 1145983854, ndata = 0x7fec9ba160ca "\001A\fROOT-SERVERS\003NET", length = 20, labels = 4, attributes = {absolute = true, readonly = false, dynamic = false, dynoffsets = false, nocompress = false, cache = false, answer = false, ncache = false, chaining = false, chase = false, wildcard = false, prerequisite = false, update = false, hasupdaterec = false}, offsets = 0x0, buffer = 0x0, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}, list = {head = 0x0, tail = 0x0}}}
need_alternate = false
fwd = <optimized out>
all_bad = <optimized out>
rdata = {data = 0x7fec9ba160ca "\001A\fROOT-SERVERS\003NET", length = 20, rdclass = 1, type = 2, flags = 0, link = {prev = 0xffffffffffffffff, next = 0xffffffffffffffff}}
res = 0x7fec9ba1a400
stdoptions = 31
rdata = <optimized out>
result = <optimized out>
res = <optimized out>
now = <optimized out>
stdoptions = <optimized out>
fwd = <optimized out>
ai = <optimized out>
all_bad = <optimized out>
ns = <optimized out>
need_alternate = <optimized out>
all_spilled = <optimized out>
no_addresses = <optimized out>
ns_processed = <optimized out>
normal_nses = <optimized out>
next = <optimized out>
out = <optimized out>
forwarders = <optimized out>
name = <optimized out>
suffix = <optimized out>
fixed = <optimized out>
domain = <optimized out>
labels = <optimized out>
cur = <optimized out>
overquota = <optimized out>
family = <optimized out>
a = <optimized out>
cur = <optimized out>
__atomic_load_ptr = <optimized out>
__atomic_load_tmp = <optimized out>
expire = <optimized out>
i = <optimized out>
#7 fctx_try (fctx=<optimized out>, retrying=retrying@entry=false, badcache=badcache@entry=false) at resolver.c:4215
result = <optimized out>
addrinfo = <optimized out>
res = 0x7fec9ba1a400
done = <optimized out>
__func__ = "fctx_try"
#8 0x00007fec9cf224ef in fctx_start (fctx=<optimized out>) at resolver.c:4588
__func__ = "fctx_start"
#9 0x00007fec9d23bdb5 in isc__job_cb (idle=0x7fec4585b0c8) at job.c:75
job = 0x7fec4585b0c0
r = <optimized out>
__func__ = "isc__job_cb"
#10 0x00007fec9d7b12a1 in uv__run_idle (loop=0x7fec9bab8620) at src/unix/loop-watcher.c:68
h = <optimized out>
queue = {0x7fec4585b670, 0x7fec45a3b5f0}
q = <optimized out>
#11 uv_run (loop=loop@entry=0x7fec9bab8620, mode=mode@entry=UV_RUN_DEFAULT) at src/unix/core.c:399
timeout = <optimized out>
r = 1
can_sleep = <optimized out>
#12 0x00007fec9d241faa in loop_run (loop=0x7fec9bab8600) at loop.c:270
r = <optimized out>
job = 0x0
r = <optimized out>
job = <optimized out>
__func__ = "loop_run"
next = <optimized out>
#13 loop_thread (arg=0x7fec9bab8600) at loop.c:297
loop = 0x7fec9bab8600
#14 0x00007fec9d243209 in isc_loopmgr_run (loopmgr=0x7fec9ba09a80) at loop.c:481
__func__ = "isc_loopmgr_run"
#15 0x0000000000425488 in main (argc=4, argv=0x7ffc70aa6158) at main.c:1513
result = <optimized out>
!7528 (merged) seem suspicious since it changed get_attached_and_locked_entry()
today.
I don't see anything worrying in the log.