Commit 9c611dd9 authored by Michał Kępień's avatar Michał Kępień

Prevent races when waiting for log messages

The "mirror" system test checks whether log messages announcing a mirror
zone coming into effect are emitted properly.  However, the helper
functions responsible for waiting for zone transfers and zone loading to
complete do not wait for these exact log messages, but rather for other
ones preceding them, which introduces a possibility of false positives.

This problem cannot be addressed by just changing the log message to
look for because the test still needs to discern between transferring a
zone and loading a zone.

Add two new log messages at debug level 99 (which is what named
instances used in system tests are configured with) that are to be
emitted after the log messages announcing a mirror zone coming into
effect.  Tweak the aforementioned helper functions to only return once
the log messages they originally looked for are followed by the newly
added log messages.  This reliably prevents races when looking for
"mirror zone is now in use" log messages and also enables a workaround
previously put into place in the "mirror" system test to be reverted.
parent 2cbf1028
Pipeline #9956 passed with stages
in 16 minutes and 37 seconds
......@@ -17,10 +17,20 @@ RNDCCMD="$RNDC -c $SYSTEMTESTTOP/common/rndc.conf -p ${CONTROLPORT} -s"
# Wait until the transfer of the given zone to ns3 either completes successfully
# or is aborted by a verification failure or a REFUSED response from the master.
# Note that matching on any transfer status is deliberately avoided because some
# checks performed by this test cause transfer attempts to end with the "IXFR
# failed" status, which is followed by an AXFR retry and this test needs to
# check what the result of the latter transfer attempt is.
wait_for_transfer() {
zone=$1
for i in 1 2 3 4 5 6 7 8 9 10; do
nextpartpeek ns3/named.run | egrep "'$zone/IN'.*Transfer status: (success|verify failure|REFUSED)" > /dev/null && return
# Wait until a "freeing transfer context" message is logged
# after one of the transfer results we are looking for is
# logged. This is needed to prevent races when checking for
# "mirror zone is now in use" messages.
nextpartpeek ns3/named.run | \
awk "matched; /'$zone\/IN'.*Transfer status: (success|verify failure|REFUSED)/ {matched=1}" | \
grep "'$zone/IN'.*freeing transfer context" > /dev/null && return
sleep 1
done
echo_i "exceeded time limit waiting for proof of '$zone' being transferred to appear in ns3/named.run"
......@@ -34,7 +44,13 @@ wait_for_load() {
serial=$2
log=$3
for i in 1 2 3 4 5 6 7 8 9 10; do
nextpartpeek $log | egrep "$zone.*(loaded serial $serial|unable to load)" > /dev/null && return
# Wait until a "zone_postload: (...): done" message is logged
# after one of the loading-related messages we are looking for
# is logged. This is needed to prevent races when checking for
# "mirror zone is now in use" messages.
nextpartpeek $log | \
awk "matched; /$zone.*(loaded serial $serial|unable to load)/ {matched=1}" | \
grep "zone_postload: zone $zone/IN: done" > /dev/null && return
sleep 1
done
echo_i "exceeded time limit waiting for proof of '$zone' being loaded to appear in $log"
......@@ -114,23 +130,14 @@ echo_i "checking that an IXFR of an incorrectly signed mirror zone is rejected (
nextpartreset ns3/named.run
ret=0
wait_for_transfer verify-ixfr
# Sanity check: the initial, properly signed version of the zone should have
# been announced as coming into effect.
nextpart ns3/named.run | grep "verify-ixfr.*mirror zone is now in use" > /dev/null || ret=1
# Make a copy of the original zone file for reuse in journal tests below.
cp ns2/verify-ixfr.db.signed ns3/verify-journal.db.mirror
# Wait 1 second so that the zone file timestamp changes and the subsequent
# invocation of "rndc reload" triggers a zone reload. This should also be way
# more than enough for the log message announcing the mirror zone coming into
# effect to appear in the log (see below).
# invocation of "rndc reload" triggers a zone reload.
sleep 1
# Sanity check: the initial, properly signed version of the zone should have
# been announced as coming into effect. Note that we cannot check that
# immediately after wait_for_transfer() as the latter might return before the
# log message we are looking for here appears; we also cannot call nextpart()
# after we update the zone on ns2 since there is a possibility of periodic
# refreshes triggering an IXFR of the "verify-ixfr" zone before the "rndc
# refresh" call below and that possibility needs to be handled as proper
# behavior. Thus, we need to look for the log message now.
nextpart ns3/named.run | grep "verify-ixfr.*mirror zone is now in use" > /dev/null || ret=1
# Update the "verify-ixfr" zone on ns2.
cat ns2/verify-ixfr.db.bad.signed > ns2/verify-ixfr.db.signed
reload_zone verify-ixfr ${UPDATED_SERIAL_BAD}
# Make a copy of the bad zone journal for reuse in journal tests below.
......
......@@ -1534,6 +1534,10 @@ maybe_free(dns_xfrin_ctx_t *xfr) {
dns_zone_log(xfr->zone, ISC_LOG_INFO,
"mirror zone is now in use");
}
xfrin_log(xfr, ISC_LOG_DEBUG(99), "freeing transfer context");
/*
* xfr->zone must not be detached before xfrin_log() is called.
*/
dns_zone_idetach(&xfr->zone);
}
......
......@@ -5069,6 +5069,8 @@ zone_postload(dns_zone_t *zone, dns_db_t *db, isc_time_t loadtime,
DNS_ZONE_CLRFLAG(zone->secure, DNS_ZONEFLG_LOADPENDING);
}
zone_debuglog(zone, "zone_postload", 99, "done");
return (result);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment